/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/bio.h> 32 #include <sys/eventhandler.h> 33 #include <sys/fail.h> 34 #include <sys/kernel.h> 35 #include <sys/kthread.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/proc.h> 41 #include <sys/reboot.h> 42 #include <sys/sbuf.h> 43 #include <sys/sched.h> 44 #include <sys/sx.h> 45 #include <sys/sysctl.h> 46 47 #include <geom/geom.h> 48 #include <geom/geom_dbg.h> 49 #include <geom/geom_disk.h> 50 #include <geom/mirror/g_mirror.h> 51 52 FEATURE(geom_mirror, "GEOM mirroring support"); 53 54 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data"); 55 56 SYSCTL_DECL(_kern_geom); 57 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 58 "GEOM_MIRROR stuff"); 59 int g_mirror_debug = 0; 60 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0, 61 "Debug level"); 62 bool g_launch_mirror_before_timeout = true; 63 SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout, 64 CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0, 65 "If false, force gmirror to wait out the full kern.geom.mirror.timeout " 66 "before launching mirrors"); 67 static u_int g_mirror_timeout = 4; 68 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout, 69 0, "Time to wait on all mirror components"); 70 static u_int g_mirror_idletime = 5; 71 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN, 72 &g_mirror_idletime, 0, "Mark components as clean when idling"); 73 static u_int g_mirror_disconnect_on_failure = 1; 74 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN, 75 &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 76 static u_int g_mirror_syncreqs = 2; 77 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN, 78 &g_mirror_syncreqs, 0, "Parallel synchronization I/O 
requests."); 79 static u_int g_mirror_sync_period = 5; 80 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN, 81 &g_mirror_sync_period, 0, 82 "Metadata update period during synchronization, in seconds"); 83 84 #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 85 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 86 msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 87 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 88 } while (0) 89 90 static eventhandler_tag g_mirror_post_sync = NULL; 91 static int g_mirror_shutdown = 0; 92 93 static g_ctl_destroy_geom_t g_mirror_destroy_geom; 94 static g_taste_t g_mirror_taste; 95 static g_init_t g_mirror_init; 96 static g_fini_t g_mirror_fini; 97 static g_provgone_t g_mirror_providergone; 98 static g_resize_t g_mirror_resize; 99 100 struct g_class g_mirror_class = { 101 .name = G_MIRROR_CLASS_NAME, 102 .version = G_VERSION, 103 .ctlreq = g_mirror_config, 104 .taste = g_mirror_taste, 105 .destroy_geom = g_mirror_destroy_geom, 106 .init = g_mirror_init, 107 .fini = g_mirror_fini, 108 .providergone = g_mirror_providergone, 109 .resize = g_mirror_resize 110 }; 111 112 static void g_mirror_destroy_provider(struct g_mirror_softc *sc); 113 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state); 114 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force); 115 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent, 116 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 117 static void g_mirror_timeout_drain(struct g_mirror_softc *sc); 118 static int g_mirror_refresh_device(struct g_mirror_softc *sc, 119 const struct g_provider *pp, const struct g_mirror_metadata *md); 120 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk, 121 struct bio *bp, off_t offset); 122 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type); 123 static void g_mirror_register_request(struct g_mirror_softc *sc, 124 
struct bio *bp); 125 static void g_mirror_sync_release(struct g_mirror_softc *sc); 126 127 static const char * 128 g_mirror_disk_state2str(int state) 129 { 130 131 switch (state) { 132 case G_MIRROR_DISK_STATE_NONE: 133 return ("NONE"); 134 case G_MIRROR_DISK_STATE_NEW: 135 return ("NEW"); 136 case G_MIRROR_DISK_STATE_ACTIVE: 137 return ("ACTIVE"); 138 case G_MIRROR_DISK_STATE_STALE: 139 return ("STALE"); 140 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 141 return ("SYNCHRONIZING"); 142 case G_MIRROR_DISK_STATE_DISCONNECTED: 143 return ("DISCONNECTED"); 144 case G_MIRROR_DISK_STATE_DESTROY: 145 return ("DESTROY"); 146 default: 147 return ("INVALID"); 148 } 149 } 150 151 static const char * 152 g_mirror_device_state2str(int state) 153 { 154 155 switch (state) { 156 case G_MIRROR_DEVICE_STATE_STARTING: 157 return ("STARTING"); 158 case G_MIRROR_DEVICE_STATE_RUNNING: 159 return ("RUNNING"); 160 default: 161 return ("INVALID"); 162 } 163 } 164 165 static const char * 166 g_mirror_get_diskname(struct g_mirror_disk *disk) 167 { 168 169 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 170 return ("[unknown]"); 171 return (disk->d_name); 172 } 173 174 /* 175 * --- Events handling functions --- 176 * Events in geom_mirror are used to maintain disks and device status 177 * from one thread to simplify locking. 
178 */ 179 static void 180 g_mirror_event_free(struct g_mirror_event *ep) 181 { 182 183 free(ep, M_MIRROR); 184 } 185 186 static int 187 g_mirror_event_dispatch(struct g_mirror_event *ep, void *arg, int state, 188 int flags) 189 { 190 struct g_mirror_softc *sc; 191 struct g_mirror_disk *disk; 192 int error; 193 194 G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep); 195 if ((flags & G_MIRROR_EVENT_DEVICE) != 0) { 196 disk = NULL; 197 sc = arg; 198 } else { 199 disk = arg; 200 sc = disk->d_softc; 201 } 202 ep->e_disk = disk; 203 ep->e_state = state; 204 ep->e_flags = flags; 205 ep->e_error = 0; 206 mtx_lock(&sc->sc_events_mtx); 207 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 208 mtx_unlock(&sc->sc_events_mtx); 209 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 210 mtx_lock(&sc->sc_queue_mtx); 211 wakeup(sc); 212 mtx_unlock(&sc->sc_queue_mtx); 213 if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0) 214 return (0); 215 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 216 sx_xunlock(&sc->sc_lock); 217 while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) { 218 mtx_lock(&sc->sc_events_mtx); 219 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", 220 hz * 5); 221 } 222 error = ep->e_error; 223 g_mirror_event_free(ep); 224 sx_xlock(&sc->sc_lock); 225 return (error); 226 } 227 228 int 229 g_mirror_event_send(void *arg, int state, int flags) 230 { 231 struct g_mirror_event *ep; 232 233 ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK); 234 return (g_mirror_event_dispatch(ep, arg, state, flags)); 235 } 236 237 static struct g_mirror_event * 238 g_mirror_event_first(struct g_mirror_softc *sc) 239 { 240 struct g_mirror_event *ep; 241 242 mtx_lock(&sc->sc_events_mtx); 243 ep = TAILQ_FIRST(&sc->sc_events); 244 mtx_unlock(&sc->sc_events_mtx); 245 return (ep); 246 } 247 248 static void 249 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep) 250 { 251 252 mtx_lock(&sc->sc_events_mtx); 253 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 254 
mtx_unlock(&sc->sc_events_mtx); 255 } 256 257 static void 258 g_mirror_event_cancel(struct g_mirror_disk *disk) 259 { 260 struct g_mirror_softc *sc; 261 struct g_mirror_event *ep, *tmpep; 262 263 sc = disk->d_softc; 264 sx_assert(&sc->sc_lock, SX_XLOCKED); 265 266 mtx_lock(&sc->sc_events_mtx); 267 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 268 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) 269 continue; 270 if (ep->e_disk != disk) 271 continue; 272 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 273 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 274 g_mirror_event_free(ep); 275 else { 276 ep->e_error = ECANCELED; 277 wakeup(ep); 278 } 279 } 280 mtx_unlock(&sc->sc_events_mtx); 281 } 282 283 /* 284 * Return the number of disks in given state. 285 * If state is equal to -1, count all connected disks. 286 */ 287 u_int 288 g_mirror_ndisks(struct g_mirror_softc *sc, int state) 289 { 290 struct g_mirror_disk *disk; 291 u_int n = 0; 292 293 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 294 if (state == -1 || disk->d_state == state) 295 n++; 296 } 297 return (n); 298 } 299 300 /* 301 * Find a disk in mirror by its disk ID. 
302 */ 303 static struct g_mirror_disk * 304 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id) 305 { 306 struct g_mirror_disk *disk; 307 308 sx_assert(&sc->sc_lock, SX_XLOCKED); 309 310 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 311 if (disk->d_id == id) 312 return (disk); 313 } 314 return (NULL); 315 } 316 317 static u_int 318 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp) 319 { 320 struct bio *bp; 321 u_int nreqs = 0; 322 323 mtx_lock(&sc->sc_queue_mtx); 324 TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) { 325 if (bp->bio_from == cp) 326 nreqs++; 327 } 328 mtx_unlock(&sc->sc_queue_mtx); 329 return (nreqs); 330 } 331 332 static int 333 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp) 334 { 335 336 if (cp->index > 0) { 337 G_MIRROR_DEBUG(2, 338 "I/O requests for %s exist, can't destroy it now.", 339 cp->provider->name); 340 return (1); 341 } 342 if (g_mirror_nrequests(sc, cp) > 0) { 343 G_MIRROR_DEBUG(2, 344 "I/O requests for %s in queue, can't destroy it now.", 345 cp->provider->name); 346 return (1); 347 } 348 return (0); 349 } 350 351 static void 352 g_mirror_destroy_consumer(void *arg, int flags __unused) 353 { 354 struct g_consumer *cp; 355 356 g_topology_assert(); 357 358 cp = arg; 359 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 360 g_detach(cp); 361 g_destroy_consumer(cp); 362 } 363 364 static void 365 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 366 { 367 struct g_provider *pp; 368 int retaste_wait; 369 370 g_topology_assert(); 371 372 cp->private = NULL; 373 if (g_mirror_is_busy(sc, cp)) 374 return; 375 pp = cp->provider; 376 retaste_wait = 0; 377 if (cp->acw == 1) { 378 if ((pp->geom->flags & G_GEOM_WITHER) == 0) 379 retaste_wait = 1; 380 } 381 G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 382 -cp->acw, -cp->ace, 0); 383 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 384 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 385 if (retaste_wait) { 386 
/* 387 * After retaste event was send (inside g_access()), we can send 388 * event to detach and destroy consumer. 389 * A class, which has consumer to the given provider connected 390 * will not receive retaste event for the provider. 391 * This is the way how I ignore retaste events when I close 392 * consumers opened for write: I detach and destroy consumer 393 * after retaste event is sent. 394 */ 395 g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL); 396 return; 397 } 398 G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name); 399 g_detach(cp); 400 g_destroy_consumer(cp); 401 } 402 403 static int 404 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) 405 { 406 struct g_consumer *cp; 407 int error; 408 409 g_topology_assert_not(); 410 KASSERT(disk->d_consumer == NULL, 411 ("Disk already connected (device %s).", disk->d_softc->sc_name)); 412 413 g_topology_lock(); 414 cp = g_new_consumer(disk->d_softc->sc_geom); 415 cp->flags |= G_CF_DIRECT_RECEIVE; 416 error = g_attach(cp, pp); 417 if (error != 0) { 418 g_destroy_consumer(cp); 419 g_topology_unlock(); 420 return (error); 421 } 422 error = g_access(cp, 1, 1, 1); 423 if (error != 0) { 424 g_detach(cp); 425 g_destroy_consumer(cp); 426 g_topology_unlock(); 427 G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).", 428 pp->name, error); 429 return (error); 430 } 431 g_topology_unlock(); 432 disk->d_consumer = cp; 433 disk->d_consumer->private = disk; 434 disk->d_consumer->index = 0; 435 436 G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk)); 437 return (0); 438 } 439 440 static void 441 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp) 442 { 443 444 g_topology_assert(); 445 446 if (cp == NULL) 447 return; 448 if (cp->provider != NULL) 449 g_mirror_kill_consumer(sc, cp); 450 else 451 g_destroy_consumer(cp); 452 } 453 454 /* 455 * Initialize disk. 
This means allocate memory, create consumer, attach it 456 * to the provider and open access (r1w1e1) to it. 457 */ 458 static struct g_mirror_disk * 459 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp, 460 struct g_mirror_metadata *md, int *errorp) 461 { 462 struct g_mirror_disk *disk; 463 int i, error; 464 465 disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO); 466 if (disk == NULL) { 467 error = ENOMEM; 468 goto fail; 469 } 470 disk->d_softc = sc; 471 error = g_mirror_connect_disk(disk, pp); 472 if (error != 0) 473 goto fail; 474 disk->d_id = md->md_did; 475 disk->d_state = G_MIRROR_DISK_STATE_NONE; 476 disk->d_priority = md->md_priority; 477 disk->d_flags = md->md_dflags; 478 error = g_getattr("GEOM::candelete", disk->d_consumer, &i); 479 if (error == 0 && i != 0) 480 disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE; 481 error = g_getattr("GEOM::rotation_rate", disk->d_consumer, 482 &disk->d_rotation_rate); 483 if (error) 484 disk->d_rotation_rate = DISK_RR_UNKNOWN; 485 if (md->md_provider[0] != '\0') 486 disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED; 487 disk->d_sync.ds_consumer = NULL; 488 disk->d_sync.ds_offset = md->md_sync_offset; 489 disk->d_sync.ds_offset_done = md->md_sync_offset; 490 disk->d_sync.ds_update_ts = time_uptime; 491 disk->d_genid = md->md_genid; 492 disk->d_sync.ds_syncid = md->md_syncid; 493 disk->d_init_ndisks = md->md_all; 494 disk->d_init_slice = md->md_slice; 495 disk->d_init_balance = md->md_balance; 496 disk->d_init_mediasize = md->md_mediasize; 497 if (errorp != NULL) 498 *errorp = 0; 499 return (disk); 500 fail: 501 if (errorp != NULL) 502 *errorp = error; 503 if (disk != NULL) 504 free(disk, M_MIRROR); 505 return (NULL); 506 } 507 508 static void 509 g_mirror_destroy_disk(struct g_mirror_disk *disk) 510 { 511 struct g_mirror_softc *sc; 512 513 g_topology_assert_not(); 514 sc = disk->d_softc; 515 sx_assert(&sc->sc_lock, SX_XLOCKED); 516 517 g_topology_lock(); 518 LIST_REMOVE(disk, d_next); 519 
g_topology_unlock(); 520 g_mirror_event_cancel(disk); 521 if (sc->sc_hint == disk) 522 sc->sc_hint = NULL; 523 switch (disk->d_state) { 524 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 525 g_mirror_sync_stop(disk, 1); 526 /* FALLTHROUGH */ 527 case G_MIRROR_DISK_STATE_NEW: 528 case G_MIRROR_DISK_STATE_STALE: 529 case G_MIRROR_DISK_STATE_ACTIVE: 530 g_topology_lock(); 531 g_mirror_disconnect_consumer(sc, disk->d_consumer); 532 g_topology_unlock(); 533 free(disk, M_MIRROR); 534 break; 535 default: 536 KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 537 g_mirror_get_diskname(disk), 538 g_mirror_disk_state2str(disk->d_state))); 539 } 540 } 541 542 static void 543 g_mirror_free_device(struct g_mirror_softc *sc) 544 { 545 546 g_topology_assert(); 547 548 mtx_destroy(&sc->sc_queue_mtx); 549 mtx_destroy(&sc->sc_events_mtx); 550 mtx_destroy(&sc->sc_done_mtx); 551 sx_destroy(&sc->sc_lock); 552 free(sc, M_MIRROR); 553 } 554 555 static void 556 g_mirror_providergone(struct g_provider *pp) 557 { 558 struct g_mirror_softc *sc = pp->private; 559 560 if ((--sc->sc_refcnt) == 0) 561 g_mirror_free_device(sc); 562 } 563 564 static void 565 g_mirror_destroy_device(struct g_mirror_softc *sc) 566 { 567 struct g_mirror_disk *disk; 568 struct g_mirror_event *ep; 569 struct g_geom *gp; 570 struct g_consumer *cp, *tmpcp; 571 572 g_topology_assert_not(); 573 sx_assert(&sc->sc_lock, SX_XLOCKED); 574 575 gp = sc->sc_geom; 576 if (sc->sc_provider != NULL) 577 g_mirror_destroy_provider(sc); 578 for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL; 579 disk = LIST_FIRST(&sc->sc_disks)) { 580 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 581 g_mirror_update_metadata(disk); 582 g_mirror_destroy_disk(disk); 583 } 584 while ((ep = g_mirror_event_first(sc)) != NULL) { 585 g_mirror_event_remove(sc, ep); 586 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) 587 g_mirror_event_free(ep); 588 else { 589 ep->e_error = ECANCELED; 590 ep->e_flags |= G_MIRROR_EVENT_DONE; 591 G_MIRROR_DEBUG(4, "%s: Waking up %p.", 
__func__, ep); 592 mtx_lock(&sc->sc_events_mtx); 593 wakeup(ep); 594 mtx_unlock(&sc->sc_events_mtx); 595 } 596 } 597 g_mirror_timeout_drain(sc); 598 599 g_topology_lock(); 600 LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) { 601 g_mirror_disconnect_consumer(sc, cp); 602 } 603 g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 604 G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name); 605 g_wither_geom(gp, ENXIO); 606 sx_xunlock(&sc->sc_lock); 607 if ((--sc->sc_refcnt) == 0) 608 g_mirror_free_device(sc); 609 g_topology_unlock(); 610 } 611 612 static void 613 g_mirror_orphan(struct g_consumer *cp) 614 { 615 struct g_mirror_disk *disk; 616 617 g_topology_assert(); 618 619 disk = cp->private; 620 if (disk == NULL) 621 return; 622 disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 623 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 624 G_MIRROR_EVENT_DONTWAIT); 625 } 626 627 /* 628 * Function should return the next active disk on the list. 629 * It is possible that it will be the same disk as given. 630 * If there are no active disks on list, NULL is returned. 
631 */ 632 static __inline struct g_mirror_disk * 633 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 634 { 635 struct g_mirror_disk *dp; 636 637 for (dp = LIST_NEXT(disk, d_next); dp != disk; 638 dp = LIST_NEXT(dp, d_next)) { 639 if (dp == NULL) 640 dp = LIST_FIRST(&sc->sc_disks); 641 if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE) 642 break; 643 } 644 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 645 return (NULL); 646 return (dp); 647 } 648 649 static struct g_mirror_disk * 650 g_mirror_get_disk(struct g_mirror_softc *sc) 651 { 652 struct g_mirror_disk *disk; 653 654 if (sc->sc_hint == NULL) { 655 sc->sc_hint = LIST_FIRST(&sc->sc_disks); 656 if (sc->sc_hint == NULL) 657 return (NULL); 658 } 659 disk = sc->sc_hint; 660 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) { 661 disk = g_mirror_find_next(sc, disk); 662 if (disk == NULL) 663 return (NULL); 664 } 665 sc->sc_hint = g_mirror_find_next(sc, disk); 666 return (disk); 667 } 668 669 static int 670 g_mirror_write_metadata(struct g_mirror_disk *disk, 671 struct g_mirror_metadata *md) 672 { 673 struct g_mirror_softc *sc; 674 struct g_consumer *cp; 675 off_t offset, length; 676 u_char *sector; 677 int error = 0; 678 679 g_topology_assert_not(); 680 sc = disk->d_softc; 681 sx_assert(&sc->sc_lock, SX_LOCKED); 682 683 cp = disk->d_consumer; 684 KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 685 KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 686 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 687 ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 688 cp->acw, cp->ace)); 689 length = cp->provider->sectorsize; 690 offset = cp->provider->mediasize - length; 691 sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO); 692 if (md != NULL && 693 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) { 694 /* 695 * Handle the case, when the size of parent provider reduced. 
696 */ 697 if (offset < md->md_mediasize) 698 error = ENOSPC; 699 else 700 mirror_metadata_encode(md, sector); 701 } 702 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error); 703 if (error == 0) 704 error = g_write_data(cp, offset, sector, length); 705 free(sector, M_MIRROR); 706 if (error != 0) { 707 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 708 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 709 G_MIRROR_DEBUG(0, "Cannot write metadata on %s " 710 "(device=%s, error=%d).", 711 g_mirror_get_diskname(disk), sc->sc_name, error); 712 } else { 713 G_MIRROR_DEBUG(1, "Cannot write metadata on %s " 714 "(device=%s, error=%d).", 715 g_mirror_get_diskname(disk), sc->sc_name, error); 716 } 717 if (g_mirror_disconnect_on_failure && 718 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { 719 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 720 g_mirror_event_send(disk, 721 G_MIRROR_DISK_STATE_DISCONNECTED, 722 G_MIRROR_EVENT_DONTWAIT); 723 } 724 } 725 return (error); 726 } 727 728 static int 729 g_mirror_clear_metadata(struct g_mirror_disk *disk) 730 { 731 int error; 732 733 g_topology_assert_not(); 734 sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); 735 736 if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC) 737 return (0); 738 error = g_mirror_write_metadata(disk, NULL); 739 if (error == 0) { 740 G_MIRROR_DEBUG(2, "Metadata on %s cleared.", 741 g_mirror_get_diskname(disk)); 742 } else { 743 G_MIRROR_DEBUG(0, 744 "Cannot clear metadata on disk %s (error=%d).", 745 g_mirror_get_diskname(disk), error); 746 } 747 return (error); 748 } 749 750 void 751 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk, 752 struct g_mirror_metadata *md) 753 { 754 755 bzero(md, sizeof(*md)); 756 strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic)); 757 md->md_version = G_MIRROR_VERSION; 758 strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 759 md->md_mid = sc->sc_id; 760 md->md_all = sc->sc_ndisks; 761 md->md_slice = sc->sc_slice; 762 
md->md_balance = sc->sc_balance; 763 md->md_genid = sc->sc_genid; 764 md->md_mediasize = sc->sc_mediasize; 765 md->md_sectorsize = sc->sc_sectorsize; 766 md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK); 767 if (disk == NULL) { 768 md->md_did = arc4random(); 769 } else { 770 md->md_did = disk->d_id; 771 md->md_priority = disk->d_priority; 772 md->md_syncid = disk->d_sync.ds_syncid; 773 md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK); 774 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 775 md->md_sync_offset = disk->d_sync.ds_offset_done; 776 if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) { 777 strlcpy(md->md_provider, 778 disk->d_consumer->provider->name, 779 sizeof(md->md_provider)); 780 } 781 md->md_provsize = disk->d_consumer->provider->mediasize; 782 } 783 } 784 785 void 786 g_mirror_update_metadata(struct g_mirror_disk *disk) 787 { 788 struct g_mirror_softc *sc; 789 struct g_mirror_metadata md; 790 int error; 791 792 g_topology_assert_not(); 793 sc = disk->d_softc; 794 sx_assert(&sc->sc_lock, SX_LOCKED); 795 796 if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) 797 return; 798 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) 799 g_mirror_fill_metadata(sc, disk, &md); 800 error = g_mirror_write_metadata(disk, &md); 801 if (error == 0) { 802 G_MIRROR_DEBUG(2, "Metadata on %s updated.", 803 g_mirror_get_diskname(disk)); 804 } else { 805 G_MIRROR_DEBUG(0, 806 "Cannot update metadata on disk %s (error=%d).", 807 g_mirror_get_diskname(disk), error); 808 } 809 } 810 811 static void 812 g_mirror_bump_syncid(struct g_mirror_softc *sc) 813 { 814 struct g_mirror_disk *disk; 815 816 g_topology_assert_not(); 817 sx_assert(&sc->sc_lock, SX_XLOCKED); 818 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 819 ("%s called with no active disks (device=%s).", __func__, 820 sc->sc_name)); 821 822 sc->sc_syncid++; 823 G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 824 sc->sc_syncid); 825 LIST_FOREACH(disk, 
&sc->sc_disks, d_next) { 826 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 827 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 828 disk->d_sync.ds_syncid = sc->sc_syncid; 829 g_mirror_update_metadata(disk); 830 } 831 } 832 } 833 834 static void 835 g_mirror_bump_genid(struct g_mirror_softc *sc) 836 { 837 struct g_mirror_disk *disk; 838 839 g_topology_assert_not(); 840 sx_assert(&sc->sc_lock, SX_XLOCKED); 841 KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0, 842 ("%s called with no active disks (device=%s).", __func__, 843 sc->sc_name)); 844 845 sc->sc_genid++; 846 G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 847 sc->sc_genid); 848 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 849 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 850 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 851 disk->d_genid = sc->sc_genid; 852 g_mirror_update_metadata(disk); 853 } 854 } 855 } 856 857 static int 858 g_mirror_idle(struct g_mirror_softc *sc, int acw) 859 { 860 struct g_mirror_disk *disk; 861 int timeout; 862 863 g_topology_assert_not(); 864 sx_assert(&sc->sc_lock, SX_XLOCKED); 865 866 if (sc->sc_provider == NULL) 867 return (0); 868 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) 869 return (0); 870 if (sc->sc_idle) 871 return (0); 872 if (sc->sc_writes > 0) 873 return (0); 874 if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { 875 timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write); 876 if (!g_mirror_shutdown && timeout > 0) 877 return (timeout); 878 } 879 sc->sc_idle = 1; 880 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 881 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 882 continue; 883 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", 884 g_mirror_get_diskname(disk), sc->sc_name); 885 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 886 g_mirror_update_metadata(disk); 887 } 888 return (0); 889 } 890 891 static void 892 g_mirror_unidle(struct g_mirror_softc *sc) 893 { 894 struct g_mirror_disk 
*disk; 895 896 g_topology_assert_not(); 897 sx_assert(&sc->sc_lock, SX_XLOCKED); 898 899 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) 900 return; 901 sc->sc_idle = 0; 902 sc->sc_last_write = time_uptime; 903 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 904 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 905 continue; 906 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", 907 g_mirror_get_diskname(disk), sc->sc_name); 908 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 909 g_mirror_update_metadata(disk); 910 } 911 } 912 913 static void 914 g_mirror_done(struct bio *bp) 915 { 916 struct g_mirror_softc *sc; 917 918 sc = bp->bio_from->geom->softc; 919 bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR; 920 mtx_lock(&sc->sc_queue_mtx); 921 TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); 922 mtx_unlock(&sc->sc_queue_mtx); 923 wakeup(sc); 924 } 925 926 static void 927 g_mirror_regular_request_error(struct g_mirror_softc *sc, 928 struct g_mirror_disk *disk, struct bio *bp) 929 { 930 931 if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) && 932 bp->bio_error == EOPNOTSUPP) 933 return; 934 935 if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) { 936 disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN; 937 G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).", 938 bp->bio_error); 939 } else { 940 G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).", 941 bp->bio_error); 942 } 943 if (g_mirror_disconnect_on_failure && 944 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) { 945 if (bp->bio_error == ENXIO && 946 bp->bio_cmd == BIO_READ) 947 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 948 else if (bp->bio_error == ENXIO) 949 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW; 950 else 951 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 952 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED, 953 G_MIRROR_EVENT_DONTWAIT); 954 } 955 } 956 957 static void 958 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp) 959 { 960 struct g_mirror_disk *disk; 961 struct bio *pbp; 
962 963 g_topology_assert_not(); 964 KASSERT(sc->sc_provider == bp->bio_parent->bio_to, 965 ("regular request %p with unexpected origin", bp)); 966 967 pbp = bp->bio_parent; 968 bp->bio_from->index--; 969 if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE) 970 sc->sc_writes--; 971 disk = bp->bio_from->private; 972 if (disk == NULL) { 973 g_topology_lock(); 974 g_mirror_kill_consumer(sc, bp->bio_from); 975 g_topology_unlock(); 976 } 977 978 switch (bp->bio_cmd) { 979 case BIO_READ: 980 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read, 981 bp->bio_error); 982 break; 983 case BIO_WRITE: 984 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write, 985 bp->bio_error); 986 break; 987 case BIO_DELETE: 988 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete, 989 bp->bio_error); 990 break; 991 case BIO_FLUSH: 992 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush, 993 bp->bio_error); 994 break; 995 case BIO_SPEEDUP: 996 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup, 997 bp->bio_error); 998 break; 999 } 1000 1001 pbp->bio_inbed++; 1002 KASSERT(pbp->bio_inbed <= pbp->bio_children, 1003 ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 1004 pbp->bio_children)); 1005 if (bp->bio_error == 0 && pbp->bio_error == 0) { 1006 G_MIRROR_LOGREQ(3, bp, "Request delivered."); 1007 g_destroy_bio(bp); 1008 if (pbp->bio_children == pbp->bio_inbed) { 1009 G_MIRROR_LOGREQ(3, pbp, "Request delivered."); 1010 pbp->bio_completed = pbp->bio_length; 1011 if (pbp->bio_cmd == BIO_WRITE || 1012 pbp->bio_cmd == BIO_DELETE) { 1013 TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); 1014 /* Release delayed sync requests if possible. 
*/ 1015 g_mirror_sync_release(sc); 1016 } 1017 g_io_deliver(pbp, pbp->bio_error); 1018 } 1019 return; 1020 } else if (bp->bio_error != 0) { 1021 if (pbp->bio_error == 0) 1022 pbp->bio_error = bp->bio_error; 1023 if (disk != NULL) 1024 g_mirror_regular_request_error(sc, disk, bp); 1025 switch (pbp->bio_cmd) { 1026 case BIO_DELETE: 1027 case BIO_WRITE: 1028 case BIO_FLUSH: 1029 case BIO_SPEEDUP: 1030 pbp->bio_inbed--; 1031 pbp->bio_children--; 1032 break; 1033 } 1034 } 1035 g_destroy_bio(bp); 1036 1037 switch (pbp->bio_cmd) { 1038 case BIO_READ: 1039 if (pbp->bio_inbed < pbp->bio_children) 1040 break; 1041 1042 /* 1043 * If there is only one active disk we want to double-check that 1044 * it is, in fact, the disk that we already tried. This is 1045 * necessary because we might have just lost a race with a 1046 * removal of the tried disk (likely because of the same error) 1047 * and the only remaining disk is still viable for a retry. 1048 */ 1049 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1 && 1050 disk != NULL && 1051 disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) { 1052 g_io_deliver(pbp, pbp->bio_error); 1053 } else { 1054 pbp->bio_error = 0; 1055 mtx_lock(&sc->sc_queue_mtx); 1056 TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue); 1057 mtx_unlock(&sc->sc_queue_mtx); 1058 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1059 wakeup(sc); 1060 } 1061 break; 1062 case BIO_DELETE: 1063 case BIO_WRITE: 1064 case BIO_FLUSH: 1065 case BIO_SPEEDUP: 1066 if (pbp->bio_children == 0) { 1067 /* 1068 * All requests failed. 1069 */ 1070 } else if (pbp->bio_inbed < pbp->bio_children) { 1071 /* Do nothing. */ 1072 break; 1073 } else if (pbp->bio_children == pbp->bio_inbed) { 1074 /* Some requests succeeded. */ 1075 pbp->bio_error = 0; 1076 pbp->bio_completed = pbp->bio_length; 1077 } 1078 if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) { 1079 TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue); 1080 /* Release delayed sync requests if possible. 
*/ 1081 g_mirror_sync_release(sc); 1082 } 1083 g_io_deliver(pbp, pbp->bio_error); 1084 break; 1085 default: 1086 KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd)); 1087 break; 1088 } 1089 } 1090 1091 static void 1092 g_mirror_sync_done(struct bio *bp) 1093 { 1094 struct g_mirror_softc *sc; 1095 1096 G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered."); 1097 sc = bp->bio_from->geom->softc; 1098 bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC; 1099 mtx_lock(&sc->sc_queue_mtx); 1100 TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); 1101 mtx_unlock(&sc->sc_queue_mtx); 1102 wakeup(sc); 1103 } 1104 1105 static void 1106 g_mirror_candelete(struct bio *bp) 1107 { 1108 struct g_mirror_softc *sc; 1109 struct g_mirror_disk *disk; 1110 int val; 1111 1112 sc = bp->bio_to->private; 1113 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1114 if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) 1115 break; 1116 } 1117 val = disk != NULL; 1118 g_handleattr(bp, "GEOM::candelete", &val, sizeof(val)); 1119 } 1120 1121 static void 1122 g_mirror_kernel_dump(struct bio *bp) 1123 { 1124 struct g_mirror_softc *sc; 1125 struct g_mirror_disk *disk; 1126 struct bio *cbp; 1127 struct g_kerneldump *gkd; 1128 1129 /* 1130 * We configure dumping to the first component, because this component 1131 * will be used for reading with 'prefer' balance algorithm. 1132 * If the component with the highest priority is currently disconnected 1133 * we will not be able to read the dump after the reboot if it will be 1134 * connected and synchronized later. Can we do something better? 
1135 */ 1136 sc = bp->bio_to->private; 1137 disk = LIST_FIRST(&sc->sc_disks); 1138 1139 gkd = (struct g_kerneldump *)bp->bio_data; 1140 if (gkd->length > bp->bio_to->mediasize) 1141 gkd->length = bp->bio_to->mediasize; 1142 cbp = g_clone_bio(bp); 1143 if (cbp == NULL) { 1144 g_io_deliver(bp, ENOMEM); 1145 return; 1146 } 1147 cbp->bio_done = g_std_done; 1148 g_io_request(cbp, disk->d_consumer); 1149 G_MIRROR_DEBUG(1, "Kernel dump will go to %s.", 1150 g_mirror_get_diskname(disk)); 1151 } 1152 1153 static void 1154 g_mirror_rotation_rate(struct bio *bp) 1155 { 1156 struct g_mirror_softc *sc; 1157 struct g_mirror_disk *disk; 1158 bool first = true; 1159 uint16_t rr = DISK_RR_UNKNOWN; 1160 1161 sc = bp->bio_to->private; 1162 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1163 if (first) 1164 rr = disk->d_rotation_rate; 1165 else if (rr != disk->d_rotation_rate) { 1166 rr = DISK_RR_UNKNOWN; 1167 break; 1168 } 1169 first = false; 1170 } 1171 g_handleattr(bp, "GEOM::rotation_rate", &rr, sizeof(rr)); 1172 } 1173 1174 static void 1175 g_mirror_start(struct bio *bp) 1176 { 1177 struct g_mirror_softc *sc; 1178 1179 sc = bp->bio_to->private; 1180 /* 1181 * If sc == NULL or there are no valid disks, provider's error 1182 * should be set and g_mirror_start() should not be called at all. 
1183 */ 1184 KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 1185 ("Provider's error should be set (error=%d)(mirror=%s).", 1186 bp->bio_to->error, bp->bio_to->name)); 1187 G_MIRROR_LOGREQ(3, bp, "Request received."); 1188 1189 switch (bp->bio_cmd) { 1190 case BIO_READ: 1191 case BIO_WRITE: 1192 case BIO_DELETE: 1193 case BIO_SPEEDUP: 1194 case BIO_FLUSH: 1195 break; 1196 case BIO_GETATTR: 1197 if (!strcmp(bp->bio_attribute, "GEOM::candelete")) { 1198 g_mirror_candelete(bp); 1199 return; 1200 } else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) { 1201 g_mirror_kernel_dump(bp); 1202 return; 1203 } else if (!strcmp(bp->bio_attribute, "GEOM::rotation_rate")) { 1204 g_mirror_rotation_rate(bp); 1205 return; 1206 } 1207 /* FALLTHROUGH */ 1208 default: 1209 g_io_deliver(bp, EOPNOTSUPP); 1210 return; 1211 } 1212 mtx_lock(&sc->sc_queue_mtx); 1213 if (bp->bio_to->error != 0) { 1214 mtx_unlock(&sc->sc_queue_mtx); 1215 g_io_deliver(bp, bp->bio_to->error); 1216 return; 1217 } 1218 TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue); 1219 mtx_unlock(&sc->sc_queue_mtx); 1220 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1221 wakeup(sc); 1222 } 1223 1224 /* 1225 * Return true if the given request is colliding with a in-progress 1226 * synchronization request. 
1227 */ 1228 static bool 1229 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp) 1230 { 1231 struct g_mirror_disk *disk; 1232 struct bio *sbp; 1233 off_t rstart, rend, sstart, send; 1234 u_int i; 1235 1236 if (sc->sc_sync.ds_ndisks == 0) 1237 return (false); 1238 rstart = bp->bio_offset; 1239 rend = bp->bio_offset + bp->bio_length; 1240 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1241 if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING) 1242 continue; 1243 for (i = 0; i < g_mirror_syncreqs; i++) { 1244 sbp = disk->d_sync.ds_bios[i]; 1245 if (sbp == NULL) 1246 continue; 1247 sstart = sbp->bio_offset; 1248 send = sbp->bio_offset + sbp->bio_length; 1249 if (rend > sstart && rstart < send) 1250 return (true); 1251 } 1252 } 1253 return (false); 1254 } 1255 1256 /* 1257 * Return true if the given sync request is colliding with a in-progress regular 1258 * request. 1259 */ 1260 static bool 1261 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp) 1262 { 1263 off_t rstart, rend, sstart, send; 1264 struct bio *bp; 1265 1266 if (sc->sc_sync.ds_ndisks == 0) 1267 return (false); 1268 sstart = sbp->bio_offset; 1269 send = sbp->bio_offset + sbp->bio_length; 1270 TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) { 1271 rstart = bp->bio_offset; 1272 rend = bp->bio_offset + bp->bio_length; 1273 if (rend > sstart && rstart < send) 1274 return (true); 1275 } 1276 return (false); 1277 } 1278 1279 /* 1280 * Puts regular request onto delayed queue. 1281 */ 1282 static void 1283 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp) 1284 { 1285 1286 G_MIRROR_LOGREQ(2, bp, "Delaying request."); 1287 TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue); 1288 } 1289 1290 /* 1291 * Puts synchronization request onto delayed queue. 
1292 */ 1293 static void 1294 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp) 1295 { 1296 1297 G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request."); 1298 TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue); 1299 } 1300 1301 /* 1302 * Requeue delayed regular requests. 1303 */ 1304 static void 1305 g_mirror_regular_release(struct g_mirror_softc *sc) 1306 { 1307 struct bio *bp; 1308 1309 if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL) 1310 return; 1311 if (g_mirror_sync_collision(sc, bp)) 1312 return; 1313 1314 G_MIRROR_DEBUG(2, "Requeuing regular requests after collision."); 1315 mtx_lock(&sc->sc_queue_mtx); 1316 TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue); 1317 TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue); 1318 mtx_unlock(&sc->sc_queue_mtx); 1319 } 1320 1321 /* 1322 * Releases delayed sync requests which don't collide anymore with regular 1323 * requests. 1324 */ 1325 static void 1326 g_mirror_sync_release(struct g_mirror_softc *sc) 1327 { 1328 struct bio *bp, *bp2; 1329 1330 TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) { 1331 if (g_mirror_regular_collision(sc, bp)) 1332 continue; 1333 TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue); 1334 G_MIRROR_LOGREQ(2, bp, 1335 "Releasing delayed synchronization request."); 1336 g_io_request(bp, bp->bio_from); 1337 } 1338 } 1339 1340 /* 1341 * Free a synchronization request and clear its slot in the array. 1342 */ 1343 static void 1344 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp) 1345 { 1346 int idx; 1347 1348 if (disk != NULL && disk->d_sync.ds_bios != NULL) { 1349 idx = (int)(uintptr_t)bp->bio_caller1; 1350 KASSERT(disk->d_sync.ds_bios[idx] == bp, 1351 ("unexpected sync BIO at %p:%d", disk, idx)); 1352 disk->d_sync.ds_bios[idx] = NULL; 1353 } 1354 free(bp->bio_data, M_MIRROR); 1355 g_destroy_bio(bp); 1356 } 1357 1358 /* 1359 * Handle synchronization requests. 
1360 * Every synchronization request is a two-step process: first, a read request is 1361 * sent to the mirror provider via the sync consumer. If that request completes 1362 * successfully, it is converted to a write and sent to the disk being 1363 * synchronized. If the write also completes successfully, the synchronization 1364 * offset is advanced and a new read request is submitted. 1365 */ 1366 static void 1367 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp) 1368 { 1369 struct g_mirror_disk *disk; 1370 struct g_mirror_disk_sync *sync; 1371 1372 KASSERT((bp->bio_cmd == BIO_READ && 1373 bp->bio_from->geom == sc->sc_sync.ds_geom) || 1374 (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom), 1375 ("Sync BIO %p with unexpected origin", bp)); 1376 1377 bp->bio_from->index--; 1378 disk = bp->bio_from->private; 1379 if (disk == NULL) { 1380 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 1381 g_topology_lock(); 1382 g_mirror_kill_consumer(sc, bp->bio_from); 1383 g_topology_unlock(); 1384 g_mirror_sync_request_free(NULL, bp); 1385 sx_xlock(&sc->sc_lock); 1386 return; 1387 } 1388 1389 sync = &disk->d_sync; 1390 1391 /* 1392 * Synchronization request. 1393 */ 1394 switch (bp->bio_cmd) { 1395 case BIO_READ: { 1396 struct g_consumer *cp; 1397 1398 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read, 1399 bp->bio_error); 1400 1401 if (bp->bio_error != 0) { 1402 G_MIRROR_LOGREQ(0, bp, 1403 "Synchronization request failed (error=%d).", 1404 bp->bio_error); 1405 1406 /* 1407 * The read error will trigger a syncid bump, so there's 1408 * no need to do that here. 1409 * 1410 * The read error handling for regular requests will 1411 * retry the read from all active mirrors before passing 1412 * the error back up, so there's no need to retry here. 
1413 */ 1414 g_mirror_sync_request_free(disk, bp); 1415 g_mirror_event_send(disk, 1416 G_MIRROR_DISK_STATE_DISCONNECTED, 1417 G_MIRROR_EVENT_DONTWAIT); 1418 return; 1419 } 1420 G_MIRROR_LOGREQ(3, bp, 1421 "Synchronization request half-finished."); 1422 bp->bio_cmd = BIO_WRITE; 1423 bp->bio_cflags = 0; 1424 cp = disk->d_consumer; 1425 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1426 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1427 cp->acr, cp->acw, cp->ace)); 1428 cp->index++; 1429 g_io_request(bp, cp); 1430 return; 1431 } 1432 case BIO_WRITE: { 1433 off_t offset; 1434 int i; 1435 1436 KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write, 1437 bp->bio_error); 1438 1439 if (bp->bio_error != 0) { 1440 G_MIRROR_LOGREQ(0, bp, 1441 "Synchronization request failed (error=%d).", 1442 bp->bio_error); 1443 g_mirror_sync_request_free(disk, bp); 1444 sc->sc_bump_id |= G_MIRROR_BUMP_GENID; 1445 g_mirror_event_send(disk, 1446 G_MIRROR_DISK_STATE_DISCONNECTED, 1447 G_MIRROR_EVENT_DONTWAIT); 1448 return; 1449 } 1450 G_MIRROR_LOGREQ(3, bp, "Synchronization request finished."); 1451 if (sync->ds_offset >= sc->sc_mediasize || 1452 sync->ds_consumer == NULL || 1453 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1454 /* Don't send more synchronization requests. */ 1455 sync->ds_inflight--; 1456 g_mirror_sync_request_free(disk, bp); 1457 if (sync->ds_inflight > 0) 1458 return; 1459 if (sync->ds_consumer == NULL || 1460 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1461 return; 1462 } 1463 /* Disk up-to-date, activate it. */ 1464 g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE, 1465 G_MIRROR_EVENT_DONTWAIT); 1466 return; 1467 } 1468 1469 /* Send next synchronization request. 
*/ 1470 g_mirror_sync_reinit(disk, bp, sync->ds_offset); 1471 sync->ds_offset += bp->bio_length; 1472 1473 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); 1474 sync->ds_consumer->index++; 1475 1476 /* 1477 * Delay the request if it is colliding with a regular request. 1478 */ 1479 if (g_mirror_regular_collision(sc, bp)) 1480 g_mirror_sync_delay(sc, bp); 1481 else 1482 g_io_request(bp, sync->ds_consumer); 1483 1484 /* Requeue delayed requests if possible. */ 1485 g_mirror_regular_release(sc); 1486 1487 /* Find the smallest offset */ 1488 offset = sc->sc_mediasize; 1489 for (i = 0; i < g_mirror_syncreqs; i++) { 1490 bp = sync->ds_bios[i]; 1491 if (bp != NULL && bp->bio_offset < offset) 1492 offset = bp->bio_offset; 1493 } 1494 if (g_mirror_sync_period > 0 && 1495 time_uptime - sync->ds_update_ts > g_mirror_sync_period) { 1496 sync->ds_offset_done = offset; 1497 g_mirror_update_metadata(disk); 1498 sync->ds_update_ts = time_uptime; 1499 } 1500 return; 1501 } 1502 default: 1503 panic("Invalid I/O request %p", bp); 1504 } 1505 } 1506 1507 static void 1508 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp) 1509 { 1510 struct g_mirror_disk *disk; 1511 struct g_consumer *cp; 1512 struct bio *cbp; 1513 1514 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1515 if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE) 1516 break; 1517 } 1518 if (disk == NULL) { 1519 if (bp->bio_error == 0) 1520 bp->bio_error = ENXIO; 1521 g_io_deliver(bp, bp->bio_error); 1522 return; 1523 } 1524 cbp = g_clone_bio(bp); 1525 if (cbp == NULL) { 1526 if (bp->bio_error == 0) 1527 bp->bio_error = ENOMEM; 1528 g_io_deliver(bp, bp->bio_error); 1529 return; 1530 } 1531 /* 1532 * Fill in the component buf structure. 
1533 */ 1534 cp = disk->d_consumer; 1535 cbp->bio_done = g_mirror_done; 1536 cbp->bio_to = cp->provider; 1537 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1538 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1539 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1540 cp->acw, cp->ace)); 1541 cp->index++; 1542 g_io_request(cbp, cp); 1543 } 1544 1545 static void 1546 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp) 1547 { 1548 struct g_mirror_disk *disk; 1549 struct g_consumer *cp; 1550 struct bio *cbp; 1551 1552 disk = g_mirror_get_disk(sc); 1553 if (disk == NULL) { 1554 if (bp->bio_error == 0) 1555 bp->bio_error = ENXIO; 1556 g_io_deliver(bp, bp->bio_error); 1557 return; 1558 } 1559 cbp = g_clone_bio(bp); 1560 if (cbp == NULL) { 1561 if (bp->bio_error == 0) 1562 bp->bio_error = ENOMEM; 1563 g_io_deliver(bp, bp->bio_error); 1564 return; 1565 } 1566 /* 1567 * Fill in the component buf structure. 1568 */ 1569 cp = disk->d_consumer; 1570 cbp->bio_done = g_mirror_done; 1571 cbp->bio_to = cp->provider; 1572 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1573 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1574 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1575 cp->acw, cp->ace)); 1576 cp->index++; 1577 g_io_request(cbp, cp); 1578 } 1579 1580 #define TRACK_SIZE (1 * 1024 * 1024) 1581 #define LOAD_SCALE 256 1582 #define ABS(x) (((x) >= 0) ? (x) : (-(x))) 1583 1584 static void 1585 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp) 1586 { 1587 struct g_mirror_disk *disk, *dp; 1588 struct g_consumer *cp; 1589 struct bio *cbp; 1590 int prio, best; 1591 1592 /* Find a disk with the smallest load. */ 1593 disk = NULL; 1594 best = INT_MAX; 1595 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 1596 if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1597 continue; 1598 prio = dp->load; 1599 /* If disk head is precisely in position - highly prefer it. 
*/ 1600 if (dp->d_last_offset == bp->bio_offset) 1601 prio -= 2 * LOAD_SCALE; 1602 else 1603 /* If disk head is close to position - prefer it. */ 1604 if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE) 1605 prio -= 1 * LOAD_SCALE; 1606 if (prio <= best) { 1607 disk = dp; 1608 best = prio; 1609 } 1610 } 1611 KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name)); 1612 cbp = g_clone_bio(bp); 1613 if (cbp == NULL) { 1614 if (bp->bio_error == 0) 1615 bp->bio_error = ENOMEM; 1616 g_io_deliver(bp, bp->bio_error); 1617 return; 1618 } 1619 /* 1620 * Fill in the component buf structure. 1621 */ 1622 cp = disk->d_consumer; 1623 cbp->bio_done = g_mirror_done; 1624 cbp->bio_to = cp->provider; 1625 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1626 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1627 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, 1628 cp->acw, cp->ace)); 1629 cp->index++; 1630 /* Remember last head position */ 1631 disk->d_last_offset = bp->bio_offset + bp->bio_length; 1632 /* Update loads. */ 1633 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 1634 dp->load = (dp->d_consumer->index * LOAD_SCALE + 1635 dp->load * 7) / 8; 1636 } 1637 g_io_request(cbp, cp); 1638 } 1639 1640 static void 1641 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) 1642 { 1643 struct bio_queue queue; 1644 struct g_mirror_disk *disk; 1645 struct g_consumer *cp __diagused; 1646 struct bio *cbp; 1647 off_t left, mod, offset, slice; 1648 u_char *data; 1649 u_int ndisks; 1650 1651 if (bp->bio_length <= sc->sc_slice) { 1652 g_mirror_request_round_robin(sc, bp); 1653 return; 1654 } 1655 ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE); 1656 slice = bp->bio_length / ndisks; 1657 mod = slice % sc->sc_provider->sectorsize; 1658 if (mod != 0) 1659 slice += sc->sc_provider->sectorsize - mod; 1660 /* 1661 * Allocate all bios before sending any request, so we can 1662 * return ENOMEM in nice and clean way. 
1663 */ 1664 left = bp->bio_length; 1665 offset = bp->bio_offset; 1666 data = bp->bio_data; 1667 TAILQ_INIT(&queue); 1668 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1669 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1670 continue; 1671 cbp = g_clone_bio(bp); 1672 if (cbp == NULL) { 1673 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1674 TAILQ_REMOVE(&queue, cbp, bio_queue); 1675 g_destroy_bio(cbp); 1676 } 1677 if (bp->bio_error == 0) 1678 bp->bio_error = ENOMEM; 1679 g_io_deliver(bp, bp->bio_error); 1680 return; 1681 } 1682 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 1683 cbp->bio_done = g_mirror_done; 1684 cbp->bio_caller1 = disk; 1685 cbp->bio_to = disk->d_consumer->provider; 1686 cbp->bio_offset = offset; 1687 cbp->bio_data = data; 1688 cbp->bio_length = MIN(left, slice); 1689 left -= cbp->bio_length; 1690 if (left == 0) 1691 break; 1692 offset += cbp->bio_length; 1693 data += cbp->bio_length; 1694 } 1695 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1696 TAILQ_REMOVE(&queue, cbp, bio_queue); 1697 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1698 disk = cbp->bio_caller1; 1699 cbp->bio_caller1 = NULL; 1700 cp = disk->d_consumer; 1701 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1702 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1703 cp->acr, cp->acw, cp->ace)); 1704 disk->d_consumer->index++; 1705 g_io_request(cbp, disk->d_consumer); 1706 } 1707 } 1708 1709 static void 1710 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp) 1711 { 1712 struct bio_queue queue; 1713 struct bio *cbp; 1714 struct g_consumer *cp; 1715 struct g_mirror_disk *disk; 1716 1717 sx_assert(&sc->sc_lock, SA_XLOCKED); 1718 1719 /* 1720 * To avoid ordering issues, if a write is deferred because of a 1721 * collision with a sync request, all I/O is deferred until that 1722 * write is initiated. 
1723 */ 1724 if (bp->bio_from->geom != sc->sc_sync.ds_geom && 1725 !TAILQ_EMPTY(&sc->sc_regular_delayed)) { 1726 g_mirror_regular_delay(sc, bp); 1727 return; 1728 } 1729 1730 switch (bp->bio_cmd) { 1731 case BIO_READ: 1732 switch (sc->sc_balance) { 1733 case G_MIRROR_BALANCE_LOAD: 1734 g_mirror_request_load(sc, bp); 1735 break; 1736 case G_MIRROR_BALANCE_PREFER: 1737 g_mirror_request_prefer(sc, bp); 1738 break; 1739 case G_MIRROR_BALANCE_ROUND_ROBIN: 1740 g_mirror_request_round_robin(sc, bp); 1741 break; 1742 case G_MIRROR_BALANCE_SPLIT: 1743 g_mirror_request_split(sc, bp); 1744 break; 1745 } 1746 return; 1747 case BIO_WRITE: 1748 case BIO_DELETE: 1749 /* 1750 * Delay the request if it is colliding with a synchronization 1751 * request. 1752 */ 1753 if (g_mirror_sync_collision(sc, bp)) { 1754 g_mirror_regular_delay(sc, bp); 1755 return; 1756 } 1757 1758 if (sc->sc_idle) 1759 g_mirror_unidle(sc); 1760 else 1761 sc->sc_last_write = time_uptime; 1762 1763 /* 1764 * Bump syncid on first write. 1765 */ 1766 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) { 1767 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 1768 g_mirror_bump_syncid(sc); 1769 } 1770 1771 /* 1772 * Allocate all bios before sending any request, so we can 1773 * return ENOMEM in nice and clean way. 
1774 */ 1775 TAILQ_INIT(&queue); 1776 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1777 switch (disk->d_state) { 1778 case G_MIRROR_DISK_STATE_ACTIVE: 1779 break; 1780 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 1781 if (bp->bio_offset >= disk->d_sync.ds_offset) 1782 continue; 1783 break; 1784 default: 1785 continue; 1786 } 1787 if (bp->bio_cmd == BIO_DELETE && 1788 (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0) 1789 continue; 1790 cbp = g_clone_bio(bp); 1791 if (cbp == NULL) { 1792 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1793 TAILQ_REMOVE(&queue, cbp, bio_queue); 1794 g_destroy_bio(cbp); 1795 } 1796 if (bp->bio_error == 0) 1797 bp->bio_error = ENOMEM; 1798 g_io_deliver(bp, bp->bio_error); 1799 return; 1800 } 1801 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 1802 cbp->bio_done = g_mirror_done; 1803 cp = disk->d_consumer; 1804 cbp->bio_caller1 = cp; 1805 cbp->bio_to = cp->provider; 1806 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1807 ("Consumer %s not opened (r%dw%de%d).", 1808 cp->provider->name, cp->acr, cp->acw, cp->ace)); 1809 } 1810 if (TAILQ_EMPTY(&queue)) { 1811 KASSERT(bp->bio_cmd == BIO_DELETE, 1812 ("No consumers for regular request %p", bp)); 1813 g_io_deliver(bp, EOPNOTSUPP); 1814 return; 1815 } 1816 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1817 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1818 TAILQ_REMOVE(&queue, cbp, bio_queue); 1819 cp = cbp->bio_caller1; 1820 cbp->bio_caller1 = NULL; 1821 cp->index++; 1822 sc->sc_writes++; 1823 g_io_request(cbp, cp); 1824 } 1825 /* 1826 * Put request onto inflight queue, so we can check if new 1827 * synchronization requests don't collide with it. 
1828 */ 1829 TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue); 1830 return; 1831 case BIO_SPEEDUP: 1832 case BIO_FLUSH: 1833 TAILQ_INIT(&queue); 1834 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 1835 if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) 1836 continue; 1837 cbp = g_clone_bio(bp); 1838 if (cbp == NULL) { 1839 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1840 TAILQ_REMOVE(&queue, cbp, bio_queue); 1841 g_destroy_bio(cbp); 1842 } 1843 if (bp->bio_error == 0) 1844 bp->bio_error = ENOMEM; 1845 g_io_deliver(bp, bp->bio_error); 1846 return; 1847 } 1848 TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); 1849 cbp->bio_done = g_mirror_done; 1850 cbp->bio_caller1 = disk; 1851 cbp->bio_to = disk->d_consumer->provider; 1852 } 1853 KASSERT(!TAILQ_EMPTY(&queue), 1854 ("No consumers for regular request %p", bp)); 1855 while ((cbp = TAILQ_FIRST(&queue)) != NULL) { 1856 G_MIRROR_LOGREQ(3, cbp, "Sending request."); 1857 TAILQ_REMOVE(&queue, cbp, bio_queue); 1858 disk = cbp->bio_caller1; 1859 cbp->bio_caller1 = NULL; 1860 cp = disk->d_consumer; 1861 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1862 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1863 cp->acr, cp->acw, cp->ace)); 1864 cp->index++; 1865 g_io_request(cbp, cp); 1866 } 1867 break; 1868 default: 1869 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1870 bp->bio_cmd, sc->sc_name)); 1871 break; 1872 } 1873 } 1874 1875 static int 1876 g_mirror_can_destroy(struct g_mirror_softc *sc) 1877 { 1878 struct g_geom *gp; 1879 struct g_consumer *cp; 1880 1881 g_topology_assert(); 1882 gp = sc->sc_geom; 1883 if (gp->softc == NULL) 1884 return (1); 1885 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0) 1886 return (0); 1887 LIST_FOREACH(cp, &gp->consumer, consumer) { 1888 if (g_mirror_is_busy(sc, cp)) 1889 return (0); 1890 } 1891 gp = sc->sc_sync.ds_geom; 1892 LIST_FOREACH(cp, &gp->consumer, consumer) { 1893 if (g_mirror_is_busy(sc, cp)) 1894 return (0); 1895 } 1896 G_MIRROR_DEBUG(2, "No I/O requests 
for %s, it can be destroyed.", 1897 sc->sc_name); 1898 return (1); 1899 } 1900 1901 static int 1902 g_mirror_try_destroy(struct g_mirror_softc *sc) 1903 { 1904 1905 if (sc->sc_rootmount != NULL) { 1906 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1907 sc->sc_rootmount); 1908 root_mount_rel(sc->sc_rootmount); 1909 sc->sc_rootmount = NULL; 1910 } 1911 g_topology_lock(); 1912 if (!g_mirror_can_destroy(sc)) { 1913 g_topology_unlock(); 1914 return (0); 1915 } 1916 sc->sc_geom->softc = NULL; 1917 sc->sc_sync.ds_geom->softc = NULL; 1918 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) { 1919 g_topology_unlock(); 1920 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1921 &sc->sc_worker); 1922 /* Unlock sc_lock here, as it can be destroyed after wakeup. */ 1923 sx_xunlock(&sc->sc_lock); 1924 wakeup(&sc->sc_worker); 1925 sc->sc_worker = NULL; 1926 } else { 1927 g_topology_unlock(); 1928 g_mirror_destroy_device(sc); 1929 } 1930 return (1); 1931 } 1932 1933 /* 1934 * Worker thread. 1935 */ 1936 static void 1937 g_mirror_worker(void *arg) 1938 { 1939 struct g_mirror_softc *sc; 1940 struct g_mirror_event *ep; 1941 struct bio *bp; 1942 int timeout; 1943 1944 sc = arg; 1945 thread_lock(curthread); 1946 sched_prio(curthread, PRIBIO); 1947 thread_unlock(curthread); 1948 1949 sx_xlock(&sc->sc_lock); 1950 for (;;) { 1951 G_MIRROR_DEBUG(5, "%s: Let's see...", __func__); 1952 /* 1953 * First take a look at events. 1954 * This is important to handle events before any I/O requests. 1955 */ 1956 ep = g_mirror_event_first(sc); 1957 if (ep != NULL) { 1958 g_mirror_event_remove(sc, ep); 1959 if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) { 1960 /* Update only device status. */ 1961 G_MIRROR_DEBUG(3, 1962 "Running event for device %s.", 1963 sc->sc_name); 1964 ep->e_error = 0; 1965 g_mirror_update_device(sc, true); 1966 } else { 1967 /* Update disk status. 
*/ 1968 G_MIRROR_DEBUG(3, "Running event for disk %s.", 1969 g_mirror_get_diskname(ep->e_disk)); 1970 ep->e_error = g_mirror_update_disk(ep->e_disk, 1971 ep->e_state); 1972 if (ep->e_error == 0) 1973 g_mirror_update_device(sc, false); 1974 } 1975 if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) { 1976 KASSERT(ep->e_error == 0, 1977 ("Error cannot be handled.")); 1978 g_mirror_event_free(ep); 1979 } else { 1980 ep->e_flags |= G_MIRROR_EVENT_DONE; 1981 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, 1982 ep); 1983 mtx_lock(&sc->sc_events_mtx); 1984 wakeup(ep); 1985 mtx_unlock(&sc->sc_events_mtx); 1986 } 1987 if ((sc->sc_flags & 1988 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 1989 if (g_mirror_try_destroy(sc)) { 1990 curthread->td_pflags &= ~TDP_GEOM; 1991 G_MIRROR_DEBUG(1, "Thread exiting."); 1992 kproc_exit(0); 1993 } 1994 } 1995 G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__); 1996 continue; 1997 } 1998 1999 /* 2000 * Check if we can mark array as CLEAN and if we can't take 2001 * how much seconds should we wait. 2002 */ 2003 timeout = g_mirror_idle(sc, -1); 2004 2005 /* 2006 * Handle I/O requests. 
2007 */ 2008 mtx_lock(&sc->sc_queue_mtx); 2009 bp = TAILQ_FIRST(&sc->sc_queue); 2010 if (bp != NULL) 2011 TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); 2012 else { 2013 if ((sc->sc_flags & 2014 G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 2015 mtx_unlock(&sc->sc_queue_mtx); 2016 if (g_mirror_try_destroy(sc)) { 2017 curthread->td_pflags &= ~TDP_GEOM; 2018 G_MIRROR_DEBUG(1, "Thread exiting."); 2019 kproc_exit(0); 2020 } 2021 mtx_lock(&sc->sc_queue_mtx); 2022 if (!TAILQ_EMPTY(&sc->sc_queue)) { 2023 mtx_unlock(&sc->sc_queue_mtx); 2024 continue; 2025 } 2026 } 2027 if (g_mirror_event_first(sc) != NULL) { 2028 mtx_unlock(&sc->sc_queue_mtx); 2029 continue; 2030 } 2031 sx_xunlock(&sc->sc_lock); 2032 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 2033 timeout * hz); 2034 sx_xlock(&sc->sc_lock); 2035 G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__); 2036 continue; 2037 } 2038 mtx_unlock(&sc->sc_queue_mtx); 2039 2040 if (bp->bio_from->geom == sc->sc_sync.ds_geom && 2041 (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) { 2042 /* 2043 * Handle completion of the first half (the read) of a 2044 * block synchronization operation. 2045 */ 2046 g_mirror_sync_request(sc, bp); 2047 } else if (bp->bio_to != sc->sc_provider) { 2048 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) 2049 /* 2050 * Handle completion of a regular I/O request. 2051 */ 2052 g_mirror_regular_request(sc, bp); 2053 else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) 2054 /* 2055 * Handle completion of the second half (the 2056 * write) of a block synchronization operation. 2057 */ 2058 g_mirror_sync_request(sc, bp); 2059 else { 2060 KASSERT(0, 2061 ("Invalid request cflags=0x%hx to=%s.", 2062 bp->bio_cflags, bp->bio_to->name)); 2063 } 2064 } else { 2065 /* 2066 * Initiate an I/O request. 
2067 */ 2068 g_mirror_register_request(sc, bp); 2069 } 2070 G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__); 2071 } 2072 } 2073 2074 static void 2075 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk) 2076 { 2077 2078 sx_assert(&sc->sc_lock, SX_LOCKED); 2079 2080 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0) 2081 return; 2082 if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) { 2083 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.", 2084 g_mirror_get_diskname(disk), sc->sc_name); 2085 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 2086 } else if (sc->sc_idle && 2087 (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 2088 G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.", 2089 g_mirror_get_diskname(disk), sc->sc_name); 2090 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2091 } 2092 } 2093 2094 static void 2095 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp, 2096 off_t offset) 2097 { 2098 void *data; 2099 int idx; 2100 2101 data = bp->bio_data; 2102 idx = (int)(uintptr_t)bp->bio_caller1; 2103 g_reset_bio(bp); 2104 2105 bp->bio_cmd = BIO_READ; 2106 bp->bio_data = data; 2107 bp->bio_done = g_mirror_sync_done; 2108 bp->bio_from = disk->d_sync.ds_consumer; 2109 bp->bio_to = disk->d_softc->sc_provider; 2110 bp->bio_caller1 = (void *)(uintptr_t)idx; 2111 bp->bio_offset = offset; 2112 bp->bio_length = MIN(maxphys, 2113 disk->d_softc->sc_mediasize - bp->bio_offset); 2114 } 2115 2116 static void 2117 g_mirror_sync_start(struct g_mirror_disk *disk) 2118 { 2119 struct g_mirror_softc *sc; 2120 struct g_mirror_disk_sync *sync; 2121 struct g_consumer *cp; 2122 struct bio *bp; 2123 int error __diagused, i; 2124 2125 g_topology_assert_not(); 2126 sc = disk->d_softc; 2127 sync = &disk->d_sync; 2128 sx_assert(&sc->sc_lock, SX_LOCKED); 2129 2130 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2131 ("Disk %s is not marked for synchronization.", 2132 g_mirror_get_diskname(disk))); 2133 
KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2134 ("Device not in RUNNING state (%s, %u).", sc->sc_name, 2135 sc->sc_state)); 2136 2137 sx_xunlock(&sc->sc_lock); 2138 g_topology_lock(); 2139 cp = g_new_consumer(sc->sc_sync.ds_geom); 2140 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 2141 error = g_attach(cp, sc->sc_provider); 2142 KASSERT(error == 0, 2143 ("Cannot attach to %s (error=%d).", sc->sc_name, error)); 2144 error = g_access(cp, 1, 0, 0); 2145 KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); 2146 g_topology_unlock(); 2147 sx_xlock(&sc->sc_lock); 2148 2149 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 2150 g_mirror_get_diskname(disk)); 2151 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0) 2152 disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY; 2153 KASSERT(sync->ds_consumer == NULL, 2154 ("Sync consumer already exists (device=%s, disk=%s).", 2155 sc->sc_name, g_mirror_get_diskname(disk))); 2156 2157 sync->ds_consumer = cp; 2158 sync->ds_consumer->private = disk; 2159 sync->ds_consumer->index = 0; 2160 2161 /* 2162 * Allocate memory for synchronization bios and initialize them. 2163 */ 2164 sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs, 2165 M_MIRROR, M_WAITOK); 2166 for (i = 0; i < g_mirror_syncreqs; i++) { 2167 bp = g_alloc_bio(); 2168 sync->ds_bios[i] = bp; 2169 2170 bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK); 2171 bp->bio_caller1 = (void *)(uintptr_t)i; 2172 g_mirror_sync_reinit(disk, bp, sync->ds_offset); 2173 sync->ds_offset += bp->bio_length; 2174 } 2175 2176 /* Increase the number of disks in SYNCHRONIZING state. */ 2177 sc->sc_sync.ds_ndisks++; 2178 /* Set the number of in-flight synchronization requests. */ 2179 sync->ds_inflight = g_mirror_syncreqs; 2180 2181 /* 2182 * Fire off first synchronization requests. 
2183 */ 2184 for (i = 0; i < g_mirror_syncreqs; i++) { 2185 bp = sync->ds_bios[i]; 2186 G_MIRROR_LOGREQ(3, bp, "Sending synchronization request."); 2187 sync->ds_consumer->index++; 2188 /* 2189 * Delay the request if it is colliding with a regular request. 2190 */ 2191 if (g_mirror_regular_collision(sc, bp)) 2192 g_mirror_sync_delay(sc, bp); 2193 else 2194 g_io_request(bp, sync->ds_consumer); 2195 } 2196 } 2197 2198 /* 2199 * Stop synchronization process. 2200 * type: 0 - synchronization finished 2201 * 1 - synchronization stopped 2202 */ 2203 static void 2204 g_mirror_sync_stop(struct g_mirror_disk *disk, int type) 2205 { 2206 struct g_mirror_softc *sc; 2207 struct g_consumer *cp; 2208 2209 g_topology_assert_not(); 2210 sc = disk->d_softc; 2211 sx_assert(&sc->sc_lock, SX_LOCKED); 2212 2213 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2214 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2215 g_mirror_disk_state2str(disk->d_state))); 2216 if (disk->d_sync.ds_consumer == NULL) 2217 return; 2218 2219 if (type == 0) { 2220 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.", 2221 sc->sc_name, g_mirror_get_diskname(disk)); 2222 } else /* if (type == 1) */ { 2223 G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 2224 sc->sc_name, g_mirror_get_diskname(disk)); 2225 } 2226 g_mirror_regular_release(sc); 2227 free(disk->d_sync.ds_bios, M_MIRROR); 2228 disk->d_sync.ds_bios = NULL; 2229 cp = disk->d_sync.ds_consumer; 2230 disk->d_sync.ds_consumer = NULL; 2231 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2232 sc->sc_sync.ds_ndisks--; 2233 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. 
*/ 2234 g_topology_lock(); 2235 g_mirror_kill_consumer(sc, cp); 2236 g_topology_unlock(); 2237 sx_xlock(&sc->sc_lock); 2238 } 2239 2240 static void 2241 g_mirror_launch_provider(struct g_mirror_softc *sc) 2242 { 2243 struct g_mirror_disk *disk; 2244 struct g_provider *pp, *dp; 2245 2246 sx_assert(&sc->sc_lock, SX_LOCKED); 2247 2248 g_topology_lock(); 2249 pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); 2250 pp->flags |= G_PF_DIRECT_RECEIVE; 2251 pp->mediasize = sc->sc_mediasize; 2252 pp->sectorsize = sc->sc_sectorsize; 2253 pp->stripesize = 0; 2254 pp->stripeoffset = 0; 2255 2256 /* Splitting of unmapped BIO's could work but isn't implemented now */ 2257 if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT) 2258 pp->flags |= G_PF_ACCEPT_UNMAPPED; 2259 2260 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2261 if (disk->d_consumer && disk->d_consumer->provider) { 2262 dp = disk->d_consumer->provider; 2263 if (dp->stripesize > pp->stripesize) { 2264 pp->stripesize = dp->stripesize; 2265 pp->stripeoffset = dp->stripeoffset; 2266 } 2267 /* A provider underneath us doesn't support unmapped */ 2268 if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { 2269 G_MIRROR_DEBUG(0, "Cancelling unmapped " 2270 "because of %s.", dp->name); 2271 pp->flags &= ~G_PF_ACCEPT_UNMAPPED; 2272 } 2273 } 2274 } 2275 pp->private = sc; 2276 sc->sc_refcnt++; 2277 sc->sc_provider = pp; 2278 g_error_provider(pp, 0); 2279 g_topology_unlock(); 2280 G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name, 2281 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks); 2282 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2283 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 2284 g_mirror_sync_start(disk); 2285 } 2286 } 2287 2288 static void 2289 g_mirror_destroy_provider(struct g_mirror_softc *sc) 2290 { 2291 struct g_mirror_disk *disk; 2292 struct bio *bp; 2293 2294 g_topology_assert_not(); 2295 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 2296 sc->sc_name)); 2297 2298 
LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2299 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) 2300 g_mirror_sync_stop(disk, 1); 2301 } 2302 2303 g_topology_lock(); 2304 g_error_provider(sc->sc_provider, ENXIO); 2305 mtx_lock(&sc->sc_queue_mtx); 2306 while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) { 2307 TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue); 2308 /* 2309 * Abort any pending I/O that wasn't generated by us. 2310 * Synchronization requests and requests destined for individual 2311 * mirror components can be destroyed immediately. 2312 */ 2313 if (bp->bio_to == sc->sc_provider && 2314 bp->bio_from->geom != sc->sc_sync.ds_geom) { 2315 g_io_deliver(bp, ENXIO); 2316 } else { 2317 if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) 2318 free(bp->bio_data, M_MIRROR); 2319 g_destroy_bio(bp); 2320 } 2321 } 2322 mtx_unlock(&sc->sc_queue_mtx); 2323 g_wither_provider(sc->sc_provider, ENXIO); 2324 sc->sc_provider = NULL; 2325 G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name); 2326 g_topology_unlock(); 2327 } 2328 2329 static void 2330 g_mirror_go(void *arg) 2331 { 2332 struct g_mirror_softc *sc; 2333 struct g_mirror_event *ep; 2334 2335 sc = arg; 2336 G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2337 ep = sc->sc_timeout_event; 2338 sc->sc_timeout_event = NULL; 2339 g_mirror_event_dispatch(ep, sc, 0, 2340 G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE); 2341 } 2342 2343 static void 2344 g_mirror_timeout_drain(struct g_mirror_softc *sc) 2345 { 2346 sx_assert(&sc->sc_lock, SX_XLOCKED); 2347 2348 callout_drain(&sc->sc_callout); 2349 g_mirror_event_free(sc->sc_timeout_event); 2350 sc->sc_timeout_event = NULL; 2351 } 2352 2353 static u_int 2354 g_mirror_determine_state(struct g_mirror_disk *disk) 2355 { 2356 struct g_mirror_softc *sc; 2357 u_int state; 2358 2359 sc = disk->d_softc; 2360 if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2361 if ((disk->d_flags & 2362 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 && 2363 
(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 || 2364 (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) { 2365 /* Disk does not need synchronization. */ 2366 state = G_MIRROR_DISK_STATE_ACTIVE; 2367 } else { 2368 if ((sc->sc_flags & 2369 G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2370 (disk->d_flags & 2371 G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 2372 /* 2373 * We can start synchronization from 2374 * the stored offset. 2375 */ 2376 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 2377 } else { 2378 state = G_MIRROR_DISK_STATE_STALE; 2379 } 2380 } 2381 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2382 /* 2383 * Reset all synchronization data for this disk, 2384 * because if it even was synchronized, it was 2385 * synchronized to disks with different syncid. 2386 */ 2387 disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING; 2388 disk->d_sync.ds_offset = 0; 2389 disk->d_sync.ds_offset_done = 0; 2390 disk->d_sync.ds_syncid = sc->sc_syncid; 2391 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2392 (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) { 2393 state = G_MIRROR_DISK_STATE_SYNCHRONIZING; 2394 } else { 2395 state = G_MIRROR_DISK_STATE_STALE; 2396 } 2397 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2398 /* 2399 * Not good, NOT GOOD! 2400 * It means that mirror was started on stale disks 2401 * and more fresh disk just arrive. 2402 * If there were writes, mirror is broken, sorry. 2403 * I think the best choice here is don't touch 2404 * this disk and inform the user loudly. 2405 */ 2406 G_MIRROR_DEBUG(0, "Device %s was started before the freshest " 2407 "disk (%s) arrives!! It will not be connected to the " 2408 "running device.", sc->sc_name, 2409 g_mirror_get_diskname(disk)); 2410 g_mirror_destroy_disk(disk); 2411 state = G_MIRROR_DISK_STATE_NONE; 2412 /* Return immediately, because disk was destroyed. 
*/ 2413 return (state); 2414 } 2415 G_MIRROR_DEBUG(3, "State for %s disk: %s.", 2416 g_mirror_get_diskname(disk), g_mirror_disk_state2str(state)); 2417 return (state); 2418 } 2419 2420 /* 2421 * Update device state. 2422 */ 2423 static void 2424 g_mirror_update_device(struct g_mirror_softc *sc, bool force) 2425 { 2426 struct g_mirror_disk *disk; 2427 u_int state; 2428 2429 sx_assert(&sc->sc_lock, SX_XLOCKED); 2430 2431 switch (sc->sc_state) { 2432 case G_MIRROR_DEVICE_STATE_STARTING: 2433 { 2434 struct g_mirror_disk *pdisk, *tdisk; 2435 const char *mismatch; 2436 uintmax_t found, newest; 2437 u_int dirty, ndisks; 2438 2439 /* Pre-flight checks */ 2440 LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 2441 /* 2442 * Confirm we already detected the newest genid. 2443 */ 2444 KASSERT(sc->sc_genid >= disk->d_genid, 2445 ("%s: found newer genid %u (sc:%p had %u).", __func__, 2446 disk->d_genid, sc, sc->sc_genid)); 2447 2448 /* Kick out any previously tasted stale components. */ 2449 if (disk->d_genid < sc->sc_genid) { 2450 G_MIRROR_DEBUG(0, "Stale 'genid' field on %s " 2451 "(device %s) (component=%u latest=%u), skipping.", 2452 g_mirror_get_diskname(disk), sc->sc_name, 2453 disk->d_genid, sc->sc_genid); 2454 g_mirror_destroy_disk(disk); 2455 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2456 continue; 2457 } 2458 2459 /* 2460 * Confirm we already detected the newest syncid. 
2461 */ 2462 KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid, 2463 ("%s: found newer syncid %u (sc:%p had %u).", 2464 __func__, disk->d_sync.ds_syncid, sc, 2465 sc->sc_syncid)); 2466 2467 #define DETECT_MISMATCH(field, name) \ 2468 if (mismatch == NULL && \ 2469 disk->d_init_ ## field != sc->sc_ ## field) { \ 2470 mismatch = name; \ 2471 found = (intmax_t)disk->d_init_ ## field; \ 2472 newest = (intmax_t)sc->sc_ ## field; \ 2473 } 2474 mismatch = NULL; 2475 DETECT_MISMATCH(ndisks, "md_all"); 2476 DETECT_MISMATCH(balance, "md_balance"); 2477 DETECT_MISMATCH(slice, "md_slice"); 2478 DETECT_MISMATCH(mediasize, "md_mediasize"); 2479 #undef DETECT_MISMATCH 2480 if (mismatch != NULL) { 2481 G_MIRROR_DEBUG(0, "Found a mismatching '%s' " 2482 "field on %s (device %s) (found=%ju " 2483 "newest=%ju).", mismatch, 2484 g_mirror_get_diskname(disk), sc->sc_name, 2485 found, newest); 2486 g_mirror_destroy_disk(disk); 2487 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2488 continue; 2489 } 2490 } 2491 2492 KASSERT(sc->sc_provider == NULL, 2493 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2494 /* 2495 * Are we ready? If the timeout (force is true) has expired, and 2496 * any disks are present, then yes. If we're permitted to launch 2497 * before the timeout has expired and the expected number of 2498 * current-generation mirror disks have been tasted, then yes. 2499 */ 2500 ndisks = g_mirror_ndisks(sc, -1); 2501 if ((force && ndisks > 0) || 2502 (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) { 2503 ; 2504 } else if (ndisks == 0) { 2505 /* 2506 * Disks went down in starting phase, so destroy 2507 * device. 2508 */ 2509 g_mirror_timeout_drain(sc); 2510 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2511 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2512 sc->sc_rootmount); 2513 root_mount_rel(sc->sc_rootmount); 2514 sc->sc_rootmount = NULL; 2515 return; 2516 } else { 2517 return; 2518 } 2519 2520 /* 2521 * Activate all disks with the biggest syncid. 
2522 */ 2523 if (force) { 2524 /* 2525 * If 'force' is true, we have been called due to 2526 * timeout, so don't bother canceling timeout. 2527 */ 2528 ndisks = 0; 2529 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2530 if ((disk->d_flags & 2531 G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) { 2532 ndisks++; 2533 } 2534 } 2535 if (ndisks == 0) { 2536 /* No valid disks found, destroy device. */ 2537 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2538 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", 2539 __LINE__, sc->sc_rootmount); 2540 root_mount_rel(sc->sc_rootmount); 2541 sc->sc_rootmount = NULL; 2542 return; 2543 } 2544 } else { 2545 /* Cancel timeout. */ 2546 g_mirror_timeout_drain(sc); 2547 } 2548 2549 /* 2550 * Here we need to look for dirty disks and if all disks 2551 * with the biggest syncid are dirty, we have to choose 2552 * one with the biggest priority and rebuild the rest. 2553 */ 2554 /* 2555 * Find the number of dirty disks with the biggest syncid. 2556 * Find the number of disks with the biggest syncid. 2557 * While here, find a disk with the biggest priority. 2558 */ 2559 dirty = ndisks = 0; 2560 pdisk = NULL; 2561 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2562 if (disk->d_sync.ds_syncid != sc->sc_syncid) 2563 continue; 2564 if ((disk->d_flags & 2565 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 2566 continue; 2567 } 2568 ndisks++; 2569 if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) { 2570 dirty++; 2571 if (pdisk == NULL || 2572 pdisk->d_priority < disk->d_priority) { 2573 pdisk = disk; 2574 } 2575 } 2576 } 2577 if (dirty == 0) { 2578 /* No dirty disks at all, great. */ 2579 } else if (dirty == ndisks) { 2580 /* 2581 * Force synchronization for all dirty disks except one 2582 * with the biggest priority. 
2583 */ 2584 KASSERT(pdisk != NULL, ("pdisk == NULL")); 2585 G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a " 2586 "master disk for synchronization.", 2587 g_mirror_get_diskname(pdisk), sc->sc_name); 2588 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2589 if (disk->d_sync.ds_syncid != sc->sc_syncid) 2590 continue; 2591 if ((disk->d_flags & 2592 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 2593 continue; 2594 } 2595 KASSERT((disk->d_flags & 2596 G_MIRROR_DISK_FLAG_DIRTY) != 0, 2597 ("Disk %s isn't marked as dirty.", 2598 g_mirror_get_diskname(disk))); 2599 /* Skip the disk with the biggest priority. */ 2600 if (disk == pdisk) 2601 continue; 2602 disk->d_sync.ds_syncid = 0; 2603 } 2604 } else if (dirty < ndisks) { 2605 /* 2606 * Force synchronization for all dirty disks. 2607 * We have some non-dirty disks. 2608 */ 2609 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2610 if (disk->d_sync.ds_syncid != sc->sc_syncid) 2611 continue; 2612 if ((disk->d_flags & 2613 G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) { 2614 continue; 2615 } 2616 if ((disk->d_flags & 2617 G_MIRROR_DISK_FLAG_DIRTY) == 0) { 2618 continue; 2619 } 2620 disk->d_sync.ds_syncid = 0; 2621 } 2622 } 2623 2624 /* Reset hint. */ 2625 sc->sc_hint = NULL; 2626 if (force) { 2627 /* Remember to bump syncid on first write. 
*/ 2628 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2629 } 2630 state = G_MIRROR_DEVICE_STATE_RUNNING; 2631 G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.", 2632 sc->sc_name, g_mirror_device_state2str(sc->sc_state), 2633 g_mirror_device_state2str(state)); 2634 sc->sc_state = state; 2635 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2636 state = g_mirror_determine_state(disk); 2637 g_mirror_event_send(disk, state, 2638 G_MIRROR_EVENT_DONTWAIT); 2639 if (state == G_MIRROR_DISK_STATE_STALE) 2640 sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID; 2641 } 2642 break; 2643 } 2644 case G_MIRROR_DEVICE_STATE_RUNNING: 2645 if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 && 2646 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2647 /* 2648 * No usable disks, so destroy the device. 2649 */ 2650 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 2651 break; 2652 } else if (g_mirror_ndisks(sc, 2653 G_MIRROR_DISK_STATE_ACTIVE) > 0 && 2654 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) { 2655 /* 2656 * We have active disks, launch provider if it doesn't 2657 * exist. 2658 */ 2659 if (sc->sc_provider == NULL) 2660 g_mirror_launch_provider(sc); 2661 if (sc->sc_rootmount != NULL) { 2662 G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", 2663 __LINE__, sc->sc_rootmount); 2664 root_mount_rel(sc->sc_rootmount); 2665 sc->sc_rootmount = NULL; 2666 } 2667 } 2668 /* 2669 * Genid should be bumped immediately, so do it here. 2670 */ 2671 if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) { 2672 sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID; 2673 g_mirror_bump_genid(sc); 2674 } 2675 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) { 2676 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW; 2677 g_mirror_bump_syncid(sc); 2678 } 2679 break; 2680 default: 2681 KASSERT(1 == 0, ("Wrong device state (%s, %s).", 2682 sc->sc_name, g_mirror_device_state2str(sc->sc_state))); 2683 break; 2684 } 2685 } 2686 2687 /* 2688 * Update disk state and device state if needed. 
2689 */ 2690 #define DISK_STATE_CHANGED() G_MIRROR_DEBUG(1, \ 2691 "Disk %s state changed from %s to %s (device %s).", \ 2692 g_mirror_get_diskname(disk), \ 2693 g_mirror_disk_state2str(disk->d_state), \ 2694 g_mirror_disk_state2str(state), sc->sc_name) 2695 static int 2696 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state) 2697 { 2698 struct g_mirror_softc *sc; 2699 2700 sc = disk->d_softc; 2701 sx_assert(&sc->sc_lock, SX_XLOCKED); 2702 2703 again: 2704 G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.", 2705 g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state), 2706 g_mirror_disk_state2str(state)); 2707 switch (state) { 2708 case G_MIRROR_DISK_STATE_NEW: 2709 /* 2710 * Possible scenarios: 2711 * 1. New disk arrive. 2712 */ 2713 /* Previous state should be NONE. */ 2714 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE, 2715 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2716 g_mirror_disk_state2str(disk->d_state))); 2717 DISK_STATE_CHANGED(); 2718 2719 disk->d_state = state; 2720 g_topology_lock(); 2721 if (LIST_EMPTY(&sc->sc_disks)) 2722 LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next); 2723 else { 2724 struct g_mirror_disk *dp; 2725 2726 LIST_FOREACH(dp, &sc->sc_disks, d_next) { 2727 if (disk->d_priority >= dp->d_priority) { 2728 LIST_INSERT_BEFORE(dp, disk, d_next); 2729 dp = NULL; 2730 break; 2731 } 2732 if (LIST_NEXT(dp, d_next) == NULL) 2733 break; 2734 } 2735 if (dp != NULL) 2736 LIST_INSERT_AFTER(dp, disk, d_next); 2737 } 2738 g_topology_unlock(); 2739 G_MIRROR_DEBUG(1, "Device %s: provider %s detected.", 2740 sc->sc_name, g_mirror_get_diskname(disk)); 2741 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 2742 break; 2743 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2744 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2745 g_mirror_device_state2str(sc->sc_state), 2746 g_mirror_get_diskname(disk), 2747 g_mirror_disk_state2str(disk->d_state))); 2748 state = g_mirror_determine_state(disk); 2749 if 
(state != G_MIRROR_DISK_STATE_NONE) 2750 goto again; 2751 break; 2752 case G_MIRROR_DISK_STATE_ACTIVE: 2753 /* 2754 * Possible scenarios: 2755 * 1. New disk does not need synchronization. 2756 * 2. Synchronization process finished successfully. 2757 */ 2758 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2759 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2760 g_mirror_device_state2str(sc->sc_state), 2761 g_mirror_get_diskname(disk), 2762 g_mirror_disk_state2str(disk->d_state))); 2763 /* Previous state should be NEW or SYNCHRONIZING. */ 2764 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW || 2765 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2766 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2767 g_mirror_disk_state2str(disk->d_state))); 2768 DISK_STATE_CHANGED(); 2769 2770 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 2771 disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING; 2772 disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC; 2773 g_mirror_sync_stop(disk, 0); 2774 } 2775 disk->d_state = state; 2776 disk->d_sync.ds_offset = 0; 2777 disk->d_sync.ds_offset_done = 0; 2778 g_mirror_update_idle(sc, disk); 2779 g_mirror_update_metadata(disk); 2780 G_MIRROR_DEBUG(1, "Device %s: provider %s activated.", 2781 sc->sc_name, g_mirror_get_diskname(disk)); 2782 break; 2783 case G_MIRROR_DISK_STATE_STALE: 2784 /* 2785 * Possible scenarios: 2786 * 1. Stale disk was connected. 2787 */ 2788 /* Previous state should be NEW. */ 2789 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2790 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2791 g_mirror_disk_state2str(disk->d_state))); 2792 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2793 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2794 g_mirror_device_state2str(sc->sc_state), 2795 g_mirror_get_diskname(disk), 2796 g_mirror_disk_state2str(disk->d_state))); 2797 /* 2798 * STALE state is only possible if device is marked 2799 * NOAUTOSYNC. 
2800 */ 2801 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0, 2802 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2803 g_mirror_device_state2str(sc->sc_state), 2804 g_mirror_get_diskname(disk), 2805 g_mirror_disk_state2str(disk->d_state))); 2806 DISK_STATE_CHANGED(); 2807 2808 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2809 disk->d_state = state; 2810 g_mirror_update_metadata(disk); 2811 G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.", 2812 sc->sc_name, g_mirror_get_diskname(disk)); 2813 break; 2814 case G_MIRROR_DISK_STATE_SYNCHRONIZING: 2815 /* 2816 * Possible scenarios: 2817 * 1. Disk which needs synchronization was connected. 2818 */ 2819 /* Previous state should be NEW. */ 2820 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2821 ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk), 2822 g_mirror_disk_state2str(disk->d_state))); 2823 KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING, 2824 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2825 g_mirror_device_state2str(sc->sc_state), 2826 g_mirror_get_diskname(disk), 2827 g_mirror_disk_state2str(disk->d_state))); 2828 DISK_STATE_CHANGED(); 2829 2830 if (disk->d_state == G_MIRROR_DISK_STATE_NEW) 2831 disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY; 2832 disk->d_state = state; 2833 if (sc->sc_provider != NULL) { 2834 g_mirror_sync_start(disk); 2835 g_mirror_update_metadata(disk); 2836 } 2837 break; 2838 case G_MIRROR_DISK_STATE_DISCONNECTED: 2839 /* 2840 * Possible scenarios: 2841 * 1. Device wasn't running yet, but disk disappear. 2842 * 2. Disk was active and disapppear. 2843 * 3. Disk disappear during synchronization process. 2844 */ 2845 if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) { 2846 /* 2847 * Previous state should be ACTIVE, STALE or 2848 * SYNCHRONIZING. 
2849 */ 2850 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE || 2851 disk->d_state == G_MIRROR_DISK_STATE_STALE || 2852 disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING, 2853 ("Wrong disk state (%s, %s).", 2854 g_mirror_get_diskname(disk), 2855 g_mirror_disk_state2str(disk->d_state))); 2856 } else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) { 2857 /* Previous state should be NEW. */ 2858 KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW, 2859 ("Wrong disk state (%s, %s).", 2860 g_mirror_get_diskname(disk), 2861 g_mirror_disk_state2str(disk->d_state))); 2862 /* 2863 * Reset bumping syncid if disk disappeared in STARTING 2864 * state. 2865 */ 2866 if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) 2867 sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID; 2868 #ifdef INVARIANTS 2869 } else { 2870 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2871 sc->sc_name, 2872 g_mirror_device_state2str(sc->sc_state), 2873 g_mirror_get_diskname(disk), 2874 g_mirror_disk_state2str(disk->d_state))); 2875 #endif 2876 } 2877 DISK_STATE_CHANGED(); 2878 G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.", 2879 sc->sc_name, g_mirror_get_diskname(disk)); 2880 2881 g_mirror_destroy_disk(disk); 2882 break; 2883 case G_MIRROR_DISK_STATE_DESTROY: 2884 { 2885 int error; 2886 2887 error = g_mirror_clear_metadata(disk); 2888 if (error != 0) { 2889 G_MIRROR_DEBUG(0, 2890 "Device %s: failed to clear metadata on %s: %d.", 2891 sc->sc_name, g_mirror_get_diskname(disk), error); 2892 break; 2893 } 2894 DISK_STATE_CHANGED(); 2895 G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", 2896 sc->sc_name, g_mirror_get_diskname(disk)); 2897 2898 g_mirror_destroy_disk(disk); 2899 sc->sc_ndisks--; 2900 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 2901 g_mirror_update_metadata(disk); 2902 } 2903 break; 2904 } 2905 default: 2906 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2907 break; 2908 } 2909 return (0); 2910 } 2911 #undef DISK_STATE_CHANGED 2912 2913 int 2914 g_mirror_read_metadata(struct 
g_consumer *cp, struct g_mirror_metadata *md) 2915 { 2916 struct g_provider *pp; 2917 u_char *buf; 2918 int error; 2919 2920 g_topology_assert(); 2921 2922 error = g_access(cp, 1, 0, 0); 2923 if (error != 0) 2924 return (error); 2925 pp = cp->provider; 2926 g_topology_unlock(); 2927 /* Metadata are stored on last sector. */ 2928 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2929 &error); 2930 g_topology_lock(); 2931 g_access(cp, -1, 0, 0); 2932 if (buf == NULL) { 2933 G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2934 cp->provider->name, error); 2935 return (error); 2936 } 2937 2938 /* Decode metadata. */ 2939 error = mirror_metadata_decode(buf, md); 2940 g_free(buf); 2941 if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0) 2942 return (EINVAL); 2943 if (md->md_version > G_MIRROR_VERSION) { 2944 G_MIRROR_DEBUG(0, 2945 "Kernel module is too old to handle metadata from %s.", 2946 cp->provider->name); 2947 return (EINVAL); 2948 } 2949 if (error != 0) { 2950 G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2951 cp->provider->name); 2952 return (error); 2953 } 2954 2955 return (0); 2956 } 2957 2958 static int 2959 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp, 2960 struct g_mirror_metadata *md) 2961 { 2962 2963 G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x " 2964 "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x " 2965 "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx " 2966 "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx " 2967 "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x " 2968 "sc_state 0x%x.", 2969 __func__, md->md_did, pp->name, sc->sc_name, md->md_all, 2970 sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance, 2971 sc->sc_balance, (uintmax_t)sc->sc_mediasize, 2972 (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize, 2973 (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid, 2974 md->md_genid, 
md->md_priority, sc->sc_state); 2975 2976 if (g_mirror_id2disk(sc, md->md_did) != NULL) { 2977 G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.", 2978 pp->name, md->md_did); 2979 return (EEXIST); 2980 } 2981 if (sc->sc_mediasize > pp->mediasize) { 2982 G_MIRROR_DEBUG(1, 2983 "Invalid size of disk %s (device %s), skipping.", pp->name, 2984 sc->sc_name); 2985 return (EINVAL); 2986 } 2987 if (md->md_sectorsize != sc->sc_sectorsize) { 2988 G_MIRROR_DEBUG(1, 2989 "Invalid '%s' field on disk %s (device %s), skipping.", 2990 "md_sectorsize", pp->name, sc->sc_name); 2991 return (EINVAL); 2992 } 2993 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2994 G_MIRROR_DEBUG(1, 2995 "Invalid sector size of disk %s (device %s), skipping.", 2996 pp->name, sc->sc_name); 2997 return (EINVAL); 2998 } 2999 if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) { 3000 G_MIRROR_DEBUG(1, 3001 "Invalid device flags on disk %s (device %s), skipping.", 3002 pp->name, sc->sc_name); 3003 return (EINVAL); 3004 } 3005 if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) { 3006 G_MIRROR_DEBUG(1, 3007 "Invalid disk flags on disk %s (device %s), skipping.", 3008 pp->name, sc->sc_name); 3009 return (EINVAL); 3010 } 3011 return (0); 3012 } 3013 3014 int 3015 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp, 3016 struct g_mirror_metadata *md) 3017 { 3018 struct g_mirror_disk *disk; 3019 int error; 3020 3021 g_topology_assert_not(); 3022 G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name); 3023 3024 error = g_mirror_check_metadata(sc, pp, md); 3025 if (error != 0) 3026 return (error); 3027 3028 if (md->md_genid < sc->sc_genid) { 3029 G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.", 3030 pp->name, sc->sc_name); 3031 return (EINVAL); 3032 } 3033 3034 /* 3035 * If the component disk we're tasting has newer metadata than the 3036 * STARTING gmirror device, refresh the device from the component. 
3037 */ 3038 error = g_mirror_refresh_device(sc, pp, md); 3039 if (error != 0) 3040 return (error); 3041 3042 disk = g_mirror_init_disk(sc, pp, md, &error); 3043 if (disk == NULL) 3044 return (error); 3045 error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW, 3046 G_MIRROR_EVENT_WAIT); 3047 if (error != 0) 3048 return (error); 3049 if (md->md_version < G_MIRROR_VERSION) { 3050 G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 3051 pp->name, md->md_version, G_MIRROR_VERSION); 3052 g_mirror_update_metadata(disk); 3053 } 3054 return (0); 3055 } 3056 3057 static void 3058 g_mirror_destroy_delayed(void *arg, int flag) 3059 { 3060 struct g_mirror_softc *sc; 3061 int error; 3062 3063 if (flag == EV_CANCEL) { 3064 G_MIRROR_DEBUG(1, "Destroying canceled."); 3065 return; 3066 } 3067 sc = arg; 3068 g_topology_unlock(); 3069 sx_xlock(&sc->sc_lock); 3070 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0, 3071 ("DESTROY flag set on %s.", sc->sc_name)); 3072 KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0, 3073 ("CLOSEWAIT flag not set on %s.", sc->sc_name)); 3074 G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name); 3075 error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT); 3076 if (error != 0) { 3077 G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).", 3078 sc->sc_name, error); 3079 sx_xunlock(&sc->sc_lock); 3080 } 3081 g_topology_lock(); 3082 } 3083 3084 static int 3085 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace) 3086 { 3087 struct g_mirror_softc *sc; 3088 int error = 0; 3089 3090 g_topology_assert(); 3091 G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 3092 acw, ace); 3093 3094 sc = pp->private; 3095 KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 3096 3097 g_topology_unlock(); 3098 sx_xlock(&sc->sc_lock); 3099 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 || 3100 (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 || 3101 LIST_EMPTY(&sc->sc_disks)) { 3102 if (acr > 0 || 
acw > 0 || ace > 0) 3103 error = ENXIO; 3104 goto end; 3105 } 3106 sc->sc_provider_open += acr + acw + ace; 3107 if (pp->acw + acw == 0) 3108 g_mirror_idle(sc, 0); 3109 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 && 3110 sc->sc_provider_open == 0) 3111 g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL); 3112 end: 3113 sx_xunlock(&sc->sc_lock); 3114 g_topology_lock(); 3115 return (error); 3116 } 3117 3118 static void 3119 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc, 3120 const struct g_mirror_metadata *md) 3121 { 3122 3123 sc->sc_genid = md->md_genid; 3124 sc->sc_syncid = md->md_syncid; 3125 3126 sc->sc_slice = md->md_slice; 3127 sc->sc_balance = md->md_balance; 3128 sc->sc_mediasize = md->md_mediasize; 3129 sc->sc_ndisks = md->md_all; 3130 sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK; 3131 sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK); 3132 } 3133 3134 struct g_geom * 3135 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md, 3136 u_int type) 3137 { 3138 struct g_mirror_softc *sc; 3139 struct g_geom *gp; 3140 int error, timeout; 3141 3142 g_topology_assert(); 3143 G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name, 3144 md->md_mid); 3145 3146 /* One disk is minimum. */ 3147 if (md->md_all < 1) 3148 return (NULL); 3149 /* 3150 * Action geom. 
3151 */ 3152 gp = g_new_geomf(mp, "%s", md->md_name); 3153 sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO); 3154 gp->start = g_mirror_start; 3155 gp->orphan = g_mirror_orphan; 3156 gp->access = g_mirror_access; 3157 gp->dumpconf = g_mirror_dumpconf; 3158 3159 sc->sc_type = type; 3160 sc->sc_id = md->md_mid; 3161 g_mirror_reinit_from_metadata(sc, md); 3162 sc->sc_sectorsize = md->md_sectorsize; 3163 sc->sc_bump_id = 0; 3164 sc->sc_idle = 1; 3165 sc->sc_last_write = time_uptime; 3166 sc->sc_writes = 0; 3167 sc->sc_refcnt = 1; 3168 sx_init(&sc->sc_lock, "gmirror:lock"); 3169 TAILQ_INIT(&sc->sc_queue); 3170 mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF); 3171 TAILQ_INIT(&sc->sc_regular_delayed); 3172 TAILQ_INIT(&sc->sc_inflight); 3173 TAILQ_INIT(&sc->sc_sync_delayed); 3174 LIST_INIT(&sc->sc_disks); 3175 TAILQ_INIT(&sc->sc_events); 3176 mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); 3177 callout_init(&sc->sc_callout, 1); 3178 mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); 3179 sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; 3180 gp->softc = sc; 3181 sc->sc_geom = gp; 3182 sc->sc_provider = NULL; 3183 sc->sc_provider_open = 0; 3184 /* 3185 * Synchronization geom. 
3186 */ 3187 gp = g_new_geomf(mp, "%s.sync", md->md_name); 3188 gp->softc = sc; 3189 gp->orphan = g_mirror_orphan; 3190 sc->sc_sync.ds_geom = gp; 3191 sc->sc_sync.ds_ndisks = 0; 3192 error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0, 3193 "g_mirror %s", md->md_name); 3194 if (error != 0) { 3195 G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", 3196 sc->sc_name); 3197 g_destroy_geom(sc->sc_sync.ds_geom); 3198 g_destroy_geom(sc->sc_geom); 3199 g_mirror_free_device(sc); 3200 return (NULL); 3201 } 3202 3203 G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).", 3204 sc->sc_name, sc->sc_ndisks, sc->sc_id); 3205 3206 sc->sc_rootmount = root_mount_hold("GMIRROR"); 3207 G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 3208 3209 /* 3210 * Schedule startup timeout. 3211 */ 3212 timeout = g_mirror_timeout * hz; 3213 sc->sc_timeout_event = malloc(sizeof(struct g_mirror_event), M_MIRROR, 3214 M_WAITOK); 3215 callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc); 3216 return (sc->sc_geom); 3217 } 3218 3219 int 3220 g_mirror_destroy(struct g_mirror_softc *sc, int how) 3221 { 3222 struct g_mirror_disk *disk; 3223 3224 g_topology_assert_not(); 3225 sx_assert(&sc->sc_lock, SX_XLOCKED); 3226 3227 if (sc->sc_provider_open != 0) { 3228 switch (how) { 3229 case G_MIRROR_DESTROY_SOFT: 3230 G_MIRROR_DEBUG(1, 3231 "Device %s is still open (%d).", sc->sc_name, 3232 sc->sc_provider_open); 3233 return (EBUSY); 3234 case G_MIRROR_DESTROY_DELAYED: 3235 G_MIRROR_DEBUG(1, 3236 "Device %s will be destroyed on last close.", 3237 sc->sc_name); 3238 LIST_FOREACH(disk, &sc->sc_disks, d_next) { 3239 if (disk->d_state == 3240 G_MIRROR_DISK_STATE_SYNCHRONIZING) { 3241 g_mirror_sync_stop(disk, 1); 3242 } 3243 } 3244 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT; 3245 return (EBUSY); 3246 case G_MIRROR_DESTROY_HARD: 3247 G_MIRROR_DEBUG(1, "Device %s is still open, so it " 3248 "can't be definitely removed.", sc->sc_name); 3249 } 3250 } 3251 3252 if ((sc->sc_flags & 
G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 3253 sx_xunlock(&sc->sc_lock); 3254 return (0); 3255 } 3256 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY; 3257 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN; 3258 G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc); 3259 sx_xunlock(&sc->sc_lock); 3260 mtx_lock(&sc->sc_queue_mtx); 3261 wakeup(sc); 3262 mtx_unlock(&sc->sc_queue_mtx); 3263 G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 3264 while (sc->sc_worker != NULL) 3265 tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5); 3266 G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 3267 sx_xlock(&sc->sc_lock); 3268 g_mirror_destroy_device(sc); 3269 return (0); 3270 } 3271 3272 static void 3273 g_mirror_taste_orphan(struct g_consumer *cp) 3274 { 3275 3276 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 3277 cp->provider->name)); 3278 } 3279 3280 static struct g_geom * 3281 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 3282 { 3283 struct g_mirror_metadata md; 3284 struct g_mirror_softc *sc; 3285 struct g_consumer *cp; 3286 struct g_geom *gp; 3287 int error; 3288 3289 g_topology_assert(); 3290 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 3291 G_MIRROR_DEBUG(2, "Tasting %s.", pp->name); 3292 3293 gp = g_new_geomf(mp, "mirror:taste"); 3294 /* 3295 * This orphan function should be never called. 
3296 */ 3297 gp->orphan = g_mirror_taste_orphan; 3298 cp = g_new_consumer(gp); 3299 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 3300 error = g_attach(cp, pp); 3301 if (error == 0) { 3302 error = g_mirror_read_metadata(cp, &md); 3303 g_detach(cp); 3304 } 3305 g_destroy_consumer(cp); 3306 g_destroy_geom(gp); 3307 if (error != 0) 3308 return (NULL); 3309 gp = NULL; 3310 3311 if (md.md_provider[0] != '\0' && 3312 !g_compare_names(md.md_provider, pp->name)) 3313 return (NULL); 3314 if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 3315 return (NULL); 3316 if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) { 3317 G_MIRROR_DEBUG(0, 3318 "Device %s: provider %s marked as inactive, skipping.", 3319 md.md_name, pp->name); 3320 return (NULL); 3321 } 3322 if (g_mirror_debug >= 2) 3323 mirror_metadata_dump(&md); 3324 3325 /* 3326 * Let's check if device already exists. 3327 */ 3328 sc = NULL; 3329 LIST_FOREACH(gp, &mp->geom, geom) { 3330 sc = gp->softc; 3331 if (sc == NULL) 3332 continue; 3333 if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC) 3334 continue; 3335 if (sc->sc_sync.ds_geom == gp) 3336 continue; 3337 if (strcmp(md.md_name, sc->sc_name) != 0) 3338 continue; 3339 if (md.md_mid != sc->sc_id) { 3340 G_MIRROR_DEBUG(0, "Device %s already configured.", 3341 sc->sc_name); 3342 return (NULL); 3343 } 3344 break; 3345 } 3346 if (gp == NULL) { 3347 gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC); 3348 if (gp == NULL) { 3349 G_MIRROR_DEBUG(0, "Cannot create device %s.", 3350 md.md_name); 3351 return (NULL); 3352 } 3353 sc = gp->softc; 3354 } 3355 G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 3356 g_topology_unlock(); 3357 sx_xlock(&sc->sc_lock); 3358 sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING; 3359 error = g_mirror_add_disk(sc, pp, &md); 3360 sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING; 3361 if (error != 0) { 3362 G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 3363 pp->name, gp->name, error); 3364 if 
(LIST_EMPTY(&sc->sc_disks)) { 3365 g_cancel_event(sc); 3366 g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); 3367 g_topology_lock(); 3368 return (NULL); 3369 } 3370 gp = NULL; 3371 } 3372 if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) { 3373 g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD); 3374 g_topology_lock(); 3375 return (NULL); 3376 } 3377 sx_xunlock(&sc->sc_lock); 3378 g_topology_lock(); 3379 return (gp); 3380 } 3381 3382 static void 3383 g_mirror_resize(struct g_consumer *cp) 3384 { 3385 struct g_mirror_disk *disk; 3386 3387 g_topology_assert(); 3388 g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name); 3389 3390 disk = cp->private; 3391 if (disk == NULL) 3392 return; 3393 g_topology_unlock(); 3394 g_mirror_update_metadata(disk); 3395 g_topology_lock(); 3396 } 3397 3398 static int 3399 g_mirror_destroy_geom(struct gctl_req *req __unused, 3400 struct g_class *mp __unused, struct g_geom *gp) 3401 { 3402 struct g_mirror_softc *sc; 3403 int error; 3404 3405 g_topology_unlock(); 3406 sc = gp->softc; 3407 sx_xlock(&sc->sc_lock); 3408 g_cancel_event(sc); 3409 error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT); 3410 if (error != 0) 3411 sx_xunlock(&sc->sc_lock); 3412 g_topology_lock(); 3413 return (error); 3414 } 3415 3416 static void 3417 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 3418 struct g_consumer *cp, struct g_provider *pp) 3419 { 3420 struct g_mirror_softc *sc; 3421 3422 g_topology_assert(); 3423 3424 sc = gp->softc; 3425 if (sc == NULL) 3426 return; 3427 /* Skip synchronization geom. */ 3428 if (gp == sc->sc_sync.ds_geom) 3429 return; 3430 if (pp != NULL) { 3431 /* Nothing here. 
*/ 3432 } else if (cp != NULL) { 3433 struct g_mirror_disk *disk; 3434 3435 disk = cp->private; 3436 if (disk == NULL) 3437 return; 3438 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id); 3439 if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) { 3440 sbuf_printf(sb, "%s<Synchronized>", indent); 3441 if (disk->d_sync.ds_offset == 0) 3442 sbuf_cat(sb, "0%"); 3443 else 3444 sbuf_printf(sb, "%u%%", 3445 (u_int)((disk->d_sync.ds_offset * 100) / 3446 sc->sc_mediasize)); 3447 sbuf_cat(sb, "</Synchronized>\n"); 3448 if (disk->d_sync.ds_offset > 0) 3449 sbuf_printf(sb, "%s<BytesSynced>%jd" 3450 "</BytesSynced>\n", indent, 3451 (intmax_t)disk->d_sync.ds_offset); 3452 } 3453 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3454 disk->d_sync.ds_syncid); 3455 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, 3456 disk->d_genid); 3457 sbuf_printf(sb, "%s<Flags>", indent); 3458 if (disk->d_flags == 0) 3459 sbuf_cat(sb, "NONE"); 3460 else { 3461 int first = 1; 3462 3463 #define ADD_FLAG(flag, name) do { \ 3464 if ((disk->d_flags & (flag)) != 0) { \ 3465 if (!first) \ 3466 sbuf_cat(sb, ", "); \ 3467 else \ 3468 first = 0; \ 3469 sbuf_cat(sb, name); \ 3470 } \ 3471 } while (0) 3472 ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY"); 3473 ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED"); 3474 ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE"); 3475 ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING, 3476 "SYNCHRONIZING"); 3477 ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3478 ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN"); 3479 #undef ADD_FLAG 3480 } 3481 sbuf_cat(sb, "</Flags>\n"); 3482 sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent, 3483 disk->d_priority); 3484 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3485 g_mirror_disk_state2str(disk->d_state)); 3486 } else { 3487 sbuf_printf(sb, "%s<Type>", indent); 3488 switch (sc->sc_type) { 3489 case G_MIRROR_TYPE_AUTOMATIC: 3490 sbuf_cat(sb, "AUTOMATIC"); 3491 break; 3492 case G_MIRROR_TYPE_MANUAL: 3493 
sbuf_cat(sb, "MANUAL"); 3494 break; 3495 default: 3496 sbuf_cat(sb, "UNKNOWN"); 3497 break; 3498 } 3499 sbuf_cat(sb, "</Type>\n"); 3500 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3501 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3502 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3503 sbuf_printf(sb, "%s<Flags>", indent); 3504 if (sc->sc_flags == 0) 3505 sbuf_cat(sb, "NONE"); 3506 else { 3507 int first = 1; 3508 3509 #define ADD_FLAG(flag, name) do { \ 3510 if ((sc->sc_flags & (flag)) != 0) { \ 3511 if (!first) \ 3512 sbuf_cat(sb, ", "); \ 3513 else \ 3514 first = 0; \ 3515 sbuf_cat(sb, name); \ 3516 } \ 3517 } while (0) 3518 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3519 ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3520 #undef ADD_FLAG 3521 } 3522 sbuf_cat(sb, "</Flags>\n"); 3523 sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent, 3524 (u_int)sc->sc_slice); 3525 sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent, 3526 balance_name(sc->sc_balance)); 3527 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3528 sc->sc_ndisks); 3529 sbuf_printf(sb, "%s<State>", indent); 3530 if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) 3531 sbuf_printf(sb, "%s", "STARTING"); 3532 else if (sc->sc_ndisks == 3533 g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE)) 3534 sbuf_printf(sb, "%s", "COMPLETE"); 3535 else 3536 sbuf_printf(sb, "%s", "DEGRADED"); 3537 sbuf_cat(sb, "</State>\n"); 3538 } 3539 } 3540 3541 static void 3542 g_mirror_shutdown_post_sync(void *arg, int howto) 3543 { 3544 struct g_class *mp; 3545 struct g_geom *gp, *gp2; 3546 struct g_mirror_softc *sc; 3547 int error; 3548 3549 if ((howto & RB_NOSYNC) != 0) 3550 return; 3551 3552 mp = arg; 3553 g_topology_lock(); 3554 g_mirror_shutdown = 1; 3555 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3556 if ((sc = gp->softc) == NULL) 3557 continue; 3558 /* Skip synchronization geom. 
*/ 3559 if (gp == sc->sc_sync.ds_geom) 3560 continue; 3561 g_topology_unlock(); 3562 sx_xlock(&sc->sc_lock); 3563 g_mirror_idle(sc, -1); 3564 g_cancel_event(sc); 3565 error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED); 3566 if (error != 0) 3567 sx_xunlock(&sc->sc_lock); 3568 g_topology_lock(); 3569 } 3570 g_topology_unlock(); 3571 } 3572 3573 static void 3574 g_mirror_init(struct g_class *mp) 3575 { 3576 3577 g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, 3578 g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); 3579 if (g_mirror_post_sync == NULL) 3580 G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event."); 3581 } 3582 3583 static void 3584 g_mirror_fini(struct g_class *mp) 3585 { 3586 3587 if (g_mirror_post_sync != NULL) 3588 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync); 3589 } 3590 3591 /* 3592 * Refresh the mirror device's metadata when gmirror encounters a newer 3593 * generation as the individual components are being added to the mirror set. 
3594 */ 3595 static int 3596 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp, 3597 const struct g_mirror_metadata *md) 3598 { 3599 3600 g_topology_assert_not(); 3601 sx_assert(&sc->sc_lock, SX_XLOCKED); 3602 3603 KASSERT(sc->sc_genid <= md->md_genid, 3604 ("%s: attempted to refresh from stale component %s (device %s) " 3605 "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid, 3606 sc->sc_genid)); 3607 3608 if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid && 3609 sc->sc_syncid >= md->md_syncid)) 3610 return (0); 3611 3612 G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u " 3613 "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; " 3614 "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid, 3615 sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name); 3616 3617 if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) { 3618 /* Probable data corruption detected */ 3619 G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state " 3620 "(device=%s genid=%u). A stale mirror device was launched.", 3621 g_mirror_device_state2str(sc->sc_state), sc->sc_name, 3622 sc->sc_genid); 3623 return (EINVAL); 3624 } 3625 3626 /* Update softc */ 3627 g_mirror_reinit_from_metadata(sc, md); 3628 3629 G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s " 3630 "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid, 3631 g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid, 3632 md->md_syncid, (unsigned)md->md_all); 3633 3634 return (0); 3635 } 3636 3637 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror); 3638 MODULE_VERSION(geom_mirror, 0); 3639