/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/raid3/g_raid3.h>

FEATURE(geom_raid3, "GEOM RAID-3 functionality");

static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_RAID3 stuff");
u_int g_raid3_debug = 0;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RWTUN, &g_raid3_debug, 0,
    "Debug level");
static u_int g_raid3_timeout = 4;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_raid3_timeout,
    0, "Time to wait on all raid3 components");
static u_int g_raid3_idletime = 5;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_raid3_idletime, 0, "Mark components as clean when idling");
static u_int g_raid3_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_raid3_syncreqs = 2;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_raid3_use_malloc = 0;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN,
    &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9).");

static u_int g_raid3_n64k = 50;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RDTUN, &g_raid3_n64k, 0,
    "Maximum number of 64kB allocations");
static u_int g_raid3_n16k = 200;
SYSCTL_UINT(_kern_geom_raid3,
    OID_AUTO, n16k, CTLFLAG_RDTUN, &g_raid3_n16k, 0,
    "Maximum number of 16kB allocations");
static u_int g_raid3_n4k = 1200;
SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RDTUN, &g_raid3_n4k, 0,
    "Maximum number of 4kB allocations");

static SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat,
    CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_RAID3 statistics");
static u_int g_raid3_parity_mismatch = 0;
SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD,
    &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode");

#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_raid3_post_sync = NULL;
static int g_raid3_shutdown = 0;

static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_raid3_taste;
static void g_raid3_init(struct g_class *mp);
static void g_raid3_fini(struct g_class *mp);
static void g_raid3_providergone(struct g_provider *pp);

struct g_class g_raid3_class = {
	.name = G_RAID3_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_raid3_config,
	.taste = g_raid3_taste,
	.destroy_geom = g_raid3_destroy_geom,
	.init = g_raid3_init,
	.fini = g_raid3_fini,
	.providergone = g_raid3_providergone,
};

static void g_raid3_destroy_provider(struct g_raid3_softc *sc);
static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state);
static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force);
static void g_raid3_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type);
static int g_raid3_register_request(struct bio *pbp);
static void g_raid3_sync_release(struct g_raid3_softc *sc);
static void g_raid3_timeout_drain(struct g_raid3_softc *sc);

static const char *
g_raid3_disk_state2str(int state)
{

	switch (state) {
	case G_RAID3_DISK_STATE_NODISK:
		return ("NODISK");
	case G_RAID3_DISK_STATE_NONE:
		return ("NONE");
	case G_RAID3_DISK_STATE_NEW:
		return ("NEW");
	case G_RAID3_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_RAID3_DISK_STATE_STALE:
		return ("STALE");
	case G_RAID3_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_RAID3_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	default:
		return ("INVALID");
	}
}

static const char *
g_raid3_device_state2str(int state)
{

	switch (state) {
	case G_RAID3_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_RAID3_DEVICE_STATE_DEGRADED:
		return ("DEGRADED");
	case G_RAID3_DEVICE_STATE_COMPLETE:
		return ("COMPLETE");
	default:
		return ("INVALID");
	}
}

const char *
g_raid3_get_diskname(struct g_raid3_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

static void *
g_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags)
{
	void *ptr;
	enum g_raid3_zones zone;

	if (g_raid3_use_malloc ||
	    (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES)
		ptr = malloc(size, M_RAID3, flags);
	else {
		ptr = uma_zalloc_arg(sc->sc_zones[zone].sz_zone,
		    &sc->sc_zones[zone], flags);
		sc->sc_zones[zone].sz_requested++;
		if (ptr == NULL)
			sc->sc_zones[zone].sz_failed++;
	}
	return (ptr);
}

static void
g_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size)
{
	enum g_raid3_zones zone;

	if (g_raid3_use_malloc ||
	    (zone = g_raid3_zone(size)) == G_RAID3_NUM_ZONES)
		free(ptr, M_RAID3);
	else {
		uma_zfree_arg(sc->sc_zones[zone].sz_zone,
		    ptr, &sc->sc_zones[zone]);
	}
}

static int
g_raid3_uma_ctor(void *mem, int size, void *arg, int flags)
{
	struct g_raid3_zone *sz = arg;

	if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max)
		return (ENOMEM);
	sz->sz_inuse++;
	return (0);
}

static void
g_raid3_uma_dtor(void *mem, int size, void *arg)
{
	struct g_raid3_zone *sz = arg;

	sz->sz_inuse--;
}

#define	g_raid3_xor(src, dst, size)					\
	_g_raid3_xor((uint64_t *)(src),					\
	    (uint64_t *)(dst), (size_t)size)
static void
_g_raid3_xor(uint64_t *src, uint64_t *dst, size_t size)
{

	KASSERT((size % 128) == 0, ("Invalid size: %zu.", size));
	for (; size > 0; size -= 128) {
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
		*dst++ ^= (*src++);
	}
}

static int
g_raid3_is_zero(struct bio *bp)
{
	static const uint64_t zeros[] = {
	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	};
	u_char *addr;
	ssize_t size;

	size = bp->bio_length;
	addr = (u_char *)bp->bio_data;
	for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) {
		if (bcmp(addr, zeros, sizeof(zeros)) != 0)
			return (0);
	}
	return (1);
}

/*
 * --- Event handling functions ---
 * Events in geom_raid3 are used to maintain disk and device state
 * from a single thread, which simplifies locking.
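 *
 * A state change is queued as a struct g_raid3_event and consumed by
 * the worker thread.  Unless G_RAID3_EVENT_DONTWAIT is set, the sender
 * sleeps until the worker marks the event G_RAID3_EVENT_DONE and then
 * collects the result from e_error.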
 */
static void
g_raid3_event_free(struct g_raid3_event *ep)
{

	free(ep, M_RAID3);
}

static int
g_raid3_event_dispatch(struct g_raid3_event *ep, void *arg, int state,
    int flags)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	int error;

	G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_RAID3_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	wakeup(&sc->sc_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_RAID3_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_raid3_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

int
g_raid3_event_send(void *arg, int state, int flags)
{
	struct g_raid3_event *ep;

	ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK);
	return (g_raid3_event_dispatch(ep, arg, state, flags));
}

static struct g_raid3_event *
g_raid3_event_get(struct g_raid3_softc *sc)
{
	struct g_raid3_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_raid3_event_cancel(struct g_raid3_disk *disk)
{
	struct g_raid3_softc *sc;
	struct g_raid3_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
			g_raid3_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
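 * For example, g_raid3_ndisks(sc, -1) == sc->sc_ndisks indicates that
 * every component is connected.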
 */
u_int
g_raid3_ndisks(struct g_raid3_softc *sc, int state)
{
	struct g_raid3_disk *disk;
	u_int n, ndisks;

	sx_assert(&sc->sc_lock, SX_LOCKED);

	for (n = ndisks = 0; n < sc->sc_ndisks; n++) {
		disk = &sc->sc_disks[n];
		if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
			continue;
		if (state == -1 || disk->d_state == state)
			ndisks++;
	}
	return (ndisks);
}

static u_int
g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_RAID3_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_raid3_nrequests(sc, cp) > 0) {
		G_RAID3_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_raid3_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_raid3_is_busy(sc, cp))
		return;
	G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can send the event to detach and destroy the consumer.
		 * A class that has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how retaste events are suppressed when consumers
		 * opened for write are closed: the consumer is detached and
		 * destroyed only after the retaste event has been sent.
		 */
		g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	g_topology_unlock();
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;
	G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk));
	return (0);
}

static void
g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_raid3_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it
 * to the provider, and open access (r1w1e1) to it.
 */
static struct g_raid3_disk *
g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp,
    struct g_raid3_metadata *md, int *errorp)
{
	struct g_raid3_disk *disk;
	int error;

	disk = &sc->sc_disks[md->md_no];
	error = g_raid3_connect_disk(disk, pp);
	if (error != 0) {
		if (errorp != NULL)
			*errorp = error;
		return (NULL);
	}
	disk->d_state = G_RAID3_DISK_STATE_NONE;
	disk->d_flags = md->md_dflags;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
}

static void
g_raid3_destroy_disk(struct g_raid3_disk *disk)
{
	struct g_raid3_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (disk->d_state == G_RAID3_DISK_STATE_NODISK)
		return;
	g_raid3_event_cancel(disk);
	switch (disk->d_state) {
	case G_RAID3_DISK_STATE_SYNCHRONIZING:
		if (sc->sc_syncdisk != NULL)
			g_raid3_sync_stop(sc, 1);
		/* FALLTHROUGH */
	case G_RAID3_DISK_STATE_NEW:
	case G_RAID3_DISK_STATE_STALE:
	case G_RAID3_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_raid3_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		disk->d_consumer = NULL;
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_raid3_get_diskname(disk),
		    g_raid3_disk_state2str(disk->d_state)));
	}
	disk->d_state = G_RAID3_DISK_STATE_NODISK;
}

static void
g_raid3_free_device(struct g_raid3_softc *sc)
{
	KASSERT(sc->sc_refcnt == 0,
	    ("%s: non-zero refcount %u", __func__, sc->sc_refcnt));

	if (!g_raid3_use_malloc) {
		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone);
		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone);
		uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone);
	}
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	sx_xunlock(&sc->sc_lock);
	sx_destroy(&sc->sc_lock);
	free(sc->sc_disks, M_RAID3);
	free(sc, M_RAID3);
}

static void
g_raid3_providergone(struct g_provider *pp)
{
	struct g_raid3_softc *sc = pp->private;

	if (--sc->sc_refcnt == 0)
		g_raid3_free_device(sc);
}

static void
g_raid3_destroy_device(struct g_raid3_softc *sc)
{
	struct g_raid3_event *ep;
	struct g_raid3_disk *disk;
	struct g_geom *gp;
	struct g_consumer *cp;
	u_int n;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_raid3_destroy_provider(sc);
	for (n = 0; n < sc->sc_ndisks; n++) {
		disk = &sc->sc_disks[n];
		if (disk->d_state != G_RAID3_DISK_STATE_NODISK) {
			disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
			g_raid3_update_metadata(disk);
			g_raid3_destroy_disk(disk);
		}
	}
	while ((ep = g_raid3_event_get(sc)) != NULL) {
		g_raid3_event_remove(sc, ep);
		if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0)
			g_raid3_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_RAID3_EVENT_DONE;
			G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	g_raid3_timeout_drain(sc);
	cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer);
	g_topology_lock();
	if (cp != NULL)
		g_raid3_disconnect_consumer(sc, cp);
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	if (--sc->sc_refcnt == 0)
		g_raid3_free_device(sc);
	g_topology_unlock();
}

static void
g_raid3_orphan(struct g_consumer *cp)
{
	struct g_raid3_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID;
	g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED,
	    G_RAID3_EVENT_DONTWAIT);
}

static int
g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
{
	struct g_raid3_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name,
	    cp->acr, cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO);
	if (md != NULL)
		raid3_metadata_encode(md, sector);
	error = g_write_data(cp, offset, sector, length);
	free(sector, M_RAID3);
	if (error != 0) {
		if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
			G_RAID3_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_raid3_get_diskname(disk), sc->sc_name, error);
			disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
		} else {
			G_RAID3_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_raid3_get_diskname(disk), sc->sc_name, error);
		}
		if (g_raid3_disconnect_on_failure &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
			g_raid3_event_send(disk,
			    G_RAID3_DISK_STATE_DISCONNECTED,
			    G_RAID3_EVENT_DONTWAIT);
		}
	}
	return (error);
}

int
g_raid3_clear_metadata(struct g_raid3_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	error = g_raid3_write_metadata(disk, NULL);
	if (error == 0) {
		G_RAID3_DEBUG(2, "Metadata on %s cleared.",
		    g_raid3_get_diskname(disk));
	} else {
		G_RAID3_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_raid3_get_diskname(disk), error);
	}
	return (error);
}

void
g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md)
{
	struct g_raid3_softc *sc;
	struct g_provider *pp;

	bzero(md, sizeof(*md));
	sc = disk->d_softc;
	strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic));
	md->md_version = G_RAID3_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_id = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK);
	md->md_no = disk->d_no;
	md->md_syncid = disk->d_sync.ds_syncid;
	md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK);
	if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
		md->md_sync_offset =
		    disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1);
	}
	if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL)
		pp = disk->d_consumer->provider;
	else
		pp = NULL;
	if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL)
		strlcpy(md->md_provider, pp->name, sizeof(md->md_provider));
	if (pp != NULL)
		md->md_provsize = pp->mediasize;
}

void
g_raid3_update_metadata(struct g_raid3_disk *disk)
{
	struct g_raid3_softc *sc __diagused;
	struct g_raid3_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	g_raid3_fill_metadata(disk, &md);
	error = g_raid3_write_metadata(disk, &md);
	if (error == 0) {
		G_RAID3_DEBUG(2, "Metadata on %s updated.",
		    g_raid3_get_diskname(disk));
	} else {
		G_RAID3_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_raid3_get_diskname(disk), error);
	}
}

static void
g_raid3_bump_syncid(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	u_int n;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
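	/*
	 * The syncid is recorded in each component's on-disk metadata
	 * (see g_raid3_fill_metadata() above); bumping it on the
	 * surviving components lets a component that misses subsequent
	 * writes be recognized as stale later.
	 */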
	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	for (n = 0; n < sc->sc_ndisks; n++) {
		disk = &sc->sc_disks[n];
		if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
		    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_raid3_update_metadata(disk);
		}
	}
}

static void
g_raid3_bump_genid(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	u_int n;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	for (n = 0; n < sc->sc_ndisks; n++) {
		disk = &sc->sc_disks[n];
		if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
		    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_raid3_update_metadata(disk);
		}
	}
}

static int
g_raid3_idle(struct g_raid3_softc *sc, int acw)
{
	struct g_raid3_disk *disk;
	u_int i;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write);
		if (!g_raid3_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.",
		    g_raid3_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY;
		g_raid3_update_metadata(disk);
	}
	return (0);
}

static void
g_raid3_unidle(struct g_raid3_softc *sc)
{
	struct g_raid3_disk *disk;
	u_int i;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.",
		    g_raid3_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY;
		g_raid3_update_metadata(disk);
	}
}

/*
 * Treat the bio_driver1 field in the parent bio as the list head and
 * the bio_caller1 field in each child bio as the pointer to the next
 * element on the list.
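 *
 * The resulting singly-linked list looks like this:
 *
 *	pbp->bio_driver1  -> cbp0
 *	cbp0->bio_caller1 -> cbp1
 *	cbp1->bio_caller1 -> NULL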
 */
#define	G_RAID3_HEAD_BIO(pbp)	(pbp)->bio_driver1

#define	G_RAID3_NEXT_BIO(cbp)	(cbp)->bio_caller1

#define	G_RAID3_FOREACH_BIO(pbp, bp)					\
	for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL;		\
	    (bp) = G_RAID3_NEXT_BIO(bp))

#define	G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp)			\
	for ((bp) = G_RAID3_HEAD_BIO(pbp);				\
	    (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1);	\
	    (bp) = (tmpbp))

static void
g_raid3_init_bio(struct bio *pbp)
{

	G_RAID3_HEAD_BIO(pbp) = NULL;
}

static void
g_raid3_remove_bio(struct bio *cbp)
{
	struct bio *pbp, *bp;

	pbp = cbp->bio_parent;
	if (G_RAID3_HEAD_BIO(pbp) == cbp)
		G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
	else {
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == cbp) {
				G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp);
				break;
			}
		}
	}
	G_RAID3_NEXT_BIO(cbp) = NULL;
}

static void
g_raid3_replace_bio(struct bio *sbp, struct bio *dbp)
{
	struct bio *pbp, *bp;

	g_raid3_remove_bio(sbp);
	pbp = dbp->bio_parent;
	G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp);
	if (G_RAID3_HEAD_BIO(pbp) == dbp)
		G_RAID3_HEAD_BIO(pbp) = sbp;
	else {
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == dbp) {
				G_RAID3_NEXT_BIO(bp) = sbp;
				break;
			}
		}
	}
	G_RAID3_NEXT_BIO(dbp) = NULL;
}

static void
g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp)
{
	struct bio *bp, *pbp;
	size_t size;

	pbp = cbp->bio_parent;
	pbp->bio_children--;
	KASSERT(cbp->bio_data != NULL, ("NULL bio_data"));
	size = pbp->bio_length / (sc->sc_ndisks - 1);
	g_raid3_free(sc, cbp->bio_data, size);
	if (G_RAID3_HEAD_BIO(pbp) == cbp) {
		G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp);
		G_RAID3_NEXT_BIO(cbp) = NULL;
		g_destroy_bio(cbp);
	} else {
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == cbp)
				break;
		}
		if (bp != NULL) {
			KASSERT(G_RAID3_NEXT_BIO(bp) != NULL,
			    ("NULL bp->bio_driver1"));
			G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp);
			G_RAID3_NEXT_BIO(cbp) = NULL;
		}
		g_destroy_bio(cbp);
	}
}

static struct bio *
g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp)
{
	struct bio *bp, *cbp;
	size_t size;
	int memflag;

	cbp = g_clone_bio(pbp);
	if (cbp == NULL)
		return (NULL);
	size = pbp->bio_length / (sc->sc_ndisks - 1);
	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0)
		memflag = M_WAITOK;
	else
		memflag = M_NOWAIT;
	cbp->bio_data = g_raid3_alloc(sc, size, memflag);
	if (cbp->bio_data == NULL) {
		pbp->bio_children--;
		g_destroy_bio(cbp);
		return (NULL);
	}
	G_RAID3_NEXT_BIO(cbp) = NULL;
	if (G_RAID3_HEAD_BIO(pbp) == NULL)
		G_RAID3_HEAD_BIO(pbp) = cbp;
	else {
		G_RAID3_FOREACH_BIO(pbp, bp) {
			if (G_RAID3_NEXT_BIO(bp) == NULL) {
				G_RAID3_NEXT_BIO(bp) = cbp;
				break;
			}
		}
	}
	return (cbp);
}

static void
g_raid3_scatter(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *bp, *cbp, *tmpbp;
	off_t atom, cadd, padd, left;
	int first;

	sc = pbp->bio_to->private;
	bp = NULL;
	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
		/*
		 * Find the bio for which we should calculate data.
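		 * This is the child request destined for the parity
		 * component; its payload is filled in below with the
		 * XOR of all data components.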
		 */
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
				bp = cbp;
				break;
			}
		}
		KASSERT(bp != NULL, ("NULL parity bio."));
	}
	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
	cadd = padd = 0;
	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			if (cbp == bp)
				continue;
			bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom);
			padd += atom;
		}
		cadd += atom;
	}
	if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) {
		/*
		 * Calculate parity.
		 */
		first = 1;
		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
			if (cbp == bp)
				continue;
			if (first) {
				bcopy(cbp->bio_data, bp->bio_data,
				    bp->bio_length);
				first = 0;
			} else {
				g_raid3_xor(cbp->bio_data, bp->bio_data,
				    bp->bio_length);
			}
			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0)
				g_raid3_destroy_bio(sc, cbp);
		}
	}
	G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
		struct g_consumer *cp;

		disk = cbp->bio_caller2;
		cp = disk->d_consumer;
		cbp->bio_to = cp->provider;
		G_RAID3_LOGREQ(3, cbp, "Sending request.");
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		sc->sc_writes++;
		g_io_request(cbp, cp);
	}
}

static void
g_raid3_gather(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *xbp, *fbp, *cbp;
	off_t atom, cadd, padd, left;

	sc = pbp->bio_to->private;
	/*
	 * Find the bio for which we have to calculate data.
	 * While walking the list, check whether all requests succeeded
	 * and, if not, fail the whole request.
	 * In COMPLETE mode we allow one request to fail; when we find
	 * one, we resend it to the parity consumer.
	 * If more requests failed, we fail the whole request.
	 */
	xbp = fbp = NULL;
	G_RAID3_FOREACH_BIO(pbp, cbp) {
		if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) {
			KASSERT(xbp == NULL, ("More than one parity bio."));
			xbp = cbp;
		}
		if (cbp->bio_error == 0)
			continue;
		/*
		 * Found a failed request.
		 */
		if (fbp == NULL) {
			if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) {
				/*
				 * We are already in degraded mode, so we can't
				 * accept any failures.
				 */
				if (pbp->bio_error == 0)
					pbp->bio_error = cbp->bio_error;
			} else {
				fbp = cbp;
			}
		} else {
			/*
			 * Next failed request, that's too many.
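			 * With a single parity component, at most one
			 * failed component per request can be
			 * reconstructed.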
			 */
			if (pbp->bio_error == 0)
				pbp->bio_error = fbp->bio_error;
		}
		disk = cbp->bio_caller2;
		if (disk == NULL)
			continue;
		if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
			G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).",
			    cbp->bio_error);
		} else {
			G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).",
			    cbp->bio_error);
		}
		if (g_raid3_disconnect_on_failure &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
			g_raid3_event_send(disk,
			    G_RAID3_DISK_STATE_DISCONNECTED,
			    G_RAID3_EVENT_DONTWAIT);
		}
	}
	if (pbp->bio_error != 0)
		goto finish;
	if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY;
		if (xbp != fbp)
			g_raid3_replace_bio(xbp, fbp);
		g_raid3_destroy_bio(sc, fbp);
	} else if (fbp != NULL) {
		struct g_consumer *cp;

		/*
		 * One request failed, so send the same request to
		 * the parity consumer.
		 */
		disk = pbp->bio_driver2;
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
			pbp->bio_error = fbp->bio_error;
			goto finish;
		}
		pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
		pbp->bio_inbed--;
		fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR);
		if (disk->d_no == sc->sc_ndisks - 1)
			fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
		fbp->bio_error = 0;
		fbp->bio_completed = 0;
		fbp->bio_children = 0;
		fbp->bio_inbed = 0;
		cp = disk->d_consumer;
		fbp->bio_caller2 = disk;
		fbp->bio_to = cp->provider;
		G_RAID3_LOGREQ(3, fbp, "Sending request (recover).");
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(fbp, cp);
		return;
	}
	if (xbp != NULL) {
		/*
		 * Calculate parity.
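		 * XOR-ing the data components into xbp regenerates the
		 * missing data; in VERIFY mode the same XOR over the
		 * parity buffer must come out zero.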
		 */
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0)
				continue;
			g_raid3_xor(cbp->bio_data, xbp->bio_data,
			    xbp->bio_length);
		}
		xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY;
		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) {
			if (!g_raid3_is_zero(xbp)) {
				g_raid3_parity_mismatch++;
				pbp->bio_error = EIO;
				goto finish;
			}
			g_raid3_destroy_bio(sc, xbp);
		}
	}
	atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
	cadd = padd = 0;
	for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) {
		G_RAID3_FOREACH_BIO(pbp, cbp) {
			bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom);
			pbp->bio_completed += atom;
			padd += atom;
		}
		cadd += atom;
	}
finish:
	if (pbp->bio_error == 0)
		G_RAID3_LOGREQ(3, pbp, "Request finished.");
	else {
		if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0)
			G_RAID3_LOGREQ(1, pbp, "Verification error.");
		else
			G_RAID3_LOGREQ(0, pbp, "Request failed.");
	}
	pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK;
	while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
		g_raid3_destroy_bio(sc, cbp);
	g_io_deliver(pbp, pbp->bio_error);
}

static void
g_raid3_done(struct bio *bp)
{
	struct g_raid3_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR;
	G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error);
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_head(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
	wakeup(&sc->sc_queue);
}

static void
g_raid3_regular_request(struct bio *cbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = cbp->bio_parent;
	sc = pbp->bio_to->private;
	cbp->bio_from->index--;
	if (cbp->bio_cmd == BIO_WRITE)
		sc->sc_writes--;
	disk = cbp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_raid3_kill_consumer(sc, cbp->bio_from);
		g_topology_unlock();
	}

	G_RAID3_LOGREQ(3, cbp, "Request finished.");
	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (pbp->bio_inbed != pbp->bio_children)
		return;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		g_raid3_gather(pbp);
		break;
	case BIO_WRITE:
	case BIO_DELETE:
	    {
		int error = 0;

		pbp->bio_completed = pbp->bio_length;
		while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) {
			if (cbp->bio_error == 0) {
				g_raid3_destroy_bio(sc, cbp);
				continue;
			}

			if (error == 0)
				error = cbp->bio_error;
			else if (pbp->bio_error == 0) {
				/*
				 * Next failed request, that's too many.
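				 * A second failed component leaves the
				 * stripe unrecoverable, so fail the
				 * parent request.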
				 */
				pbp->bio_error = error;
			}

			disk = cbp->bio_caller2;
			if (disk == NULL) {
				g_raid3_destroy_bio(sc, cbp);
				continue;
			}

			if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) {
				disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN;
				G_RAID3_LOGREQ(0, cbp,
				    "Request failed (error=%d).",
				    cbp->bio_error);
			} else {
				G_RAID3_LOGREQ(1, cbp,
				    "Request failed (error=%d).",
				    cbp->bio_error);
			}
			if (g_raid3_disconnect_on_failure &&
			    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
				sc->sc_bump_id |= G_RAID3_BUMP_GENID;
				g_raid3_event_send(disk,
				    G_RAID3_DISK_STATE_DISCONNECTED,
				    G_RAID3_EVENT_DONTWAIT);
			}
			g_raid3_destroy_bio(sc, cbp);
		}
		if (pbp->bio_error == 0)
			G_RAID3_LOGREQ(3, pbp, "Request finished.");
		else
			G_RAID3_LOGREQ(0, pbp, "Request failed.");
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED;
		pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY;
		bioq_remove(&sc->sc_inflight, pbp);
		/* Release delayed sync requests if possible. */
		g_raid3_sync_release(sc);
		g_io_deliver(pbp, pbp->bio_error);
		break;
	    }
	}
}

static void
g_raid3_sync_done(struct bio *bp)
{
	struct g_raid3_softc *sc;

	G_RAID3_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_head(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
	wakeup(&sc->sc_queue);
}

static void
g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp)
{
	struct bio_queue_head queue;
	struct g_raid3_disk *disk;
	struct g_consumer *cp __diagused;
	struct bio *cbp;
	u_int i;

	bioq_init(&queue);
	for (i = 0; i < sc->sc_ndisks; i++) {
		disk = &sc->sc_disks[i];
		if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			for (cbp = bioq_first(&queue); cbp != NULL;
			    cbp = bioq_first(&queue)) {
				bioq_remove(&queue, cbp);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		bioq_insert_tail(&queue, cbp);
		cbp->bio_done = g_std_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
	}
	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
		bioq_remove(&queue, cbp);
		G_RAID3_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_raid3_start(struct bio *bp)
{
	struct g_raid3_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_raid3_start() should not be called at all.
	 */
	KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED ||
	    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE),
	    ("Provider's error should be set (error=%d)(device=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_RAID3_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		g_raid3_flush(sc, bp);
		return;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	bioq_insert_tail(&sc->sc_queue, bp);
	mtx_unlock(&sc->sc_queue_mtx);
	G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request collides with an in-progress
 * synchronization request.
 */
static int
g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp)
{
	struct g_raid3_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	int i;

	disk = sc->sc_syncdisk;
	if (disk == NULL)
		return (0);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	for (i = 0; i < g_raid3_syncreqs; i++) {
		sbp = disk->d_sync.ds_bios[i];
		if (sbp == NULL)
			continue;
		sstart = sbp->bio_offset;
		send = sbp->bio_length;
		if (sbp->bio_cmd == BIO_WRITE) {
			sstart *= sc->sc_ndisks - 1;
			send *= sc->sc_ndisks - 1;
		}
		send += sstart;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Return TRUE if the given sync request collides with an in-progress
 * regular request.
 */
static int
g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_syncdisk == NULL)
		return (0);
	sstart = sbp->bio_offset;
	send = sstart + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (1);
	}
	return (0);
}

/*
 * Put the request onto the delayed queue.
 */
static void
g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp)
{

	G_RAID3_LOGREQ(2, bp, "Delaying request.");
	bioq_insert_head(&sc->sc_regular_delayed, bp);
}

/*
 * Put the synchronization request onto the delayed queue.
 */
static void
g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp)
{

	G_RAID3_LOGREQ(2, bp, "Delaying synchronization request.");
	bioq_insert_tail(&sc->sc_sync_delayed, bp);
}

/*
 * Release delayed regular requests which no longer collide with sync
 * requests.
 */
static void
g_raid3_regular_release(struct g_raid3_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
		if (g_raid3_sync_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_regular_delayed, bp);
		G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
		mtx_lock(&sc->sc_queue_mtx);
		bioq_insert_head(&sc->sc_queue, bp);
#if 0
		/*
		 * wakeup() is not needed, because this function is called
		 * from the worker thread.
		 */
		wakeup(&sc->sc_queue);
#endif
		mtx_unlock(&sc->sc_queue_mtx);
	}
}

/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_raid3_sync_release(struct g_raid3_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
		if (g_raid3_regular_collision(sc, bp))
			continue;
		bioq_remove(&sc->sc_sync_delayed, bp);
		G_RAID3_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a READ
 * request is sent to the active provider, then a WRITE request (with the
 * read data) is sent to the provider being synchronized. When the WRITE
 * is finished, a new synchronization request is sent.
 */
static void
g_raid3_sync_request(struct bio *bp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;

	bp->bio_from->index--;
	sc = bp->bio_from->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_raid3_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		free(bp->bio_data, M_RAID3);
		g_destroy_bio(bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ:
	    {
		struct g_consumer *cp;
		u_char *dst, *src;
		off_t left;
		u_int atom;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		atom = sc->sc_sectorsize / (sc->sc_ndisks - 1);
		dst = src = bp->bio_data;
		if (disk->d_no == sc->sc_ndisks - 1) {
			u_int n;

			/* Parity component. */
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += atom;
				for (n = 1; n < sc->sc_ndisks - 1; n++) {
					g_raid3_xor(src, dst, atom);
					src += atom;
				}
				dst += atom;
			}
		} else {
			/*
			 * Regular component.
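			 * Each reconstructed sector holds
			 * (sc_ndisks - 1) atoms; this component's atom
			 * lives at offset atom * d_no within it.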
			 */
			src += atom * disk->d_no;
			for (left = bp->bio_length; left > 0;
			    left -= sc->sc_sectorsize) {
				bcopy(src, dst, atom);
				src += sc->sc_sectorsize;
				dst += atom;
			}
		}
		bp->bio_driver1 = bp->bio_driver2 = NULL;
		bp->bio_pflags = 0;
		bp->bio_offset /= sc->sc_ndisks - 1;
		bp->bio_length /= sc->sc_ndisks - 1;
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		bp->bio_children = bp->bio_inbed = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	    }
	case BIO_WRITE:
	    {
		struct g_raid3_disk_sync *sync;
		off_t boffset, moffset;
		void *data;
		int i;

		if (bp->bio_error != 0) {
			G_RAID3_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_destroy_bio(bp);
			sc->sc_bump_id |= G_RAID3_BUMP_GENID;
			g_raid3_event_send(disk,
			    G_RAID3_DISK_STATE_DISCONNECTED,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		}
		G_RAID3_LOGREQ(3, bp, "Synchronization request finished.");
		sync = &disk->d_sync;
		if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			if (sync->ds_bios != NULL) {
				i = (int)(uintptr_t)bp->bio_caller1;
				sync->ds_bios[i] = NULL;
			}
			free(bp->bio_data, M_RAID3);
			g_destroy_bio(bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/*
			 * Disk up-to-date, activate it.
			 */
			g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE,
			    G_RAID3_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		data = bp->bio_data;
		g_reset_bio(bp);
		bp->bio_cmd = BIO_READ;
		bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1);
		bp->bio_length = MIN(maxphys, sc->sc_mediasize - bp->bio_offset);
		sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1);
		bp->bio_done = g_raid3_sync_done;
		bp->bio_data = data;
		bp->bio_from = sync->ds_consumer;
		bp->bio_to = sc->sc_provider;
		G_RAID3_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it collides with a regular request.
		 */
		if (g_raid3_regular_collision(sc, bp))
			g_raid3_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Release delayed requests if possible. */
		g_raid3_regular_release(sc);

		/* Find the smallest offset. */
		moffset = sc->sc_mediasize;
		for (i = 0; i < g_raid3_syncreqs; i++) {
			bp = sync->ds_bios[i];
			boffset = bp->bio_offset;
			if (bp->bio_cmd == BIO_WRITE)
				boffset *= sc->sc_ndisks - 1;
			if (boffset < moffset)
				moffset = boffset;
		}
		if (sync->ds_offset_done + maxphys * 100 < moffset) {
			/*
			 * Update offset_done on every 100 blocks.
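			 * This bounds how much work is repeated if
			 * synchronization is interrupted, without
			 * rewriting metadata after every request.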
			 */
			sync->ds_offset_done = moffset;
			g_raid3_update_metadata(disk);
		}
		return;
	    }
	default:
		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
		    bp->bio_cmd, sc->sc_name));
		break;
	}
}

static int
g_raid3_register_request(struct bio *pbp)
{
	struct g_raid3_softc *sc;
	struct g_raid3_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp, *tmpbp;
	off_t offset, length;
	u_int n, ndisks;
	int round_robin, verify;

	ndisks = 0;
	sc = pbp->bio_to->private;
	if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 &&
	    sc->sc_syncdisk == NULL) {
		g_io_deliver(pbp, EIO);
		return (0);
	}
	g_raid3_init_bio(pbp);
	length = pbp->bio_length / (sc->sc_ndisks - 1);
	offset = pbp->bio_offset / (sc->sc_ndisks - 1);
	round_robin = verify = 0;
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY;
			verify = 1;
			ndisks = sc->sc_ndisks;
		} else {
			verify = 0;
			ndisks = sc->sc_ndisks - 1;
		}
		if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 &&
		    sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) {
			round_robin = 1;
		} else {
			round_robin = 0;
		}
		KASSERT(!round_robin || !verify,
		    ("ROUND-ROBIN and VERIFY are mutually exclusive."));
		pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1];
		break;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Delay the request if it collides with a synchronization
		 * request.
		 */
		if (g_raid3_sync_collision(sc, pbp)) {
			g_raid3_regular_delay(sc, pbp);
			return (0);
		}

		if (sc->sc_idle)
			g_raid3_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		ndisks = sc->sc_ndisks;
		break;
	}
	for (n = 0; n < ndisks; n++) {
		disk = &sc->sc_disks[n];
		cbp = g_raid3_clone_bio(sc, pbp);
		if (cbp == NULL) {
			while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL)
				g_raid3_destroy_bio(sc, cbp);
			/*
			 * To prevent deadlock, we must run back up
			 * with the ENOMEM for failed requests of any
			 * of our consumers.  Our own sync requests
			 * can stick around, as they are finite.
			 */
			if ((pbp->bio_cflags &
			    G_RAID3_BIO_CFLAG_REGULAR) != 0) {
				g_io_deliver(pbp, ENOMEM);
				return (0);
			}
			return (ENOMEM);
		}
		cbp->bio_offset = offset;
		cbp->bio_length = length;
		cbp->bio_done = g_raid3_done;
		switch (pbp->bio_cmd) {
		case BIO_READ:
			if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) {
				/*
				 * Replace invalid component with the parity
				 * component.
				 */
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
			} else if (round_robin &&
			    disk->d_no == sc->sc_round_robin) {
				/*
				 * In round-robin mode skip one data component
				 * and use the parity component when reading.
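				 * This spreads read load over all
				 * sc_ndisks components instead of
				 * leaving the parity disk idle.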
				 */
				pbp->bio_driver2 = disk;
				disk = &sc->sc_disks[sc->sc_ndisks - 1];
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
				sc->sc_round_robin++;
				round_robin = 0;
			} else if (verify && disk->d_no == sc->sc_ndisks - 1) {
				cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY;
			}
			break;
		case BIO_WRITE:
		case BIO_DELETE:
			if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE ||
			    disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) {
				if (n == ndisks - 1) {
					/*
					 * Active parity component, mark it as such.
					 */
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_PARITY;
				}
			} else {
				pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED;
				if (n == ndisks - 1) {
					/*
					 * Parity component is not connected,
					 * so destroy its request.
					 */
					pbp->bio_pflags |=
					    G_RAID3_BIO_PFLAG_NOPARITY;
					g_raid3_destroy_bio(sc, cbp);
					cbp = NULL;
				} else {
					cbp->bio_cflags |=
					    G_RAID3_BIO_CFLAG_NODISK;
					disk = NULL;
				}
			}
			break;
		}
		if (cbp != NULL)
			cbp->bio_caller2 = disk;
	}
	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (round_robin) {
			/*
			 * If we are in round-robin mode and 'round_robin' is
			 * still 1, it means that we skipped the parity
			 * component for this read and must reset the
			 * sc_round_robin field.
			 */
			sc->sc_round_robin = 0;
		}
		G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) {
			disk = cbp->bio_caller2;
			cp = disk->d_consumer;
			cbp->bio_to = cp->provider;
			G_RAID3_LOGREQ(3, cbp, "Sending request.");
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
			cp->index++;
			g_io_request(cbp, cp);
		}
		break;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Put the request onto the inflight queue, so we can check
		 * that new synchronization requests do not collide with it.
		 */
		bioq_insert_tail(&sc->sc_inflight, pbp);

		/*
		 * Bump syncid on first write.
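		 * The bump is deferred until a write actually arrives,
		 * so a device that is only read does not needlessly
		 * mark components as stale.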
		 */
		if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID;
			g_raid3_bump_syncid(sc);
		}
		g_raid3_scatter(pbp);
		break;
	}
	return (0);
}

static int
g_raid3_can_destroy(struct g_raid3_softc *sc)
{
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();
	gp = sc->sc_geom;
	if (gp->softc == NULL)
		return (1);
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_raid3_is_busy(sc, cp))
			return (0);
	}
	gp = sc->sc_sync.ds_geom;
	LIST_FOREACH(cp, &gp->consumer, consumer) {
		if (g_raid3_is_busy(sc, cp))
			return (0);
	}
	G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
	    sc->sc_name);
	return (1);
}

static int
g_raid3_try_destroy(struct g_raid3_softc *sc)
{

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_rootmount != NULL) {
		G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
		    sc->sc_rootmount);
		root_mount_rel(sc->sc_rootmount);
		sc->sc_rootmount = NULL;
	}

	g_topology_lock();
	if (!g_raid3_can_destroy(sc)) {
		g_topology_unlock();
		return (0);
	}
	sc->sc_geom->softc = NULL;
	sc->sc_sync.ds_geom->softc = NULL;
	if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) {
		g_topology_unlock();
		G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__,
		    &sc->sc_worker);
		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
		sx_xunlock(&sc->sc_lock);
		wakeup(&sc->sc_worker);
		sc->sc_worker = NULL;
	} else {
		g_topology_unlock();
		g_raid3_destroy_device(sc);
	}
	return (1);
}

/*
 * Worker thread.
 */
static void
g_raid3_worker(void *arg)
{
	struct g_raid3_softc *sc;
	struct g_raid3_event *ep;
	struct bio *bp;
	int timeout;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);

	sx_xlock(&sc->sc_lock);
	for (;;) {
		G_RAID3_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_raid3_event_get(sc);
		if (ep != NULL) {
			g_raid3_event_remove(sc, ep);
			if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_RAID3_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_raid3_update_device(sc, 1);
			} else {
				/*
				 * Update disk status.
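				 * A disk event may cascade into a
				 * device state change, so re-evaluate
				 * the device afterwards.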
*/ 2076 G_RAID3_DEBUG(3, "Running event for disk %s.", 2077 g_raid3_get_diskname(ep->e_disk)); 2078 ep->e_error = g_raid3_update_disk(ep->e_disk, 2079 ep->e_state); 2080 if (ep->e_error == 0) 2081 g_raid3_update_device(sc, 0); 2082 } 2083 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { 2084 KASSERT(ep->e_error == 0, 2085 ("Error cannot be handled.")); 2086 g_raid3_event_free(ep); 2087 } else { 2088 ep->e_flags |= G_RAID3_EVENT_DONE; 2089 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 2090 ep); 2091 mtx_lock(&sc->sc_events_mtx); 2092 wakeup(ep); 2093 mtx_unlock(&sc->sc_events_mtx); 2094 } 2095 if ((sc->sc_flags & 2096 G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2097 if (g_raid3_try_destroy(sc)) { 2098 curthread->td_pflags &= ~TDP_GEOM; 2099 G_RAID3_DEBUG(1, "Thread exiting."); 2100 kproc_exit(0); 2101 } 2102 } 2103 G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); 2104 continue; 2105 } 2106 /* 2107 * Check whether we can mark the array as CLEAN and, if we 2108 * cannot, how many seconds we should wait before trying again. 2109 */ 2110 timeout = g_raid3_idle(sc, -1); 2111 /* 2112 * Now I/O requests. 2113 */ 2114 /* Get first request from the queue. */ 2115 mtx_lock(&sc->sc_queue_mtx); 2116 bp = bioq_first(&sc->sc_queue); 2117 if (bp == NULL) { 2118 if ((sc->sc_flags & 2119 G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2120 mtx_unlock(&sc->sc_queue_mtx); 2121 if (g_raid3_try_destroy(sc)) { 2122 curthread->td_pflags &= ~TDP_GEOM; 2123 G_RAID3_DEBUG(1, "Thread exiting."); 2124 kproc_exit(0); 2125 } 2126 mtx_lock(&sc->sc_queue_mtx); 2127 } 2128 sx_xunlock(&sc->sc_lock); 2129 /* 2130 * XXX: We can miss an event here, because an event 2131 * can be added without the sx-device-lock and without 2132 * the mtx-queue-lock. Maybe I should just stop using a 2133 * dedicated mutex for event synchronization and 2134 * stick with the queue lock? 2135 * The event will hang here until the next I/O request 2136 * or the next event is received. 2137 */ 2138 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", 2139 timeout * hz); 2140 sx_xlock(&sc->sc_lock); 2141 G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); 2142 continue; 2143 } 2144 process: 2145 bioq_remove(&sc->sc_queue, bp); 2146 mtx_unlock(&sc->sc_queue_mtx); 2147 2148 if (bp->bio_from->geom == sc->sc_sync.ds_geom && 2149 (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { 2150 g_raid3_sync_request(bp); /* READ */ 2151 } else if (bp->bio_to != sc->sc_provider) { 2152 if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 2153 g_raid3_regular_request(bp); 2154 else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) 2155 g_raid3_sync_request(bp); /* WRITE */ 2156 else { 2157 KASSERT(0, 2158 ("Invalid request cflags=0x%hx to=%s.", 2159 bp->bio_cflags, bp->bio_to->name)); 2160 } 2161 } else if (g_raid3_register_request(bp) != 0) { 2162 mtx_lock(&sc->sc_queue_mtx); 2163 bioq_insert_head(&sc->sc_queue, bp); 2164 /* 2165 * We are short on memory, so let's see if there are 2166 * finished requests we can free. 2167 */ 2168 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2169 if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) 2170 goto process; 2171 } 2172 /* 2173 * No finished regular requests, so at least keep 2174 * synchronization running.
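* Failing that, sleep briefly ("r3:lowmem" below) and retry; requests
* completing in the meantime will return their buffers to the pools.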
2175 */ 2176 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2177 if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) 2178 goto process; 2179 } 2180 sx_xunlock(&sc->sc_lock); 2181 MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, 2182 "r3:lowmem", hz / 10); 2183 sx_xlock(&sc->sc_lock); 2184 } 2185 G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); 2186 } 2187 } 2188 2189 static void 2190 g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) 2191 { 2192 2193 sx_assert(&sc->sc_lock, SX_LOCKED); 2194 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 2195 return; 2196 if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { 2197 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 2198 g_raid3_get_diskname(disk), sc->sc_name); 2199 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2200 } else if (sc->sc_idle && 2201 (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { 2202 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 2203 g_raid3_get_diskname(disk), sc->sc_name); 2204 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2205 } 2206 } 2207 2208 static void 2209 g_raid3_sync_start(struct g_raid3_softc *sc) 2210 { 2211 struct g_raid3_disk *disk; 2212 struct g_consumer *cp; 2213 struct bio *bp; 2214 int error __diagused; 2215 u_int n; 2216 2217 g_topology_assert_not(); 2218 sx_assert(&sc->sc_lock, SX_XLOCKED); 2219 2220 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2221 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2222 sc->sc_state)); 2223 KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", 2224 sc->sc_name, sc->sc_state)); 2225 disk = NULL; 2226 for (n = 0; n < sc->sc_ndisks; n++) { 2227 if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 2228 continue; 2229 disk = &sc->sc_disks[n]; 2230 break; 2231 } 2232 if (disk == NULL) 2233 return; 2234 2235 sx_xunlock(&sc->sc_lock); 2236 g_topology_lock(); 2237 cp = g_new_consumer(sc->sc_sync.ds_geom); 2238 error = g_attach(cp, sc->sc_provider); 2239 KASSERT(error == 0, 2240 ("Cannot attach to %s (error=%d).", sc->sc_name, error)); 2241 error = g_access(cp, 1, 0, 0); 2242 KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); 2243 g_topology_unlock(); 2244 sx_xlock(&sc->sc_lock); 2245 2246 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 2247 g_raid3_get_diskname(disk)); 2248 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) 2249 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2250 KASSERT(disk->d_sync.ds_consumer == NULL, 2251 ("Sync consumer already exists (device=%s, disk=%s).", 2252 sc->sc_name, g_raid3_get_diskname(disk))); 2253 2254 disk->d_sync.ds_consumer = cp; 2255 disk->d_sync.ds_consumer->private = disk; 2256 disk->d_sync.ds_consumer->index = 0; 2257 sc->sc_syncdisk = disk; 2258 2259 /* 2260 * Allocate memory for synchronization bios and initialize them. 
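* Each one reads reconstructed data from the raid3 provider itself, so
* offsets scale by the number of data components: e.g. with 5 components
* (4 data + parity), a per-component sync offset of 64kB corresponds to
* provider offset 4 * 64kB = 256kB.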
2261 */ 2262 disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, 2263 M_RAID3, M_WAITOK); 2264 for (n = 0; n < g_raid3_syncreqs; n++) { 2265 bp = g_alloc_bio(); 2266 disk->d_sync.ds_bios[n] = bp; 2267 bp->bio_parent = NULL; 2268 bp->bio_cmd = BIO_READ; 2269 bp->bio_data = malloc(maxphys, M_RAID3, M_WAITOK); 2270 bp->bio_cflags = 0; 2271 bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); 2272 bp->bio_length = MIN(maxphys, sc->sc_mediasize - bp->bio_offset); 2273 disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 2274 bp->bio_done = g_raid3_sync_done; 2275 bp->bio_from = disk->d_sync.ds_consumer; 2276 bp->bio_to = sc->sc_provider; 2277 bp->bio_caller1 = (void *)(uintptr_t)n; 2278 } 2279 2280 /* Set the number of in-flight synchronization requests. */ 2281 disk->d_sync.ds_inflight = g_raid3_syncreqs; 2282 2283 /* 2284 * Fire off first synchronization requests. 2285 */ 2286 for (n = 0; n < g_raid3_syncreqs; n++) { 2287 bp = disk->d_sync.ds_bios[n]; 2288 G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 2289 disk->d_sync.ds_consumer->index++; 2290 /* 2291 * Delay the request if it is colliding with a regular request. 2292 */ 2293 if (g_raid3_regular_collision(sc, bp)) 2294 g_raid3_sync_delay(sc, bp); 2295 else 2296 g_io_request(bp, disk->d_sync.ds_consumer); 2297 } 2298 } 2299 2300 /* 2301 * Stop synchronization process. 2302 * type: 0 - synchronization finished 2303 * 1 - synchronization stopped 2304 */ 2305 static void 2306 g_raid3_sync_stop(struct g_raid3_softc *sc, int type) 2307 { 2308 struct g_raid3_disk *disk; 2309 struct g_consumer *cp; 2310 2311 g_topology_assert_not(); 2312 sx_assert(&sc->sc_lock, SX_LOCKED); 2313 2314 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2315 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2316 sc->sc_state)); 2317 disk = sc->sc_syncdisk; 2318 sc->sc_syncdisk = NULL; 2319 KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); 2320 KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2321 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2322 g_raid3_disk_state2str(disk->d_state))); 2323 if (disk->d_sync.ds_consumer == NULL) 2324 return; 2325 2326 if (type == 0) { 2327 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", 2328 sc->sc_name, g_raid3_get_diskname(disk)); 2329 } else /* if (type == 1) */ { 2330 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 2331 sc->sc_name, g_raid3_get_diskname(disk)); 2332 } 2333 free(disk->d_sync.ds_bios, M_RAID3); 2334 disk->d_sync.ds_bios = NULL; 2335 cp = disk->d_sync.ds_consumer; 2336 disk->d_sync.ds_consumer = NULL; 2337 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2338 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. 
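* This follows the pattern used throughout this file: release sc_lock,
* do the work that needs the topology lock, then reacquire sc_lock.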
*/ 2339 g_topology_lock(); 2340 g_raid3_kill_consumer(sc, cp); 2341 g_topology_unlock(); 2342 sx_xlock(&sc->sc_lock); 2343 } 2344 2345 static void 2346 g_raid3_launch_provider(struct g_raid3_softc *sc) 2347 { 2348 struct g_provider *pp; 2349 struct g_raid3_disk *disk; 2350 int n; 2351 2352 sx_assert(&sc->sc_lock, SX_LOCKED); 2353 2354 g_topology_lock(); 2355 pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); 2356 pp->mediasize = sc->sc_mediasize; 2357 pp->sectorsize = sc->sc_sectorsize; 2358 pp->stripesize = 0; 2359 pp->stripeoffset = 0; 2360 for (n = 0; n < sc->sc_ndisks; n++) { 2361 disk = &sc->sc_disks[n]; 2362 if (disk->d_consumer && disk->d_consumer->provider && 2363 disk->d_consumer->provider->stripesize > pp->stripesize) { 2364 pp->stripesize = disk->d_consumer->provider->stripesize; 2365 pp->stripeoffset = disk->d_consumer->provider->stripeoffset; 2366 } 2367 } 2368 pp->stripesize *= sc->sc_ndisks - 1; 2369 pp->stripeoffset *= sc->sc_ndisks - 1; 2370 pp->private = sc; 2371 sc->sc_refcnt++; 2372 sc->sc_provider = pp; 2373 g_error_provider(pp, 0); 2374 g_topology_unlock(); 2375 G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, 2376 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); 2377 2378 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) 2379 g_raid3_sync_start(sc); 2380 } 2381 2382 static void 2383 g_raid3_destroy_provider(struct g_raid3_softc *sc) 2384 { 2385 struct bio *bp; 2386 2387 g_topology_assert_not(); 2388 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 2389 sc->sc_name)); 2390 2391 g_topology_lock(); 2392 g_error_provider(sc->sc_provider, ENXIO); 2393 mtx_lock(&sc->sc_queue_mtx); 2394 while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 2395 bioq_remove(&sc->sc_queue, bp); 2396 g_io_deliver(bp, ENXIO); 2397 } 2398 mtx_unlock(&sc->sc_queue_mtx); 2399 G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 2400 sc->sc_provider->name); 2401 g_wither_provider(sc->sc_provider, ENXIO); 2402 g_topology_unlock(); 2403 sc->sc_provider = NULL; 2404 if (sc->sc_syncdisk != NULL) 2405 g_raid3_sync_stop(sc, 1); 2406 } 2407 2408 static void 2409 g_raid3_go(void *arg) 2410 { 2411 struct g_raid3_softc *sc; 2412 struct g_raid3_event *ep; 2413 2414 sc = arg; 2415 G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2416 ep = sc->sc_timeout_event; 2417 sc->sc_timeout_event = NULL; 2418 g_raid3_event_dispatch(ep, sc, 0, 2419 G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); 2420 } 2421 2422 static void 2423 g_raid3_timeout_drain(struct g_raid3_softc *sc) 2424 { 2425 sx_assert(&sc->sc_lock, SX_XLOCKED); 2426 2427 callout_drain(&sc->sc_callout); 2428 g_raid3_event_free(sc->sc_timeout_event); 2429 sc->sc_timeout_event = NULL; 2430 } 2431 2432 static u_int 2433 g_raid3_determine_state(struct g_raid3_disk *disk) 2434 { 2435 struct g_raid3_softc *sc; 2436 u_int state; 2437 2438 sc = disk->d_softc; 2439 if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2440 if ((disk->d_flags & 2441 G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { 2442 /* Disk does not need synchronization. */ 2443 state = G_RAID3_DISK_STATE_ACTIVE; 2444 } else { 2445 if ((sc->sc_flags & 2446 G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2447 (disk->d_flags & 2448 G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2449 /* 2450 * We can start synchronization from 2451 * the stored offset. 
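* The offset is recorded in the component metadata, so an interrupted
* rebuild resumes roughly where it stopped instead of starting from zero.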
2452 */ 2453 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2454 } else { 2455 state = G_RAID3_DISK_STATE_STALE; 2456 } 2457 } 2458 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2459 /* 2460 * Reset all synchronization data for this disk, 2461 * because even if it was synchronized, it was 2462 * synchronized to disks with a different syncid. 2463 */ 2464 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2465 disk->d_sync.ds_offset = 0; 2466 disk->d_sync.ds_offset_done = 0; 2467 disk->d_sync.ds_syncid = sc->sc_syncid; 2468 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2469 (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2470 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2471 } else { 2472 state = G_RAID3_DISK_STATE_STALE; 2473 } 2474 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2475 /* 2476 * Not good, NOT GOOD! 2477 * It means that the device was started on stale disks 2478 * and a fresher disk has just arrived. 2479 * If there were writes, the device is broken, sorry. 2480 * I think the best choice here is not to touch 2481 * this disk and to inform the user loudly. 2482 */ 2483 G_RAID3_DEBUG(0, "Device %s was started before the freshest " 2484 "disk (%s) arrived!! It will not be connected to the " 2485 "running device.", sc->sc_name, 2486 g_raid3_get_diskname(disk)); 2487 g_raid3_destroy_disk(disk); 2488 state = G_RAID3_DISK_STATE_NONE; 2489 /* Return immediately, because the disk was destroyed. */ 2490 return (state); 2491 } 2492 G_RAID3_DEBUG(3, "State for %s disk: %s.", 2493 g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); 2494 return (state); 2495 } 2496 2497 /* 2498 * Update device state. 2499 */ 2500 static void 2501 g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) 2502 { 2503 struct g_raid3_disk *disk; 2504 u_int state; 2505 2506 sx_assert(&sc->sc_lock, SX_XLOCKED); 2507 2508 switch (sc->sc_state) { 2509 case G_RAID3_DEVICE_STATE_STARTING: 2510 { 2511 u_int n, ndirty, ndisks, genid, syncid; 2512 2513 KASSERT(sc->sc_provider == NULL, 2514 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2515 /* 2516 * Are we ready? We are if all disks are connected, or if 2517 * one disk is missing and 'force' is true. 2518 */ 2519 if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { 2520 if (!force) 2521 g_raid3_timeout_drain(sc); 2522 } else { 2523 if (force) { 2524 /* 2525 * Timeout expired, so destroy device. 2526 */ 2527 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2528 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", 2529 __LINE__, sc->sc_rootmount); 2530 root_mount_rel(sc->sc_rootmount); 2531 sc->sc_rootmount = NULL; 2532 } 2533 return; 2534 } 2535 2536 /* 2537 * Find the biggest genid. 2538 */ 2539 genid = 0; 2540 for (n = 0; n < sc->sc_ndisks; n++) { 2541 disk = &sc->sc_disks[n]; 2542 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2543 continue; 2544 if (disk->d_genid > genid) 2545 genid = disk->d_genid; 2546 } 2547 sc->sc_genid = genid; 2548 /* 2549 * Remove all disks without the biggest genid. 2550 */ 2551 for (n = 0; n < sc->sc_ndisks; n++) { 2552 disk = &sc->sc_disks[n]; 2553 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2554 continue; 2555 if (disk->d_genid < genid) { 2556 G_RAID3_DEBUG(0, 2557 "Component %s (device %s) broken, skipping.", 2558 g_raid3_get_diskname(disk), sc->sc_name); 2559 g_raid3_destroy_disk(disk); 2560 } 2561 } 2562 2563 /* 2564 * There must be at least 'sc->sc_ndisks - 1' components 2565 * with the same syncid and without the SYNCHRONIZING flag.
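* In other words a single missing component is tolerated: its contents
* can be rebuilt as the XOR of all remaining components. E.g. a
* 5-component device may start DEGRADED with 4 valid components; with
* fewer it is declared broken below.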
2566 */ 2567 2568 /* 2569 * Find the biggest syncid, number of valid components and 2570 * number of dirty components. 2571 */ 2572 ndirty = ndisks = syncid = 0; 2573 for (n = 0; n < sc->sc_ndisks; n++) { 2574 disk = &sc->sc_disks[n]; 2575 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2576 continue; 2577 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) 2578 ndirty++; 2579 if (disk->d_sync.ds_syncid > syncid) { 2580 syncid = disk->d_sync.ds_syncid; 2581 ndisks = 0; 2582 } else if (disk->d_sync.ds_syncid < syncid) { 2583 continue; 2584 } 2585 if ((disk->d_flags & 2586 G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { 2587 continue; 2588 } 2589 ndisks++; 2590 } 2591 /* 2592 * Do we have enough valid components? 2593 */ 2594 if (ndisks + 1 < sc->sc_ndisks) { 2595 G_RAID3_DEBUG(0, 2596 "Device %s is broken, too few valid components.", 2597 sc->sc_name); 2598 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2599 return; 2600 } 2601 /* 2602 * If there is one DIRTY component and all disks are present, 2603 * mark it for synchronization. If there is more than one DIRTY 2604 * component, mark the parity component for synchronization. 2605 */ 2606 if (ndisks == sc->sc_ndisks && ndirty == 1) { 2607 for (n = 0; n < sc->sc_ndisks; n++) { 2608 disk = &sc->sc_disks[n]; 2609 if ((disk->d_flags & 2610 G_RAID3_DISK_FLAG_DIRTY) == 0) { 2611 continue; 2612 } 2613 disk->d_flags |= 2614 G_RAID3_DISK_FLAG_SYNCHRONIZING; 2615 } 2616 } else if (ndisks == sc->sc_ndisks && ndirty > 1) { 2617 disk = &sc->sc_disks[sc->sc_ndisks - 1]; 2618 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2619 } 2620 2621 sc->sc_syncid = syncid; 2622 if (force) { 2623 /* Remember to bump syncid on first write. */ 2624 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2625 } 2626 if (ndisks == sc->sc_ndisks) 2627 state = G_RAID3_DEVICE_STATE_COMPLETE; 2628 else /* if (ndisks == sc->sc_ndisks - 1) */ 2629 state = G_RAID3_DEVICE_STATE_DEGRADED; 2630 G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", 2631 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2632 g_raid3_device_state2str(state)); 2633 sc->sc_state = state; 2634 for (n = 0; n < sc->sc_ndisks; n++) { 2635 disk = &sc->sc_disks[n]; 2636 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2637 continue; 2638 state = g_raid3_determine_state(disk); 2639 g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); 2640 if (state == G_RAID3_DISK_STATE_STALE) 2641 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2642 } 2643 break; 2644 } 2645 case G_RAID3_DEVICE_STATE_DEGRADED: 2646 /* 2647 * Genid needs to be bumped immediately, so do it here.
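* The genid is a generation number: it is raised on the surviving
* components when one of them is dropped for errors, so a stale
* component that reappears later identifies itself as broken and is
* refused.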
2648 */ 2649 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2650 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2651 g_raid3_bump_genid(sc); 2652 } 2653 2654 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2655 return; 2656 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < 2657 sc->sc_ndisks - 1) { 2658 if (sc->sc_provider != NULL) 2659 g_raid3_destroy_provider(sc); 2660 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2661 return; 2662 } 2663 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2664 sc->sc_ndisks) { 2665 state = G_RAID3_DEVICE_STATE_COMPLETE; 2666 G_RAID3_DEBUG(1, 2667 "Device %s state changed from %s to %s.", 2668 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2669 g_raid3_device_state2str(state)); 2670 sc->sc_state = state; 2671 } 2672 if (sc->sc_provider == NULL) 2673 g_raid3_launch_provider(sc); 2674 if (sc->sc_rootmount != NULL) { 2675 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2676 sc->sc_rootmount); 2677 root_mount_rel(sc->sc_rootmount); 2678 sc->sc_rootmount = NULL; 2679 } 2680 break; 2681 case G_RAID3_DEVICE_STATE_COMPLETE: 2682 /* 2683 * Genid needs to be bumped immediately, so do it here. 2684 */ 2685 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2686 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2687 g_raid3_bump_genid(sc); 2688 } 2689 2690 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2691 return; 2692 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= 2693 sc->sc_ndisks - 1, 2694 ("Too few ACTIVE components in COMPLETE state (device %s).", 2695 sc->sc_name)); 2696 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2697 sc->sc_ndisks - 1) { 2698 state = G_RAID3_DEVICE_STATE_DEGRADED; 2699 G_RAID3_DEBUG(1, 2700 "Device %s state changed from %s to %s.", 2701 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2702 g_raid3_device_state2str(state)); 2703 sc->sc_state = state; 2704 } 2705 if (sc->sc_provider == NULL) 2706 g_raid3_launch_provider(sc); 2707 if (sc->sc_rootmount != NULL) { 2708 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2709 sc->sc_rootmount); 2710 root_mount_rel(sc->sc_rootmount); 2711 sc->sc_rootmount = NULL; 2712 } 2713 break; 2714 default: 2715 KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, 2716 g_raid3_device_state2str(sc->sc_state))); 2717 break; 2718 } 2719 } 2720 2721 /* 2722 * Update disk state and device state if needed. 2723 */ 2724 #define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ 2725 "Disk %s state changed from %s to %s (device %s).", \ 2726 g_raid3_get_diskname(disk), \ 2727 g_raid3_disk_state2str(disk->d_state), \ 2728 g_raid3_disk_state2str(state), sc->sc_name) 2729 static int 2730 g_raid3_update_disk(struct g_raid3_disk *disk, u_int state) 2731 { 2732 struct g_raid3_softc *sc; 2733 2734 sc = disk->d_softc; 2735 sx_assert(&sc->sc_lock, SX_XLOCKED); 2736 2737 again: 2738 G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", 2739 g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), 2740 g_raid3_disk_state2str(state)); 2741 switch (state) { 2742 case G_RAID3_DISK_STATE_NEW: 2743 /* 2744 * Possible scenarios: 2745 * 1. New disk arrives. 2746 */ 2747 /* Previous state should be NONE.
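* A freshly initialized disk stays in NONE until this NEW event is
* handled.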
*/ 2748 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, 2749 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2750 g_raid3_disk_state2str(disk->d_state))); 2751 DISK_STATE_CHANGED(); 2752 2753 disk->d_state = state; 2754 G_RAID3_DEBUG(1, "Device %s: provider %s detected.", 2755 sc->sc_name, g_raid3_get_diskname(disk)); 2756 if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) 2757 break; 2758 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2759 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2760 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2761 g_raid3_device_state2str(sc->sc_state), 2762 g_raid3_get_diskname(disk), 2763 g_raid3_disk_state2str(disk->d_state))); 2764 state = g_raid3_determine_state(disk); 2765 if (state != G_RAID3_DISK_STATE_NONE) 2766 goto again; 2767 break; 2768 case G_RAID3_DISK_STATE_ACTIVE: 2769 /* 2770 * Possible scenarios: 2771 * 1. New disk does not need synchronization. 2772 * 2. Synchronization process finished successfully. 2773 */ 2774 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2775 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2776 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2777 g_raid3_device_state2str(sc->sc_state), 2778 g_raid3_get_diskname(disk), 2779 g_raid3_disk_state2str(disk->d_state))); 2780 /* Previous state should be NEW or SYNCHRONIZING. */ 2781 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || 2782 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2783 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2784 g_raid3_disk_state2str(disk->d_state))); 2785 DISK_STATE_CHANGED(); 2786 2787 if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2788 disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; 2789 disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; 2790 g_raid3_sync_stop(sc, 0); 2791 } 2792 disk->d_state = state; 2793 disk->d_sync.ds_offset = 0; 2794 disk->d_sync.ds_offset_done = 0; 2795 g_raid3_update_idle(sc, disk); 2796 g_raid3_update_metadata(disk); 2797 G_RAID3_DEBUG(1, "Device %s: provider %s activated.", 2798 sc->sc_name, g_raid3_get_diskname(disk)); 2799 break; 2800 case G_RAID3_DISK_STATE_STALE: 2801 /* 2802 * Possible scenarios: 2803 * 1. Stale disk was connected. 2804 */ 2805 /* Previous state should be NEW. */ 2806 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2807 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2808 g_raid3_disk_state2str(disk->d_state))); 2809 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2810 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2811 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2812 g_raid3_device_state2str(sc->sc_state), 2813 g_raid3_get_diskname(disk), 2814 g_raid3_disk_state2str(disk->d_state))); 2815 /* 2816 * STALE state is only possible if device is marked 2817 * NOAUTOSYNC. 2818 */ 2819 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, 2820 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2821 g_raid3_device_state2str(sc->sc_state), 2822 g_raid3_get_diskname(disk), 2823 g_raid3_disk_state2str(disk->d_state))); 2824 DISK_STATE_CHANGED(); 2825 2826 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2827 disk->d_state = state; 2828 g_raid3_update_metadata(disk); 2829 G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", 2830 sc->sc_name, g_raid3_get_diskname(disk)); 2831 break; 2832 case G_RAID3_DISK_STATE_SYNCHRONIZING: 2833 /* 2834 * Possible scenarios: 2835 * 1. Disk which needs synchronization was connected. 2836 */ 2837 /* Previous state should be NEW. 
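* Synchronization itself is started below only if the device's provider
* already exists; otherwise it is deferred until the provider is
* launched.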
*/ 2838 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2839 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2840 g_raid3_disk_state2str(disk->d_state))); 2841 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2842 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2843 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2844 g_raid3_device_state2str(sc->sc_state), 2845 g_raid3_get_diskname(disk), 2846 g_raid3_disk_state2str(disk->d_state))); 2847 DISK_STATE_CHANGED(); 2848 2849 if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2850 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2851 disk->d_state = state; 2852 if (sc->sc_provider != NULL) { 2853 g_raid3_sync_start(sc); 2854 g_raid3_update_metadata(disk); 2855 } 2856 break; 2857 case G_RAID3_DISK_STATE_DISCONNECTED: 2858 /* 2859 * Possible scenarios: 2860 * 1. Device wasn't running yet, but disk disappeared. 2861 * 2. Disk was active and disappeared. 2862 * 3. Disk disappeared during synchronization process. 2863 */ 2864 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2865 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 2866 /* 2867 * Previous state should be ACTIVE, STALE or 2868 * SYNCHRONIZING. 2869 */ 2870 KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 2871 disk->d_state == G_RAID3_DISK_STATE_STALE || 2872 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2873 ("Wrong disk state (%s, %s).", 2874 g_raid3_get_diskname(disk), 2875 g_raid3_disk_state2str(disk->d_state))); 2876 } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { 2877 /* Previous state should be NEW. */ 2878 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2879 ("Wrong disk state (%s, %s).", 2880 g_raid3_get_diskname(disk), 2881 g_raid3_disk_state2str(disk->d_state))); 2882 /* 2883 * Cancel the pending syncid bump if the disk 2884 * disappeared in STARTING state. 2885 */ 2886 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) 2887 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2888 #ifdef INVARIANTS 2889 } else { 2890 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2891 sc->sc_name, 2892 g_raid3_device_state2str(sc->sc_state), 2893 g_raid3_get_diskname(disk), 2894 g_raid3_disk_state2str(disk->d_state))); 2895 #endif 2896 } 2897 DISK_STATE_CHANGED(); 2898 G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", 2899 sc->sc_name, g_raid3_get_diskname(disk)); 2900 2901 g_raid3_destroy_disk(disk); 2902 break; 2903 default: 2904 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2905 break; 2906 } 2907 return (0); 2908 } 2909 #undef DISK_STATE_CHANGED 2910 2911 int 2912 g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) 2913 { 2914 struct g_provider *pp; 2915 u_char *buf; 2916 int error; 2917 2918 g_topology_assert(); 2919 2920 error = g_access(cp, 1, 0, 0); 2921 if (error != 0) 2922 return (error); 2923 pp = cp->provider; 2924 g_topology_unlock(); 2925 /* Metadata is stored in the last sector. */ 2926 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2927 &error); 2928 g_topology_lock(); 2929 g_access(cp, -1, 0, 0); 2930 if (buf == NULL) { 2931 G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2932 cp->provider->name, error); 2933 return (error); 2934 } 2935 2936 /* Decode metadata.
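* The magic string is checked first, so providers that simply do not
* carry raid3 metadata are rejected quietly; version, hash and sector
* size problems are reported, since they indicate a component that looks
* like ours but cannot be used.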
*/ 2937 error = raid3_metadata_decode(buf, md); 2938 g_free(buf); 2939 if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) 2940 return (EINVAL); 2941 if (md->md_version > G_RAID3_VERSION) { 2942 G_RAID3_DEBUG(0, 2943 "Kernel module is too old to handle metadata from %s.", 2944 cp->provider->name); 2945 return (EINVAL); 2946 } 2947 if (error != 0) { 2948 G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2949 cp->provider->name); 2950 return (error); 2951 } 2952 if (md->md_sectorsize > maxphys) { 2953 G_RAID3_DEBUG(0, "The blocksize is too big."); 2954 return (EINVAL); 2955 } 2956 2957 return (0); 2958 } 2959 2960 static int 2961 g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, 2962 struct g_raid3_metadata *md) 2963 { 2964 2965 if (md->md_no >= sc->sc_ndisks) { 2966 G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", 2967 pp->name, md->md_no); 2968 return (EINVAL); 2969 } 2970 if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { 2971 G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", 2972 pp->name, md->md_no); 2973 return (EEXIST); 2974 } 2975 if (md->md_all != sc->sc_ndisks) { 2976 G_RAID3_DEBUG(1, 2977 "Invalid '%s' field on disk %s (device %s), skipping.", 2978 "md_all", pp->name, sc->sc_name); 2979 return (EINVAL); 2980 } 2981 if ((md->md_mediasize % md->md_sectorsize) != 0) { 2982 G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " 2983 "0) on disk %s (device %s), skipping.", pp->name, 2984 sc->sc_name); 2985 return (EINVAL); 2986 } 2987 if (md->md_mediasize != sc->sc_mediasize) { 2988 G_RAID3_DEBUG(1, 2989 "Invalid '%s' field on disk %s (device %s), skipping.", 2990 "md_mediasize", pp->name, sc->sc_name); 2991 return (EINVAL); 2992 } 2993 if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { 2994 G_RAID3_DEBUG(1, 2995 "Invalid '%s' field on disk %s (device %s), skipping.", 2996 "md_mediasize", pp->name, sc->sc_name); 2997 return (EINVAL); 2998 } 2999 if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { 3000 G_RAID3_DEBUG(1, 3001 "Invalid size of disk %s (device %s), skipping.", pp->name, 3002 sc->sc_name); 3003 return (EINVAL); 3004 } 3005 if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { 3006 G_RAID3_DEBUG(1, 3007 "Invalid '%s' field on disk %s (device %s), skipping.", 3008 "md_sectorsize", pp->name, sc->sc_name); 3009 return (EINVAL); 3010 } 3011 if (md->md_sectorsize != sc->sc_sectorsize) { 3012 G_RAID3_DEBUG(1, 3013 "Invalid '%s' field on disk %s (device %s), skipping.", 3014 "md_sectorsize", pp->name, sc->sc_name); 3015 return (EINVAL); 3016 } 3017 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 3018 G_RAID3_DEBUG(1, 3019 "Invalid sector size of disk %s (device %s), skipping.", 3020 pp->name, sc->sc_name); 3021 return (EINVAL); 3022 } 3023 if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 3024 G_RAID3_DEBUG(1, 3025 "Invalid device flags on disk %s (device %s), skipping.", 3026 pp->name, sc->sc_name); 3027 return (EINVAL); 3028 } 3029 if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 3030 (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { 3031 /* 3032 * VERIFY and ROUND-ROBIN options are mutually exclusive.
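* VERIFY makes every read fetch the parity component too, so the data
* can be checked against it, while ROUND-ROBIN deliberately skips the
* parity component when spreading reads, so the two cannot work
* together.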
3033 */ 3034 G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " 3035 "disk %s (device %s), skipping.", pp->name, sc->sc_name); 3036 return (EINVAL); 3037 } 3038 if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { 3039 G_RAID3_DEBUG(1, 3040 "Invalid disk flags on disk %s (device %s), skipping.", 3041 pp->name, sc->sc_name); 3042 return (EINVAL); 3043 } 3044 return (0); 3045 } 3046 3047 int 3048 g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, 3049 struct g_raid3_metadata *md) 3050 { 3051 struct g_raid3_disk *disk; 3052 int error; 3053 3054 g_topology_assert_not(); 3055 G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); 3056 3057 error = g_raid3_check_metadata(sc, pp, md); 3058 if (error != 0) 3059 return (error); 3060 if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && 3061 md->md_genid < sc->sc_genid) { 3062 G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", 3063 pp->name, sc->sc_name); 3064 return (EINVAL); 3065 } 3066 disk = g_raid3_init_disk(sc, pp, md, &error); 3067 if (disk == NULL) 3068 return (error); 3069 error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, 3070 G_RAID3_EVENT_WAIT); 3071 if (error != 0) 3072 return (error); 3073 if (md->md_version < G_RAID3_VERSION) { 3074 G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 3075 pp->name, md->md_version, G_RAID3_VERSION); 3076 g_raid3_update_metadata(disk); 3077 } 3078 return (0); 3079 } 3080 3081 static void 3082 g_raid3_destroy_delayed(void *arg, int flag) 3083 { 3084 struct g_raid3_softc *sc; 3085 int error; 3086 3087 if (flag == EV_CANCEL) { 3088 G_RAID3_DEBUG(1, "Destroying canceled."); 3089 return; 3090 } 3091 sc = arg; 3092 g_topology_unlock(); 3093 sx_xlock(&sc->sc_lock); 3094 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, 3095 ("DESTROY flag set on %s.", sc->sc_name)); 3096 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, 3097 ("DESTROYING flag not set on %s.", sc->sc_name)); 3098 G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); 3099 error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); 3100 if (error != 0) { 3101 G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); 3102 sx_xunlock(&sc->sc_lock); 3103 } 3104 g_topology_lock(); 3105 } 3106 3107 static int 3108 g_raid3_access(struct g_provider *pp, int acr, int acw, int ace) 3109 { 3110 struct g_raid3_softc *sc; 3111 int dcr, dcw, dce, error = 0; 3112 3113 g_topology_assert(); 3114 G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 3115 acw, ace); 3116 3117 sc = pp->private; 3118 KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 3119 3120 dcr = pp->acr + acr; 3121 dcw = pp->acw + acw; 3122 dce = pp->ace + ace; 3123 3124 g_topology_unlock(); 3125 sx_xlock(&sc->sc_lock); 3126 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || 3127 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { 3128 if (acr > 0 || acw > 0 || ace > 0) 3129 error = ENXIO; 3130 goto end; 3131 } 3132 if (dcw == 0) 3133 g_raid3_idle(sc, dcw); 3134 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { 3135 if (acr > 0 || acw > 0 || ace > 0) { 3136 error = ENXIO; 3137 goto end; 3138 } 3139 if (dcr == 0 && dcw == 0 && dce == 0) { 3140 g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, 3141 sc, NULL); 3142 } 3143 } 3144 end: 3145 sx_xunlock(&sc->sc_lock); 3146 g_topology_lock(); 3147 return (error); 3148 } 3149 3150 static struct g_geom * 3151 g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) 3152 { 3153 struct g_raid3_softc *sc; 3154 struct g_geom *gp; 
3155 int error, timeout; 3156 u_int n; 3157 3158 g_topology_assert(); 3159 G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); 3160 3161 /* One disk is minimum. */ 3162 if (md->md_all < 1) 3163 return (NULL); 3164 /* 3165 * Action geom. 3166 */ 3167 gp = g_new_geomf(mp, "%s", md->md_name); 3168 sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); 3169 sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, 3170 M_WAITOK | M_ZERO); 3171 gp->start = g_raid3_start; 3172 gp->orphan = g_raid3_orphan; 3173 gp->access = g_raid3_access; 3174 gp->dumpconf = g_raid3_dumpconf; 3175 3176 sc->sc_id = md->md_id; 3177 sc->sc_mediasize = md->md_mediasize; 3178 sc->sc_sectorsize = md->md_sectorsize; 3179 sc->sc_ndisks = md->md_all; 3180 sc->sc_round_robin = 0; 3181 sc->sc_flags = md->md_mflags; 3182 sc->sc_bump_id = 0; 3183 sc->sc_idle = 1; 3184 sc->sc_last_write = time_uptime; 3185 sc->sc_writes = 0; 3186 sc->sc_refcnt = 1; 3187 for (n = 0; n < sc->sc_ndisks; n++) { 3188 sc->sc_disks[n].d_softc = sc; 3189 sc->sc_disks[n].d_no = n; 3190 sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; 3191 } 3192 sx_init(&sc->sc_lock, "graid3:lock"); 3193 bioq_init(&sc->sc_queue); 3194 mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); 3195 bioq_init(&sc->sc_regular_delayed); 3196 bioq_init(&sc->sc_inflight); 3197 bioq_init(&sc->sc_sync_delayed); 3198 TAILQ_INIT(&sc->sc_events); 3199 mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); 3200 callout_init(&sc->sc_callout, 1); 3201 sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; 3202 gp->softc = sc; 3203 sc->sc_geom = gp; 3204 sc->sc_provider = NULL; 3205 /* 3206 * Synchronization geom. 3207 */ 3208 gp = g_new_geomf(mp, "%s.sync", md->md_name); 3209 gp->softc = sc; 3210 gp->orphan = g_raid3_orphan; 3211 sc->sc_sync.ds_geom = gp; 3212 3213 if (!g_raid3_use_malloc) { 3214 sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 3215 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3216 UMA_ALIGN_PTR, 0); 3217 sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; 3218 sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; 3219 sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = 3220 sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; 3221 sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 3222 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3223 UMA_ALIGN_PTR, 0); 3224 sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; 3225 sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; 3226 sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = 3227 sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; 3228 sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 3229 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3230 UMA_ALIGN_PTR, 0); 3231 sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; 3232 sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; 3233 sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = 3234 sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; 3235 } 3236 3237 error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, 3238 "g_raid3 %s", md->md_name); 3239 if (error != 0) { 3240 G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", 3241 sc->sc_name); 3242 g_destroy_geom(sc->sc_geom); 3243 g_raid3_free_device(sc); 3244 return (NULL); 3245 } 3246 3247 G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", 3248 sc->sc_name, sc->sc_ndisks, sc->sc_id); 3249 3250 sc->sc_rootmount = root_mount_hold("GRAID3"); 3251 G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 3252 3253 /* 3254 * Schedule startup timeout. 
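* If some components have not shown up within g_raid3_timeout seconds,
* g_raid3_go() forces a device event, so the array either starts
* degraded or is torn down.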
3255 */ 3256 timeout = atomic_load_acq_int(&g_raid3_timeout); 3257 sc->sc_timeout_event = malloc(sizeof(struct g_raid3_event), M_RAID3, 3258 M_WAITOK); 3259 callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); 3260 return (sc->sc_geom); 3261 } 3262 3263 int 3264 g_raid3_destroy(struct g_raid3_softc *sc, int how) 3265 { 3266 struct g_provider *pp; 3267 3268 g_topology_assert_not(); 3269 sx_assert(&sc->sc_lock, SX_XLOCKED); 3270 3271 pp = sc->sc_provider; 3272 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 3273 switch (how) { 3274 case G_RAID3_DESTROY_SOFT: 3275 G_RAID3_DEBUG(1, 3276 "Device %s is still open (r%dw%de%d).", pp->name, 3277 pp->acr, pp->acw, pp->ace); 3278 return (EBUSY); 3279 case G_RAID3_DESTROY_DELAYED: 3280 G_RAID3_DEBUG(1, 3281 "Device %s will be destroyed on last close.", 3282 pp->name); 3283 if (sc->sc_syncdisk != NULL) 3284 g_raid3_sync_stop(sc, 1); 3285 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; 3286 return (EBUSY); 3287 case G_RAID3_DESTROY_HARD: 3288 G_RAID3_DEBUG(1, "Device %s is still open, so it " 3289 "can't be definitely removed.", pp->name); 3290 break; 3291 } 3292 } 3293 3294 g_topology_lock(); 3295 if (sc->sc_geom->softc == NULL) { 3296 g_topology_unlock(); 3297 return (0); 3298 } 3299 sc->sc_geom->softc = NULL; 3300 sc->sc_sync.ds_geom->softc = NULL; 3301 g_topology_unlock(); 3302 3303 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 3304 sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; 3305 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 3306 sx_xunlock(&sc->sc_lock); 3307 mtx_lock(&sc->sc_queue_mtx); 3308 wakeup(sc); 3309 wakeup(&sc->sc_queue); 3310 mtx_unlock(&sc->sc_queue_mtx); 3311 G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 3312 while (sc->sc_worker != NULL) 3313 tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); 3314 G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 3315 sx_xlock(&sc->sc_lock); 3316 g_raid3_destroy_device(sc); 3317 return (0); 3318 } 3319 3320 static void 3321 g_raid3_taste_orphan(struct g_consumer *cp) 3322 { 3323 3324 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 3325 cp->provider->name)); 3326 } 3327 3328 static struct g_geom * 3329 g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 3330 { 3331 struct g_raid3_metadata md; 3332 struct g_raid3_softc *sc; 3333 struct g_consumer *cp; 3334 struct g_geom *gp; 3335 int error; 3336 3337 g_topology_assert(); 3338 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 3339 G_RAID3_DEBUG(2, "Tasting %s.", pp->name); 3340 3341 gp = g_new_geomf(mp, "raid3:taste"); 3342 /* This orphan function should never be called. */ 3343 gp->orphan = g_raid3_taste_orphan; 3344 cp = g_new_consumer(gp); 3345 cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; 3346 error = g_attach(cp, pp); 3347 if (error == 0) { 3348 error = g_raid3_read_metadata(cp, &md); 3349 g_detach(cp); 3350 } 3351 g_destroy_consumer(cp); 3352 g_destroy_geom(gp); 3353 if (error != 0) 3354 return (NULL); 3355 gp = NULL; 3356 3357 if (md.md_provider[0] != '\0' && 3358 !g_compare_names(md.md_provider, pp->name)) 3359 return (NULL); 3360 if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 3361 return (NULL); 3362 if (g_raid3_debug >= 2) 3363 raid3_metadata_dump(&md); 3364 3365 /* 3366 * Let's check if device already exists.
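* Components are matched to a running device by metadata name; the same
* name with a different id is refused, as two distinct devices cannot
* share a geom name.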
3367 */ 3368 sc = NULL; 3369 LIST_FOREACH(gp, &mp->geom, geom) { 3370 sc = gp->softc; 3371 if (sc == NULL) 3372 continue; 3373 if (sc->sc_sync.ds_geom == gp) 3374 continue; 3375 if (strcmp(md.md_name, sc->sc_name) != 0) 3376 continue; 3377 if (md.md_id != sc->sc_id) { 3378 G_RAID3_DEBUG(0, "Device %s already configured.", 3379 sc->sc_name); 3380 return (NULL); 3381 } 3382 break; 3383 } 3384 if (gp == NULL) { 3385 gp = g_raid3_create(mp, &md); 3386 if (gp == NULL) { 3387 G_RAID3_DEBUG(0, "Cannot create device %s.", 3388 md.md_name); 3389 return (NULL); 3390 } 3391 sc = gp->softc; 3392 } 3393 G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 3394 g_topology_unlock(); 3395 sx_xlock(&sc->sc_lock); 3396 error = g_raid3_add_disk(sc, pp, &md); 3397 if (error != 0) { 3398 G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 3399 pp->name, gp->name, error); 3400 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == 3401 sc->sc_ndisks) { 3402 g_cancel_event(sc); 3403 g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); 3404 g_topology_lock(); 3405 return (NULL); 3406 } 3407 gp = NULL; 3408 } 3409 sx_xunlock(&sc->sc_lock); 3410 g_topology_lock(); 3411 return (gp); 3412 } 3413 3414 static int 3415 g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, 3416 struct g_geom *gp) 3417 { 3418 struct g_raid3_softc *sc; 3419 int error; 3420 3421 g_topology_unlock(); 3422 sc = gp->softc; 3423 sx_xlock(&sc->sc_lock); 3424 g_cancel_event(sc); 3425 error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); 3426 if (error != 0) 3427 sx_xunlock(&sc->sc_lock); 3428 g_topology_lock(); 3429 return (error); 3430 } 3431 3432 static void 3433 g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 3434 struct g_consumer *cp, struct g_provider *pp) 3435 { 3436 struct g_raid3_softc *sc; 3437 3438 g_topology_assert(); 3439 3440 sc = gp->softc; 3441 if (sc == NULL) 3442 return; 3443 /* Skip synchronization geom. */ 3444 if (gp == sc->sc_sync.ds_geom) 3445 return; 3446 if (pp != NULL) { 3447 /* Nothing here. 
*/ 3448 } else if (cp != NULL) { 3449 struct g_raid3_disk *disk; 3450 3451 disk = cp->private; 3452 if (disk == NULL) 3453 return; 3454 g_topology_unlock(); 3455 sx_xlock(&sc->sc_lock); 3456 sbuf_printf(sb, "%s<Type>", indent); 3457 if (disk->d_no == sc->sc_ndisks - 1) 3458 sbuf_cat(sb, "PARITY"); 3459 else 3460 sbuf_cat(sb, "DATA"); 3461 sbuf_cat(sb, "</Type>\n"); 3462 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 3463 (u_int)disk->d_no); 3464 if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 3465 sbuf_printf(sb, "%s<Synchronized>", indent); 3466 if (disk->d_sync.ds_offset == 0) 3467 sbuf_cat(sb, "0%"); 3468 else { 3469 sbuf_printf(sb, "%u%%", 3470 (u_int)((disk->d_sync.ds_offset * 100) / 3471 (sc->sc_mediasize / (sc->sc_ndisks - 1)))); 3472 } 3473 sbuf_cat(sb, "</Synchronized>\n"); 3474 if (disk->d_sync.ds_offset > 0) { 3475 sbuf_printf(sb, "%s<BytesSynced>%jd" 3476 "</BytesSynced>\n", indent, 3477 (intmax_t)disk->d_sync.ds_offset); 3478 } 3479 } 3480 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3481 disk->d_sync.ds_syncid); 3482 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid); 3483 sbuf_printf(sb, "%s<Flags>", indent); 3484 if (disk->d_flags == 0) 3485 sbuf_cat(sb, "NONE"); 3486 else { 3487 int first = 1; 3488 3489 #define ADD_FLAG(flag, name) do { \ 3490 if ((disk->d_flags & (flag)) != 0) { \ 3491 if (!first) \ 3492 sbuf_cat(sb, ", "); \ 3493 else \ 3494 first = 0; \ 3495 sbuf_cat(sb, name); \ 3496 } \ 3497 } while (0) 3498 ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); 3499 ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); 3500 ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, 3501 "SYNCHRONIZING"); 3502 ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3503 ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); 3504 #undef ADD_FLAG 3505 } 3506 sbuf_cat(sb, "</Flags>\n"); 3507 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3508 g_raid3_disk_state2str(disk->d_state)); 3509 sx_xunlock(&sc->sc_lock); 3510 g_topology_lock(); 3511 } else { 3512 g_topology_unlock(); 3513 sx_xlock(&sc->sc_lock); 3514 if (!g_raid3_use_malloc) { 3515 sbuf_printf(sb, 3516 "%s<Zone4kRequested>%u</Zone4kRequested>\n", indent, 3517 sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); 3518 sbuf_printf(sb, 3519 "%s<Zone4kFailed>%u</Zone4kFailed>\n", indent, 3520 sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); 3521 sbuf_printf(sb, 3522 "%s<Zone16kRequested>%u</Zone16kRequested>\n", indent, 3523 sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); 3524 sbuf_printf(sb, 3525 "%s<Zone16kFailed>%u</Zone16kFailed>\n", indent, 3526 sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); 3527 sbuf_printf(sb, 3528 "%s<Zone64kRequested>%u</Zone64kRequested>\n", indent, 3529 sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); 3530 sbuf_printf(sb, 3531 "%s<Zone64kFailed>%u</Zone64kFailed>\n", indent, 3532 sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); 3533 } 3534 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3535 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3536 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3537 sbuf_printf(sb, "%s<Flags>", indent); 3538 if (sc->sc_flags == 0) 3539 sbuf_cat(sb, "NONE"); 3540 else { 3541 int first = 1; 3542 3543 #define ADD_FLAG(flag, name) do { \ 3544 if ((sc->sc_flags & (flag)) != 0) { \ 3545 if (!first) \ 3546 sbuf_cat(sb, ", "); \ 3547 else \ 3548 first = 0; \ 3549 sbuf_cat(sb, name); \ 3550 } \ 3551 } while (0) 3552 ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3553 ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3554 
ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, 3555 "ROUND-ROBIN"); 3556 ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); 3557 #undef ADD_FLAG 3558 } 3559 sbuf_cat(sb, "</Flags>\n"); 3560 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3561 sc->sc_ndisks); 3562 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3563 g_raid3_device_state2str(sc->sc_state)); 3564 sx_xunlock(&sc->sc_lock); 3565 g_topology_lock(); 3566 } 3567 } 3568 3569 static void 3570 g_raid3_shutdown_post_sync(void *arg, int howto) 3571 { 3572 struct g_class *mp; 3573 struct g_geom *gp, *gp2; 3574 struct g_raid3_softc *sc; 3575 int error; 3576 3577 if ((howto & RB_NOSYNC) != 0) 3578 return; 3579 3580 mp = arg; 3581 g_topology_lock(); 3582 g_raid3_shutdown = 1; 3583 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3584 if ((sc = gp->softc) == NULL) 3585 continue; 3586 /* Skip synchronization geom. */ 3587 if (gp == sc->sc_sync.ds_geom) 3588 continue; 3589 g_topology_unlock(); 3590 sx_xlock(&sc->sc_lock); 3591 g_raid3_idle(sc, -1); 3592 g_cancel_event(sc); 3593 error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); 3594 if (error != 0) 3595 sx_xunlock(&sc->sc_lock); 3596 g_topology_lock(); 3597 } 3598 g_topology_unlock(); 3599 } 3600 3601 static void 3602 g_raid3_init(struct g_class *mp) 3603 { 3604 3605 g_raid3_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync, 3606 g_raid3_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST); 3607 if (g_raid3_post_sync == NULL) 3608 G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); 3609 } 3610 3611 static void 3612 g_raid3_fini(struct g_class *mp) 3613 { 3614 3615 if (g_raid3_post_sync != NULL) 3616 EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_raid3_post_sync); 3617 } 3618 3619 DECLARE_GEOM_CLASS(g_raid3_class, g_raid3); 3620 MODULE_VERSION(geom_raid3, 0); 3621