1 /*- 2 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/kernel.h> 33 #include <sys/module.h> 34 #include <sys/limits.h> 35 #include <sys/lock.h> 36 #include <sys/mutex.h> 37 #include <sys/bio.h> 38 #include <sys/sysctl.h> 39 #include <sys/malloc.h> 40 #include <sys/eventhandler.h> 41 #include <vm/uma.h> 42 #include <geom/geom.h> 43 #include <sys/proc.h> 44 #include <sys/kthread.h> 45 #include <sys/sched.h> 46 #include <geom/raid3/g_raid3.h> 47 48 49 static MALLOC_DEFINE(M_RAID3, "raid3_data", "GEOM_RAID3 Data"); 50 51 SYSCTL_DECL(_kern_geom); 52 SYSCTL_NODE(_kern_geom, OID_AUTO, raid3, CTLFLAG_RW, 0, "GEOM_RAID3 stuff"); 53 u_int g_raid3_debug = 0; 54 TUNABLE_INT("kern.geom.raid3.debug", &g_raid3_debug); 55 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, debug, CTLFLAG_RW, &g_raid3_debug, 0, 56 "Debug level"); 57 static u_int g_raid3_timeout = 4; 58 TUNABLE_INT("kern.geom.raid3.timeout", &g_raid3_timeout); 59 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, timeout, CTLFLAG_RW, &g_raid3_timeout, 60 0, "Time to wait on all raid3 components"); 61 static u_int g_raid3_idletime = 5; 62 TUNABLE_INT("kern.geom.raid3.idletime", &g_raid3_idletime); 63 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, idletime, CTLFLAG_RW, 64 &g_raid3_idletime, 0, "Mark components as clean when idling"); 65 static u_int g_raid3_disconnect_on_failure = 1; 66 TUNABLE_INT("kern.geom.raid3.disconnect_on_failure", 67 &g_raid3_disconnect_on_failure); 68 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, disconnect_on_failure, CTLFLAG_RW, 69 &g_raid3_disconnect_on_failure, 0, "Disconnect component on I/O failure."); 70 static u_int g_raid3_syncreqs = 2; 71 TUNABLE_INT("kern.geom.raid3.sync_requests", &g_raid3_syncreqs); 72 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, sync_requests, CTLFLAG_RDTUN, 73 &g_raid3_syncreqs, 0, "Parallel synchronization I/O requests."); 74 static u_int g_raid3_use_malloc = 0; 75 TUNABLE_INT("kern.geom.raid3.use_malloc", &g_raid3_use_malloc); 76 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, use_malloc, CTLFLAG_RDTUN, 77 &g_raid3_use_malloc, 0, "Use malloc(9) instead of uma(9)."); 78 79 static u_int g_raid3_n64k = 50; 80 TUNABLE_INT("kern.geom.raid3.n64k", &g_raid3_n64k); 81 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n64k, CTLFLAG_RD, &g_raid3_n64k, 0, 82 "Maximum number of 64kB allocations"); 83 static u_int g_raid3_n16k = 200; 84 TUNABLE_INT("kern.geom.raid3.n16k", &g_raid3_n16k); 85 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n16k, CTLFLAG_RD, &g_raid3_n16k, 0, 86 "Maximum number of 16kB allocations"); 87 static u_int g_raid3_n4k = 1200; 88 TUNABLE_INT("kern.geom.raid3.n4k", &g_raid3_n4k); 89 SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0, 90 "Maximum number of 4kB allocations"); 91 92 SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, 93 "GEOM_RAID3 statistics"); 94 static u_int g_raid3_parity_mismatch = 0; 95 SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, 96 &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); 97 98 #define MSLEEP(ident, mtx, priority, wmesg, timeout) do { \ 99 G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, (ident)); \ 100 msleep((ident), (mtx), (priority), (wmesg), (timeout)); \ 101 G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, (ident)); \ 102 } while (0) 103 104 static eventhandler_tag g_raid3_pre_sync = NULL; 105 106 static int g_raid3_destroy_geom(struct gctl_req *req, struct g_class *mp, 107 struct g_geom *gp); 108 static g_taste_t g_raid3_taste; 109 static void g_raid3_init(struct g_class *mp); 110 static void g_raid3_fini(struct g_class *mp); 111 112 struct g_class g_raid3_class = { 113 .name = G_RAID3_CLASS_NAME, 114 .version = G_VERSION, 115 .ctlreq = g_raid3_config, 116 .taste = g_raid3_taste, 117 .destroy_geom = g_raid3_destroy_geom, 118 .init = g_raid3_init, 119 .fini = g_raid3_fini 120 }; 121 122 123 static void g_raid3_destroy_provider(struct g_raid3_softc *sc); 124 static int g_raid3_update_disk(struct g_raid3_disk *disk, u_int state); 125 static void g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force); 126 static void g_raid3_dumpconf(struct sbuf *sb, const char *indent, 127 struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); 128 static void g_raid3_sync_stop(struct g_raid3_softc *sc, int type); 129 static int g_raid3_register_request(struct bio *pbp); 130 static void g_raid3_sync_release(struct g_raid3_softc *sc); 131 132 133 static const char * 134 g_raid3_disk_state2str(int state) 135 { 136 137 switch (state) { 138 case G_RAID3_DISK_STATE_NODISK: 139 return ("NODISK"); 140 case G_RAID3_DISK_STATE_NONE: 141 return ("NONE"); 142 case G_RAID3_DISK_STATE_NEW: 143 return ("NEW"); 144 case G_RAID3_DISK_STATE_ACTIVE: 145 return ("ACTIVE"); 146 case G_RAID3_DISK_STATE_STALE: 147 return ("STALE"); 148 case G_RAID3_DISK_STATE_SYNCHRONIZING: 149 return ("SYNCHRONIZING"); 150 case G_RAID3_DISK_STATE_DISCONNECTED: 151 return ("DISCONNECTED"); 152 default: 153 return ("INVALID"); 154 } 155 } 156 157 static const char * 158 g_raid3_device_state2str(int state) 159 { 160 161 switch (state) { 162 case G_RAID3_DEVICE_STATE_STARTING: 163 return ("STARTING"); 164 case G_RAID3_DEVICE_STATE_DEGRADED: 165 return ("DEGRADED"); 166 case G_RAID3_DEVICE_STATE_COMPLETE: 167 return ("COMPLETE"); 168 default: 169 return ("INVALID"); 170 } 171 } 172 173 const char * 174 g_raid3_get_diskname(struct g_raid3_disk *disk) 175 { 176 177 if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL) 178 return ("[unknown]"); 179 return (disk->d_name); 180 } 181 182 static void * 183 g_raid3_alloc(struct g_raid3_softc *sc, size_t size, int flags) 184 { 185 void *ptr; 186 187 if (g_raid3_use_malloc) 188 ptr = malloc(size, M_RAID3, flags); 189 else { 190 ptr = uma_zalloc_arg(sc->sc_zones[g_raid3_zone(size)].sz_zone, 191 &sc->sc_zones[g_raid3_zone(size)], flags); 192 sc->sc_zones[g_raid3_zone(size)].sz_requested++; 193 if (ptr == NULL) 194 sc->sc_zones[g_raid3_zone(size)].sz_failed++; 195 } 196 return (ptr); 197 } 198 199 static void 200 g_raid3_free(struct g_raid3_softc *sc, void *ptr, size_t size) 201 { 202 203 if (g_raid3_use_malloc) 204 free(ptr, M_RAID3); 205 else { 206 uma_zfree_arg(sc->sc_zones[g_raid3_zone(size)].sz_zone, 207 ptr, &sc->sc_zones[g_raid3_zone(size)]); 208 } 209 } 210 211 static int 212 g_raid3_uma_ctor(void *mem, int size, void *arg, int flags) 213 { 214 struct g_raid3_zone *sz = arg; 215 216 if (sz->sz_max > 0 && sz->sz_inuse == sz->sz_max) 217 return (ENOMEM); 218 sz->sz_inuse++; 219 return (0); 220 } 221 222 static void 223 g_raid3_uma_dtor(void *mem, int size, void *arg) 224 { 225 struct g_raid3_zone *sz = arg; 226 227 sz->sz_inuse--; 228 } 229 230 #define g_raid3_xor(src1, src2, dst, size) \ 231 _g_raid3_xor((uint64_t *)(src1), (uint64_t *)(src2), \ 232 (uint64_t *)(dst), (size_t)size) 233 static void 234 _g_raid3_xor(uint64_t *src1, uint64_t *src2, uint64_t *dst, size_t size) 235 { 236 237 KASSERT((size % 128) == 0, ("Invalid size: %zu.", size)); 238 for (; size > 0; size -= 128) { 239 *dst++ = (*src1++) ^ (*src2++); 240 *dst++ = (*src1++) ^ (*src2++); 241 *dst++ = (*src1++) ^ (*src2++); 242 *dst++ = (*src1++) ^ (*src2++); 243 *dst++ = (*src1++) ^ (*src2++); 244 *dst++ = (*src1++) ^ (*src2++); 245 *dst++ = (*src1++) ^ (*src2++); 246 *dst++ = (*src1++) ^ (*src2++); 247 *dst++ = (*src1++) ^ (*src2++); 248 *dst++ = (*src1++) ^ (*src2++); 249 *dst++ = (*src1++) ^ (*src2++); 250 *dst++ = (*src1++) ^ (*src2++); 251 *dst++ = (*src1++) ^ (*src2++); 252 *dst++ = (*src1++) ^ (*src2++); 253 *dst++ = (*src1++) ^ (*src2++); 254 *dst++ = (*src1++) ^ (*src2++); 255 } 256 } 257 258 static int 259 g_raid3_is_zero(struct bio *bp) 260 { 261 static const uint64_t zeros[] = { 262 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 263 }; 264 u_char *addr; 265 ssize_t size; 266 267 size = bp->bio_length; 268 addr = (u_char *)bp->bio_data; 269 for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { 270 if (bcmp(addr, zeros, sizeof(zeros)) != 0) 271 return (0); 272 } 273 return (1); 274 } 275 276 /* 277 * --- Events handling functions --- 278 * Events in geom_raid3 are used to maintain disks and device status 279 * from one thread to simplify locking. 280 */ 281 static void 282 g_raid3_event_free(struct g_raid3_event *ep) 283 { 284 285 free(ep, M_RAID3); 286 } 287 288 int 289 g_raid3_event_send(void *arg, int state, int flags) 290 { 291 struct g_raid3_softc *sc; 292 struct g_raid3_disk *disk; 293 struct g_raid3_event *ep; 294 int error; 295 296 ep = malloc(sizeof(*ep), M_RAID3, M_WAITOK); 297 G_RAID3_DEBUG(4, "%s: Sending event %p.", __func__, ep); 298 if ((flags & G_RAID3_EVENT_DEVICE) != 0) { 299 disk = NULL; 300 sc = arg; 301 } else { 302 disk = arg; 303 sc = disk->d_softc; 304 } 305 ep->e_disk = disk; 306 ep->e_state = state; 307 ep->e_flags = flags; 308 ep->e_error = 0; 309 mtx_lock(&sc->sc_events_mtx); 310 TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next); 311 mtx_unlock(&sc->sc_events_mtx); 312 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 313 mtx_lock(&sc->sc_queue_mtx); 314 wakeup(sc); 315 wakeup(&sc->sc_queue); 316 mtx_unlock(&sc->sc_queue_mtx); 317 if ((flags & G_RAID3_EVENT_DONTWAIT) != 0) 318 return (0); 319 sx_assert(&sc->sc_lock, SX_XLOCKED); 320 G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, ep); 321 sx_xunlock(&sc->sc_lock); 322 while ((ep->e_flags & G_RAID3_EVENT_DONE) == 0) { 323 mtx_lock(&sc->sc_events_mtx); 324 MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "r3:event", 325 hz * 5); 326 } 327 error = ep->e_error; 328 g_raid3_event_free(ep); 329 sx_xlock(&sc->sc_lock); 330 return (error); 331 } 332 333 static struct g_raid3_event * 334 g_raid3_event_get(struct g_raid3_softc *sc) 335 { 336 struct g_raid3_event *ep; 337 338 mtx_lock(&sc->sc_events_mtx); 339 ep = TAILQ_FIRST(&sc->sc_events); 340 mtx_unlock(&sc->sc_events_mtx); 341 return (ep); 342 } 343 344 static void 345 g_raid3_event_remove(struct g_raid3_softc *sc, struct g_raid3_event *ep) 346 { 347 348 mtx_lock(&sc->sc_events_mtx); 349 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 350 mtx_unlock(&sc->sc_events_mtx); 351 } 352 353 static void 354 g_raid3_event_cancel(struct g_raid3_disk *disk) 355 { 356 struct g_raid3_softc *sc; 357 struct g_raid3_event *ep, *tmpep; 358 359 sc = disk->d_softc; 360 sx_assert(&sc->sc_lock, SX_XLOCKED); 361 362 mtx_lock(&sc->sc_events_mtx); 363 TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) { 364 if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) 365 continue; 366 if (ep->e_disk != disk) 367 continue; 368 TAILQ_REMOVE(&sc->sc_events, ep, e_next); 369 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 370 g_raid3_event_free(ep); 371 else { 372 ep->e_error = ECANCELED; 373 wakeup(ep); 374 } 375 } 376 mtx_unlock(&sc->sc_events_mtx); 377 } 378 379 /* 380 * Return the number of disks in the given state. 381 * If state is equal to -1, count all connected disks. 382 */ 383 u_int 384 g_raid3_ndisks(struct g_raid3_softc *sc, int state) 385 { 386 struct g_raid3_disk *disk; 387 u_int n, ndisks; 388 389 sx_assert(&sc->sc_lock, SX_LOCKED); 390 391 for (n = ndisks = 0; n < sc->sc_ndisks; n++) { 392 disk = &sc->sc_disks[n]; 393 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 394 continue; 395 if (state == -1 || disk->d_state == state) 396 ndisks++; 397 } 398 return (ndisks); 399 } 400 401 static u_int 402 g_raid3_nrequests(struct g_raid3_softc *sc, struct g_consumer *cp) 403 { 404 struct bio *bp; 405 u_int nreqs = 0; 406 407 mtx_lock(&sc->sc_queue_mtx); 408 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 409 if (bp->bio_from == cp) 410 nreqs++; 411 } 412 mtx_unlock(&sc->sc_queue_mtx); 413 return (nreqs); 414 } 415 416 static int 417 g_raid3_is_busy(struct g_raid3_softc *sc, struct g_consumer *cp) 418 { 419 420 if (cp->index > 0) { 421 G_RAID3_DEBUG(2, 422 "I/O requests for %s exist, can't destroy it now.", 423 cp->provider->name); 424 return (1); 425 } 426 if (g_raid3_nrequests(sc, cp) > 0) { 427 G_RAID3_DEBUG(2, 428 "I/O requests for %s in queue, can't destroy it now.", 429 cp->provider->name); 430 return (1); 431 } 432 return (0); 433 } 434 435 static void 436 g_raid3_destroy_consumer(void *arg, int flags __unused) 437 { 438 struct g_consumer *cp; 439 440 g_topology_assert(); 441 442 cp = arg; 443 G_RAID3_DEBUG(1, "Consumer %s destroyed.", cp->provider->name); 444 g_detach(cp); 445 g_destroy_consumer(cp); 446 } 447 448 static void 449 g_raid3_kill_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 450 { 451 struct g_provider *pp; 452 int retaste_wait; 453 454 g_topology_assert(); 455 456 cp->private = NULL; 457 if (g_raid3_is_busy(sc, cp)) 458 return; 459 G_RAID3_DEBUG(2, "Consumer %s destroyed.", cp->provider->name); 460 pp = cp->provider; 461 retaste_wait = 0; 462 if (cp->acw == 1) { 463 if ((pp->geom->flags & G_GEOM_WITHER) == 0) 464 retaste_wait = 1; 465 } 466 G_RAID3_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr, 467 -cp->acw, -cp->ace, 0); 468 if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) 469 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 470 if (retaste_wait) { 471 /* 472 * After retaste event was send (inside g_access()), we can send 473 * event to detach and destroy consumer. 474 * A class, which has consumer to the given provider connected 475 * will not receive retaste event for the provider. 476 * This is the way how I ignore retaste events when I close 477 * consumers opened for write: I detach and destroy consumer 478 * after retaste event is sent. 479 */ 480 g_post_event(g_raid3_destroy_consumer, cp, M_WAITOK, NULL); 481 return; 482 } 483 G_RAID3_DEBUG(1, "Consumer %s destroyed.", pp->name); 484 g_detach(cp); 485 g_destroy_consumer(cp); 486 } 487 488 static int 489 g_raid3_connect_disk(struct g_raid3_disk *disk, struct g_provider *pp) 490 { 491 struct g_consumer *cp; 492 int error; 493 494 g_topology_assert_not(); 495 KASSERT(disk->d_consumer == NULL, 496 ("Disk already connected (device %s).", disk->d_softc->sc_name)); 497 498 g_topology_lock(); 499 cp = g_new_consumer(disk->d_softc->sc_geom); 500 error = g_attach(cp, pp); 501 if (error != 0) { 502 g_destroy_consumer(cp); 503 g_topology_unlock(); 504 return (error); 505 } 506 error = g_access(cp, 1, 1, 1); 507 g_topology_unlock(); 508 if (error != 0) { 509 g_detach(cp); 510 g_destroy_consumer(cp); 511 G_RAID3_DEBUG(0, "Cannot open consumer %s (error=%d).", 512 pp->name, error); 513 return (error); 514 } 515 disk->d_consumer = cp; 516 disk->d_consumer->private = disk; 517 disk->d_consumer->index = 0; 518 G_RAID3_DEBUG(2, "Disk %s connected.", g_raid3_get_diskname(disk)); 519 return (0); 520 } 521 522 static void 523 g_raid3_disconnect_consumer(struct g_raid3_softc *sc, struct g_consumer *cp) 524 { 525 526 g_topology_assert(); 527 528 if (cp == NULL) 529 return; 530 if (cp->provider != NULL) 531 g_raid3_kill_consumer(sc, cp); 532 else 533 g_destroy_consumer(cp); 534 } 535 536 /* 537 * Initialize disk. This means allocate memory, create consumer, attach it 538 * to the provider and open access (r1w1e1) to it. 539 */ 540 static struct g_raid3_disk * 541 g_raid3_init_disk(struct g_raid3_softc *sc, struct g_provider *pp, 542 struct g_raid3_metadata *md, int *errorp) 543 { 544 struct g_raid3_disk *disk; 545 int error; 546 547 disk = &sc->sc_disks[md->md_no]; 548 error = g_raid3_connect_disk(disk, pp); 549 if (error != 0) { 550 if (errorp != NULL) 551 *errorp = error; 552 return (NULL); 553 } 554 disk->d_state = G_RAID3_DISK_STATE_NONE; 555 disk->d_flags = md->md_dflags; 556 if (md->md_provider[0] != '\0') 557 disk->d_flags |= G_RAID3_DISK_FLAG_HARDCODED; 558 disk->d_sync.ds_consumer = NULL; 559 disk->d_sync.ds_offset = md->md_sync_offset; 560 disk->d_sync.ds_offset_done = md->md_sync_offset; 561 disk->d_genid = md->md_genid; 562 disk->d_sync.ds_syncid = md->md_syncid; 563 if (errorp != NULL) 564 *errorp = 0; 565 return (disk); 566 } 567 568 static void 569 g_raid3_destroy_disk(struct g_raid3_disk *disk) 570 { 571 struct g_raid3_softc *sc; 572 573 g_topology_assert_not(); 574 sc = disk->d_softc; 575 sx_assert(&sc->sc_lock, SX_XLOCKED); 576 577 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 578 return; 579 g_raid3_event_cancel(disk); 580 switch (disk->d_state) { 581 case G_RAID3_DISK_STATE_SYNCHRONIZING: 582 if (sc->sc_syncdisk != NULL) 583 g_raid3_sync_stop(sc, 1); 584 /* FALLTHROUGH */ 585 case G_RAID3_DISK_STATE_NEW: 586 case G_RAID3_DISK_STATE_STALE: 587 case G_RAID3_DISK_STATE_ACTIVE: 588 g_topology_lock(); 589 g_raid3_disconnect_consumer(sc, disk->d_consumer); 590 g_topology_unlock(); 591 disk->d_consumer = NULL; 592 break; 593 default: 594 KASSERT(0 == 1, ("Wrong disk state (%s, %s).", 595 g_raid3_get_diskname(disk), 596 g_raid3_disk_state2str(disk->d_state))); 597 } 598 disk->d_state = G_RAID3_DISK_STATE_NODISK; 599 } 600 601 static void 602 g_raid3_destroy_device(struct g_raid3_softc *sc) 603 { 604 struct g_raid3_event *ep; 605 struct g_raid3_disk *disk; 606 struct g_geom *gp; 607 struct g_consumer *cp; 608 u_int n; 609 610 g_topology_assert_not(); 611 sx_assert(&sc->sc_lock, SX_XLOCKED); 612 613 gp = sc->sc_geom; 614 if (sc->sc_provider != NULL) 615 g_raid3_destroy_provider(sc); 616 for (n = 0; n < sc->sc_ndisks; n++) { 617 disk = &sc->sc_disks[n]; 618 if (disk->d_state != G_RAID3_DISK_STATE_NODISK) { 619 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 620 g_raid3_update_metadata(disk); 621 g_raid3_destroy_disk(disk); 622 } 623 } 624 while ((ep = g_raid3_event_get(sc)) != NULL) { 625 g_raid3_event_remove(sc, ep); 626 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) 627 g_raid3_event_free(ep); 628 else { 629 ep->e_error = ECANCELED; 630 ep->e_flags |= G_RAID3_EVENT_DONE; 631 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, ep); 632 mtx_lock(&sc->sc_events_mtx); 633 wakeup(ep); 634 mtx_unlock(&sc->sc_events_mtx); 635 } 636 } 637 callout_drain(&sc->sc_callout); 638 cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer); 639 g_topology_lock(); 640 if (cp != NULL) 641 g_raid3_disconnect_consumer(sc, cp); 642 g_wither_geom(sc->sc_sync.ds_geom, ENXIO); 643 G_RAID3_DEBUG(0, "Device %s destroyed.", gp->name); 644 g_wither_geom(gp, ENXIO); 645 g_topology_unlock(); 646 if (!g_raid3_use_malloc) { 647 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 648 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 649 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 650 } 651 mtx_destroy(&sc->sc_queue_mtx); 652 mtx_destroy(&sc->sc_events_mtx); 653 sx_xunlock(&sc->sc_lock); 654 sx_destroy(&sc->sc_lock); 655 } 656 657 static void 658 g_raid3_orphan(struct g_consumer *cp) 659 { 660 struct g_raid3_disk *disk; 661 662 g_topology_assert(); 663 664 disk = cp->private; 665 if (disk == NULL) 666 return; 667 disk->d_softc->sc_bump_id = G_RAID3_BUMP_SYNCID; 668 g_raid3_event_send(disk, G_RAID3_DISK_STATE_DISCONNECTED, 669 G_RAID3_EVENT_DONTWAIT); 670 } 671 672 static int 673 g_raid3_write_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 674 { 675 struct g_raid3_softc *sc; 676 struct g_consumer *cp; 677 off_t offset, length; 678 u_char *sector; 679 int error = 0; 680 681 g_topology_assert_not(); 682 sc = disk->d_softc; 683 sx_assert(&sc->sc_lock, SX_LOCKED); 684 685 cp = disk->d_consumer; 686 KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name)); 687 KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name)); 688 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 689 ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr, 690 cp->acw, cp->ace)); 691 length = cp->provider->sectorsize; 692 offset = cp->provider->mediasize - length; 693 sector = malloc((size_t)length, M_RAID3, M_WAITOK | M_ZERO); 694 if (md != NULL) 695 raid3_metadata_encode(md, sector); 696 error = g_write_data(cp, offset, sector, length); 697 free(sector, M_RAID3); 698 if (error != 0) { 699 if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 700 G_RAID3_DEBUG(0, "Cannot write metadata on %s " 701 "(device=%s, error=%d).", 702 g_raid3_get_diskname(disk), sc->sc_name, error); 703 disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 704 } else { 705 G_RAID3_DEBUG(1, "Cannot write metadata on %s " 706 "(device=%s, error=%d).", 707 g_raid3_get_diskname(disk), sc->sc_name, error); 708 } 709 if (g_raid3_disconnect_on_failure && 710 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 711 sc->sc_bump_id |= G_RAID3_BUMP_GENID; 712 g_raid3_event_send(disk, 713 G_RAID3_DISK_STATE_DISCONNECTED, 714 G_RAID3_EVENT_DONTWAIT); 715 } 716 } 717 return (error); 718 } 719 720 int 721 g_raid3_clear_metadata(struct g_raid3_disk *disk) 722 { 723 int error; 724 725 g_topology_assert_not(); 726 sx_assert(&disk->d_softc->sc_lock, SX_LOCKED); 727 728 error = g_raid3_write_metadata(disk, NULL); 729 if (error == 0) { 730 G_RAID3_DEBUG(2, "Metadata on %s cleared.", 731 g_raid3_get_diskname(disk)); 732 } else { 733 G_RAID3_DEBUG(0, 734 "Cannot clear metadata on disk %s (error=%d).", 735 g_raid3_get_diskname(disk), error); 736 } 737 return (error); 738 } 739 740 void 741 g_raid3_fill_metadata(struct g_raid3_disk *disk, struct g_raid3_metadata *md) 742 { 743 struct g_raid3_softc *sc; 744 struct g_provider *pp; 745 746 sc = disk->d_softc; 747 strlcpy(md->md_magic, G_RAID3_MAGIC, sizeof(md->md_magic)); 748 md->md_version = G_RAID3_VERSION; 749 strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name)); 750 md->md_id = sc->sc_id; 751 md->md_all = sc->sc_ndisks; 752 md->md_genid = sc->sc_genid; 753 md->md_mediasize = sc->sc_mediasize; 754 md->md_sectorsize = sc->sc_sectorsize; 755 md->md_mflags = (sc->sc_flags & G_RAID3_DEVICE_FLAG_MASK); 756 md->md_no = disk->d_no; 757 md->md_syncid = disk->d_sync.ds_syncid; 758 md->md_dflags = (disk->d_flags & G_RAID3_DISK_FLAG_MASK); 759 if (disk->d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 760 md->md_sync_offset = 0; 761 else { 762 md->md_sync_offset = 763 disk->d_sync.ds_offset_done / (sc->sc_ndisks - 1); 764 } 765 if (disk->d_consumer != NULL && disk->d_consumer->provider != NULL) 766 pp = disk->d_consumer->provider; 767 else 768 pp = NULL; 769 if ((disk->d_flags & G_RAID3_DISK_FLAG_HARDCODED) != 0 && pp != NULL) 770 strlcpy(md->md_provider, pp->name, sizeof(md->md_provider)); 771 else 772 bzero(md->md_provider, sizeof(md->md_provider)); 773 if (pp != NULL) 774 md->md_provsize = pp->mediasize; 775 else 776 md->md_provsize = 0; 777 } 778 779 void 780 g_raid3_update_metadata(struct g_raid3_disk *disk) 781 { 782 struct g_raid3_softc *sc; 783 struct g_raid3_metadata md; 784 int error; 785 786 g_topology_assert_not(); 787 sc = disk->d_softc; 788 sx_assert(&sc->sc_lock, SX_LOCKED); 789 790 g_raid3_fill_metadata(disk, &md); 791 error = g_raid3_write_metadata(disk, &md); 792 if (error == 0) { 793 G_RAID3_DEBUG(2, "Metadata on %s updated.", 794 g_raid3_get_diskname(disk)); 795 } else { 796 G_RAID3_DEBUG(0, 797 "Cannot update metadata on disk %s (error=%d).", 798 g_raid3_get_diskname(disk), error); 799 } 800 } 801 802 static void 803 g_raid3_bump_syncid(struct g_raid3_softc *sc) 804 { 805 struct g_raid3_disk *disk; 806 u_int n; 807 808 g_topology_assert_not(); 809 sx_assert(&sc->sc_lock, SX_XLOCKED); 810 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 811 ("%s called with no active disks (device=%s).", __func__, 812 sc->sc_name)); 813 814 sc->sc_syncid++; 815 G_RAID3_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name, 816 sc->sc_syncid); 817 for (n = 0; n < sc->sc_ndisks; n++) { 818 disk = &sc->sc_disks[n]; 819 if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 820 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 821 disk->d_sync.ds_syncid = sc->sc_syncid; 822 g_raid3_update_metadata(disk); 823 } 824 } 825 } 826 827 static void 828 g_raid3_bump_genid(struct g_raid3_softc *sc) 829 { 830 struct g_raid3_disk *disk; 831 u_int n; 832 833 g_topology_assert_not(); 834 sx_assert(&sc->sc_lock, SX_XLOCKED); 835 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) > 0, 836 ("%s called with no active disks (device=%s).", __func__, 837 sc->sc_name)); 838 839 sc->sc_genid++; 840 G_RAID3_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name, 841 sc->sc_genid); 842 for (n = 0; n < sc->sc_ndisks; n++) { 843 disk = &sc->sc_disks[n]; 844 if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 845 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 846 disk->d_genid = sc->sc_genid; 847 g_raid3_update_metadata(disk); 848 } 849 } 850 } 851 852 static int 853 g_raid3_idle(struct g_raid3_softc *sc, int acw) 854 { 855 struct g_raid3_disk *disk; 856 u_int i; 857 int timeout; 858 859 g_topology_assert_not(); 860 sx_assert(&sc->sc_lock, SX_XLOCKED); 861 862 if (sc->sc_provider == NULL) 863 return (0); 864 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 865 return (0); 866 if (sc->sc_idle) 867 return (0); 868 if (sc->sc_writes > 0) 869 return (0); 870 if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) { 871 timeout = g_raid3_idletime - (time_uptime - sc->sc_last_write); 872 if (timeout > 0) 873 return (timeout); 874 } 875 sc->sc_idle = 1; 876 for (i = 0; i < sc->sc_ndisks; i++) { 877 disk = &sc->sc_disks[i]; 878 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 879 continue; 880 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 881 g_raid3_get_diskname(disk), sc->sc_name); 882 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 883 g_raid3_update_metadata(disk); 884 } 885 return (0); 886 } 887 888 static void 889 g_raid3_unidle(struct g_raid3_softc *sc) 890 { 891 struct g_raid3_disk *disk; 892 u_int i; 893 894 g_topology_assert_not(); 895 sx_assert(&sc->sc_lock, SX_XLOCKED); 896 897 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 898 return; 899 sc->sc_idle = 0; 900 sc->sc_last_write = time_uptime; 901 for (i = 0; i < sc->sc_ndisks; i++) { 902 disk = &sc->sc_disks[i]; 903 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 904 continue; 905 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 906 g_raid3_get_diskname(disk), sc->sc_name); 907 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 908 g_raid3_update_metadata(disk); 909 } 910 } 911 912 /* 913 * Treat bio_driver1 field in parent bio as list head and field bio_caller1 914 * in child bio as pointer to the next element on the list. 915 */ 916 #define G_RAID3_HEAD_BIO(pbp) (pbp)->bio_driver1 917 918 #define G_RAID3_NEXT_BIO(cbp) (cbp)->bio_caller1 919 920 #define G_RAID3_FOREACH_BIO(pbp, bp) \ 921 for ((bp) = G_RAID3_HEAD_BIO(pbp); (bp) != NULL; \ 922 (bp) = G_RAID3_NEXT_BIO(bp)) 923 924 #define G_RAID3_FOREACH_SAFE_BIO(pbp, bp, tmpbp) \ 925 for ((bp) = G_RAID3_HEAD_BIO(pbp); \ 926 (bp) != NULL && ((tmpbp) = G_RAID3_NEXT_BIO(bp), 1); \ 927 (bp) = (tmpbp)) 928 929 static void 930 g_raid3_init_bio(struct bio *pbp) 931 { 932 933 G_RAID3_HEAD_BIO(pbp) = NULL; 934 } 935 936 static void 937 g_raid3_remove_bio(struct bio *cbp) 938 { 939 struct bio *pbp, *bp; 940 941 pbp = cbp->bio_parent; 942 if (G_RAID3_HEAD_BIO(pbp) == cbp) 943 G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 944 else { 945 G_RAID3_FOREACH_BIO(pbp, bp) { 946 if (G_RAID3_NEXT_BIO(bp) == cbp) { 947 G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 948 break; 949 } 950 } 951 } 952 G_RAID3_NEXT_BIO(cbp) = NULL; 953 } 954 955 static void 956 g_raid3_replace_bio(struct bio *sbp, struct bio *dbp) 957 { 958 struct bio *pbp, *bp; 959 960 g_raid3_remove_bio(sbp); 961 pbp = dbp->bio_parent; 962 G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); 963 if (G_RAID3_HEAD_BIO(pbp) == dbp) 964 G_RAID3_HEAD_BIO(pbp) = sbp; 965 else { 966 G_RAID3_FOREACH_BIO(pbp, bp) { 967 if (G_RAID3_NEXT_BIO(bp) == dbp) { 968 G_RAID3_NEXT_BIO(bp) = sbp; 969 break; 970 } 971 } 972 } 973 G_RAID3_NEXT_BIO(dbp) = NULL; 974 } 975 976 static void 977 g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) 978 { 979 struct bio *bp, *pbp; 980 size_t size; 981 982 pbp = cbp->bio_parent; 983 pbp->bio_children--; 984 KASSERT(cbp->bio_data != NULL, ("NULL bio_data")); 985 size = pbp->bio_length / (sc->sc_ndisks - 1); 986 g_raid3_free(sc, cbp->bio_data, size); 987 if (G_RAID3_HEAD_BIO(pbp) == cbp) { 988 G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); 989 G_RAID3_NEXT_BIO(cbp) = NULL; 990 g_destroy_bio(cbp); 991 } else { 992 G_RAID3_FOREACH_BIO(pbp, bp) { 993 if (G_RAID3_NEXT_BIO(bp) == cbp) 994 break; 995 } 996 if (bp != NULL) { 997 KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, 998 ("NULL bp->bio_driver1")); 999 G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); 1000 G_RAID3_NEXT_BIO(cbp) = NULL; 1001 } 1002 g_destroy_bio(cbp); 1003 } 1004 } 1005 1006 static struct bio * 1007 g_raid3_clone_bio(struct g_raid3_softc *sc, struct bio *pbp) 1008 { 1009 struct bio *bp, *cbp; 1010 size_t size; 1011 int memflag; 1012 1013 cbp = g_clone_bio(pbp); 1014 if (cbp == NULL) 1015 return (NULL); 1016 size = pbp->bio_length / (sc->sc_ndisks - 1); 1017 if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 1018 memflag = M_WAITOK; 1019 else 1020 memflag = M_NOWAIT; 1021 cbp->bio_data = g_raid3_alloc(sc, size, memflag); 1022 if (cbp->bio_data == NULL) { 1023 pbp->bio_children--; 1024 g_destroy_bio(cbp); 1025 return (NULL); 1026 } 1027 G_RAID3_NEXT_BIO(cbp) = NULL; 1028 if (G_RAID3_HEAD_BIO(pbp) == NULL) 1029 G_RAID3_HEAD_BIO(pbp) = cbp; 1030 else { 1031 G_RAID3_FOREACH_BIO(pbp, bp) { 1032 if (G_RAID3_NEXT_BIO(bp) == NULL) { 1033 G_RAID3_NEXT_BIO(bp) = cbp; 1034 break; 1035 } 1036 } 1037 } 1038 return (cbp); 1039 } 1040 1041 static void 1042 g_raid3_scatter(struct bio *pbp) 1043 { 1044 struct g_raid3_softc *sc; 1045 struct g_raid3_disk *disk; 1046 struct bio *bp, *cbp, *tmpbp; 1047 off_t atom, cadd, padd, left; 1048 1049 sc = pbp->bio_to->geom->softc; 1050 bp = NULL; 1051 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1052 /* 1053 * Find bio for which we should calculate data. 1054 */ 1055 G_RAID3_FOREACH_BIO(pbp, cbp) { 1056 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1057 bp = cbp; 1058 break; 1059 } 1060 } 1061 KASSERT(bp != NULL, ("NULL parity bio.")); 1062 } 1063 atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1064 cadd = padd = 0; 1065 for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1066 G_RAID3_FOREACH_BIO(pbp, cbp) { 1067 if (cbp == bp) 1068 continue; 1069 bcopy(pbp->bio_data + padd, cbp->bio_data + cadd, atom); 1070 padd += atom; 1071 } 1072 cadd += atom; 1073 } 1074 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_NOPARITY) == 0) { 1075 /* 1076 * Calculate parity. 1077 */ 1078 bzero(bp->bio_data, bp->bio_length); 1079 G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1080 if (cbp == bp) 1081 continue; 1082 g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data, 1083 bp->bio_length); 1084 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_NODISK) != 0) 1085 g_raid3_destroy_bio(sc, cbp); 1086 } 1087 } 1088 G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1089 struct g_consumer *cp; 1090 1091 disk = cbp->bio_caller2; 1092 cp = disk->d_consumer; 1093 cbp->bio_to = cp->provider; 1094 G_RAID3_LOGREQ(3, cbp, "Sending request."); 1095 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1096 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1097 cp->acr, cp->acw, cp->ace)); 1098 cp->index++; 1099 sc->sc_writes++; 1100 g_io_request(cbp, cp); 1101 } 1102 } 1103 1104 static void 1105 g_raid3_gather(struct bio *pbp) 1106 { 1107 struct g_raid3_softc *sc; 1108 struct g_raid3_disk *disk; 1109 struct bio *xbp, *fbp, *cbp; 1110 off_t atom, cadd, padd, left; 1111 1112 sc = pbp->bio_to->geom->softc; 1113 /* 1114 * Find bio for which we have to calculate data. 1115 * While going through this path, check if all requests 1116 * succeeded, if not, deny whole request. 1117 * If we're in COMPLETE mode, we allow one request to fail, 1118 * so if we find one, we're sending it to the parity consumer. 1119 * If there are more failed requests, we deny whole request. 1120 */ 1121 xbp = fbp = NULL; 1122 G_RAID3_FOREACH_BIO(pbp, cbp) { 1123 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { 1124 KASSERT(xbp == NULL, ("More than one parity bio.")); 1125 xbp = cbp; 1126 } 1127 if (cbp->bio_error == 0) 1128 continue; 1129 /* 1130 * Found failed request. 1131 */ 1132 if (fbp == NULL) { 1133 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { 1134 /* 1135 * We are already in degraded mode, so we can't 1136 * accept any failures. 1137 */ 1138 if (pbp->bio_error == 0) 1139 pbp->bio_error = cbp->bio_error; 1140 } else { 1141 fbp = cbp; 1142 } 1143 } else { 1144 /* 1145 * Next failed request, that's too many. 1146 */ 1147 if (pbp->bio_error == 0) 1148 pbp->bio_error = fbp->bio_error; 1149 } 1150 disk = cbp->bio_caller2; 1151 if (disk == NULL) 1152 continue; 1153 if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1154 disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1155 G_RAID3_LOGREQ(0, cbp, "Request failed (error=%d).", 1156 cbp->bio_error); 1157 } else { 1158 G_RAID3_LOGREQ(1, cbp, "Request failed (error=%d).", 1159 cbp->bio_error); 1160 } 1161 if (g_raid3_disconnect_on_failure && 1162 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1163 sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1164 g_raid3_event_send(disk, 1165 G_RAID3_DISK_STATE_DISCONNECTED, 1166 G_RAID3_EVENT_DONTWAIT); 1167 } 1168 } 1169 if (pbp->bio_error != 0) 1170 goto finish; 1171 if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1172 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; 1173 if (xbp != fbp) 1174 g_raid3_replace_bio(xbp, fbp); 1175 g_raid3_destroy_bio(sc, fbp); 1176 } else if (fbp != NULL) { 1177 struct g_consumer *cp; 1178 1179 /* 1180 * One request failed, so send the same request to 1181 * the parity consumer. 1182 */ 1183 disk = pbp->bio_driver2; 1184 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1185 pbp->bio_error = fbp->bio_error; 1186 goto finish; 1187 } 1188 pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1189 pbp->bio_inbed--; 1190 fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); 1191 if (disk->d_no == sc->sc_ndisks - 1) 1192 fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1193 fbp->bio_error = 0; 1194 fbp->bio_completed = 0; 1195 fbp->bio_children = 0; 1196 fbp->bio_inbed = 0; 1197 cp = disk->d_consumer; 1198 fbp->bio_caller2 = disk; 1199 fbp->bio_to = cp->provider; 1200 G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); 1201 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1202 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1203 cp->acr, cp->acw, cp->ace)); 1204 cp->index++; 1205 g_io_request(fbp, cp); 1206 return; 1207 } 1208 if (xbp != NULL) { 1209 /* 1210 * Calculate parity. 1211 */ 1212 G_RAID3_FOREACH_BIO(pbp, cbp) { 1213 if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) 1214 continue; 1215 g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data, 1216 xbp->bio_length); 1217 } 1218 xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; 1219 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { 1220 if (!g_raid3_is_zero(xbp)) { 1221 g_raid3_parity_mismatch++; 1222 pbp->bio_error = EIO; 1223 goto finish; 1224 } 1225 g_raid3_destroy_bio(sc, xbp); 1226 } 1227 } 1228 atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1229 cadd = padd = 0; 1230 for (left = pbp->bio_length; left > 0; left -= sc->sc_sectorsize) { 1231 G_RAID3_FOREACH_BIO(pbp, cbp) { 1232 bcopy(cbp->bio_data + cadd, pbp->bio_data + padd, atom); 1233 pbp->bio_completed += atom; 1234 padd += atom; 1235 } 1236 cadd += atom; 1237 } 1238 finish: 1239 if (pbp->bio_error == 0) 1240 G_RAID3_LOGREQ(3, pbp, "Request finished."); 1241 else { 1242 if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) 1243 G_RAID3_LOGREQ(1, pbp, "Verification error."); 1244 else 1245 G_RAID3_LOGREQ(0, pbp, "Request failed."); 1246 } 1247 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; 1248 while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1249 g_raid3_destroy_bio(sc, cbp); 1250 g_io_deliver(pbp, pbp->bio_error); 1251 } 1252 1253 static void 1254 g_raid3_done(struct bio *bp) 1255 { 1256 struct g_raid3_softc *sc; 1257 1258 sc = bp->bio_from->geom->softc; 1259 bp->bio_cflags |= G_RAID3_BIO_CFLAG_REGULAR; 1260 G_RAID3_LOGREQ(3, bp, "Regular request done (error=%d).", bp->bio_error); 1261 mtx_lock(&sc->sc_queue_mtx); 1262 bioq_insert_head(&sc->sc_queue, bp); 1263 wakeup(sc); 1264 wakeup(&sc->sc_queue); 1265 mtx_unlock(&sc->sc_queue_mtx); 1266 } 1267 1268 static void 1269 g_raid3_regular_request(struct bio *cbp) 1270 { 1271 struct g_raid3_softc *sc; 1272 struct g_raid3_disk *disk; 1273 struct bio *pbp; 1274 1275 g_topology_assert_not(); 1276 1277 pbp = cbp->bio_parent; 1278 sc = pbp->bio_to->geom->softc; 1279 cbp->bio_from->index--; 1280 if (cbp->bio_cmd == BIO_WRITE) 1281 sc->sc_writes--; 1282 disk = cbp->bio_from->private; 1283 if (disk == NULL) { 1284 g_topology_lock(); 1285 g_raid3_kill_consumer(sc, cbp->bio_from); 1286 g_topology_unlock(); 1287 } 1288 1289 G_RAID3_LOGREQ(3, cbp, "Request finished."); 1290 pbp->bio_inbed++; 1291 KASSERT(pbp->bio_inbed <= pbp->bio_children, 1292 ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed, 1293 pbp->bio_children)); 1294 if (pbp->bio_inbed != pbp->bio_children) 1295 return; 1296 switch (pbp->bio_cmd) { 1297 case BIO_READ: 1298 g_raid3_gather(pbp); 1299 break; 1300 case BIO_WRITE: 1301 case BIO_DELETE: 1302 { 1303 int error = 0; 1304 1305 pbp->bio_completed = pbp->bio_length; 1306 while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) { 1307 if (cbp->bio_error == 0) { 1308 g_raid3_destroy_bio(sc, cbp); 1309 continue; 1310 } 1311 1312 if (error == 0) 1313 error = cbp->bio_error; 1314 else if (pbp->bio_error == 0) { 1315 /* 1316 * Next failed request, that's too many. 1317 */ 1318 pbp->bio_error = error; 1319 } 1320 1321 disk = cbp->bio_caller2; 1322 if (disk == NULL) { 1323 g_raid3_destroy_bio(sc, cbp); 1324 continue; 1325 } 1326 1327 if ((disk->d_flags & G_RAID3_DISK_FLAG_BROKEN) == 0) { 1328 disk->d_flags |= G_RAID3_DISK_FLAG_BROKEN; 1329 G_RAID3_LOGREQ(0, cbp, 1330 "Request failed (error=%d).", 1331 cbp->bio_error); 1332 } else { 1333 G_RAID3_LOGREQ(1, cbp, 1334 "Request failed (error=%d).", 1335 cbp->bio_error); 1336 } 1337 if (g_raid3_disconnect_on_failure && 1338 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1339 sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1340 g_raid3_event_send(disk, 1341 G_RAID3_DISK_STATE_DISCONNECTED, 1342 G_RAID3_EVENT_DONTWAIT); 1343 } 1344 g_raid3_destroy_bio(sc, cbp); 1345 } 1346 if (pbp->bio_error == 0) 1347 G_RAID3_LOGREQ(3, pbp, "Request finished."); 1348 else 1349 G_RAID3_LOGREQ(0, pbp, "Request failed."); 1350 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; 1351 pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_NOPARITY; 1352 bioq_remove(&sc->sc_inflight, pbp); 1353 /* Release delayed sync requests if possible. */ 1354 g_raid3_sync_release(sc); 1355 g_io_deliver(pbp, pbp->bio_error); 1356 break; 1357 } 1358 } 1359 } 1360 1361 static void 1362 g_raid3_sync_done(struct bio *bp) 1363 { 1364 struct g_raid3_softc *sc; 1365 1366 G_RAID3_LOGREQ(3, bp, "Synchronization request delivered."); 1367 sc = bp->bio_from->geom->softc; 1368 bp->bio_cflags |= G_RAID3_BIO_CFLAG_SYNC; 1369 mtx_lock(&sc->sc_queue_mtx); 1370 bioq_insert_head(&sc->sc_queue, bp); 1371 wakeup(sc); 1372 wakeup(&sc->sc_queue); 1373 mtx_unlock(&sc->sc_queue_mtx); 1374 } 1375 1376 static void 1377 g_raid3_flush(struct g_raid3_softc *sc, struct bio *bp) 1378 { 1379 struct bio_queue_head queue; 1380 struct g_raid3_disk *disk; 1381 struct g_consumer *cp; 1382 struct bio *cbp; 1383 u_int i; 1384 1385 bioq_init(&queue); 1386 for (i = 0; i < sc->sc_ndisks; i++) { 1387 disk = &sc->sc_disks[i]; 1388 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) 1389 continue; 1390 cbp = g_clone_bio(bp); 1391 if (cbp == NULL) { 1392 for (cbp = bioq_first(&queue); cbp != NULL; 1393 cbp = bioq_first(&queue)) { 1394 bioq_remove(&queue, cbp); 1395 g_destroy_bio(cbp); 1396 } 1397 if (bp->bio_error == 0) 1398 bp->bio_error = ENOMEM; 1399 g_io_deliver(bp, bp->bio_error); 1400 return; 1401 } 1402 bioq_insert_tail(&queue, cbp); 1403 cbp->bio_done = g_std_done; 1404 cbp->bio_caller1 = disk; 1405 cbp->bio_to = disk->d_consumer->provider; 1406 } 1407 for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { 1408 bioq_remove(&queue, cbp); 1409 G_RAID3_LOGREQ(3, cbp, "Sending request."); 1410 disk = cbp->bio_caller1; 1411 cbp->bio_caller1 = NULL; 1412 cp = disk->d_consumer; 1413 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1414 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1415 cp->acr, cp->acw, cp->ace)); 1416 g_io_request(cbp, disk->d_consumer); 1417 } 1418 } 1419 1420 static void 1421 g_raid3_start(struct bio *bp) 1422 { 1423 struct g_raid3_softc *sc; 1424 1425 sc = bp->bio_to->geom->softc; 1426 /* 1427 * If sc == NULL or there are no valid disks, provider's error 1428 * should be set and g_raid3_start() should not be called at all. 1429 */ 1430 KASSERT(sc != NULL && (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 1431 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE), 1432 ("Provider's error should be set (error=%d)(device=%s).", 1433 bp->bio_to->error, bp->bio_to->name)); 1434 G_RAID3_LOGREQ(3, bp, "Request received."); 1435 1436 switch (bp->bio_cmd) { 1437 case BIO_READ: 1438 case BIO_WRITE: 1439 case BIO_DELETE: 1440 break; 1441 case BIO_FLUSH: 1442 g_raid3_flush(sc, bp); 1443 return; 1444 case BIO_GETATTR: 1445 default: 1446 g_io_deliver(bp, EOPNOTSUPP); 1447 return; 1448 } 1449 mtx_lock(&sc->sc_queue_mtx); 1450 bioq_insert_tail(&sc->sc_queue, bp); 1451 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 1452 wakeup(sc); 1453 mtx_unlock(&sc->sc_queue_mtx); 1454 } 1455 1456 /* 1457 * Return TRUE if the given request is colliding with a in-progress 1458 * synchronization request. 1459 */ 1460 static int 1461 g_raid3_sync_collision(struct g_raid3_softc *sc, struct bio *bp) 1462 { 1463 struct g_raid3_disk *disk; 1464 struct bio *sbp; 1465 off_t rstart, rend, sstart, send; 1466 int i; 1467 1468 disk = sc->sc_syncdisk; 1469 if (disk == NULL) 1470 return (0); 1471 rstart = bp->bio_offset; 1472 rend = bp->bio_offset + bp->bio_length; 1473 for (i = 0; i < g_raid3_syncreqs; i++) { 1474 sbp = disk->d_sync.ds_bios[i]; 1475 if (sbp == NULL) 1476 continue; 1477 sstart = sbp->bio_offset; 1478 send = sbp->bio_length; 1479 if (sbp->bio_cmd == BIO_WRITE) { 1480 sstart *= sc->sc_ndisks - 1; 1481 send *= sc->sc_ndisks - 1; 1482 } 1483 send += sstart; 1484 if (rend > sstart && rstart < send) 1485 return (1); 1486 } 1487 return (0); 1488 } 1489 1490 /* 1491 * Return TRUE if the given sync request is colliding with a in-progress regular 1492 * request. 1493 */ 1494 static int 1495 g_raid3_regular_collision(struct g_raid3_softc *sc, struct bio *sbp) 1496 { 1497 off_t rstart, rend, sstart, send; 1498 struct bio *bp; 1499 1500 if (sc->sc_syncdisk == NULL) 1501 return (0); 1502 sstart = sbp->bio_offset; 1503 send = sstart + sbp->bio_length; 1504 TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) { 1505 rstart = bp->bio_offset; 1506 rend = bp->bio_offset + bp->bio_length; 1507 if (rend > sstart && rstart < send) 1508 return (1); 1509 } 1510 return (0); 1511 } 1512 1513 /* 1514 * Puts request onto delayed queue. 1515 */ 1516 static void 1517 g_raid3_regular_delay(struct g_raid3_softc *sc, struct bio *bp) 1518 { 1519 1520 G_RAID3_LOGREQ(2, bp, "Delaying request."); 1521 bioq_insert_head(&sc->sc_regular_delayed, bp); 1522 } 1523 1524 /* 1525 * Puts synchronization request onto delayed queue. 1526 */ 1527 static void 1528 g_raid3_sync_delay(struct g_raid3_softc *sc, struct bio *bp) 1529 { 1530 1531 G_RAID3_LOGREQ(2, bp, "Delaying synchronization request."); 1532 bioq_insert_tail(&sc->sc_sync_delayed, bp); 1533 } 1534 1535 /* 1536 * Releases delayed regular requests which don't collide anymore with sync 1537 * requests. 1538 */ 1539 static void 1540 g_raid3_regular_release(struct g_raid3_softc *sc) 1541 { 1542 struct bio *bp, *bp2; 1543 1544 TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) { 1545 if (g_raid3_sync_collision(sc, bp)) 1546 continue; 1547 bioq_remove(&sc->sc_regular_delayed, bp); 1548 G_RAID3_LOGREQ(2, bp, "Releasing delayed request (%p).", bp); 1549 mtx_lock(&sc->sc_queue_mtx); 1550 bioq_insert_head(&sc->sc_queue, bp); 1551 #if 0 1552 /* 1553 * wakeup() is not needed, because this function is called from 1554 * the worker thread. 1555 */ 1556 wakeup(&sc->sc_queue); 1557 #endif 1558 mtx_unlock(&sc->sc_queue_mtx); 1559 } 1560 } 1561 1562 /* 1563 * Releases delayed sync requests which don't collide anymore with regular 1564 * requests. 1565 */ 1566 static void 1567 g_raid3_sync_release(struct g_raid3_softc *sc) 1568 { 1569 struct bio *bp, *bp2; 1570 1571 TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) { 1572 if (g_raid3_regular_collision(sc, bp)) 1573 continue; 1574 bioq_remove(&sc->sc_sync_delayed, bp); 1575 G_RAID3_LOGREQ(2, bp, 1576 "Releasing delayed synchronization request."); 1577 g_io_request(bp, bp->bio_from); 1578 } 1579 } 1580 1581 /* 1582 * Handle synchronization requests. 1583 * Every synchronization request is two-steps process: first, READ request is 1584 * send to active provider and then WRITE request (with read data) to the provider 1585 * beeing synchronized. When WRITE is finished, new synchronization request is 1586 * send. 1587 */ 1588 static void 1589 g_raid3_sync_request(struct bio *bp) 1590 { 1591 struct g_raid3_softc *sc; 1592 struct g_raid3_disk *disk; 1593 1594 bp->bio_from->index--; 1595 sc = bp->bio_from->geom->softc; 1596 disk = bp->bio_from->private; 1597 if (disk == NULL) { 1598 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 1599 g_topology_lock(); 1600 g_raid3_kill_consumer(sc, bp->bio_from); 1601 g_topology_unlock(); 1602 free(bp->bio_data, M_RAID3); 1603 g_destroy_bio(bp); 1604 sx_xlock(&sc->sc_lock); 1605 return; 1606 } 1607 1608 /* 1609 * Synchronization request. 1610 */ 1611 switch (bp->bio_cmd) { 1612 case BIO_READ: 1613 { 1614 struct g_consumer *cp; 1615 u_char *dst, *src; 1616 off_t left; 1617 u_int atom; 1618 1619 if (bp->bio_error != 0) { 1620 G_RAID3_LOGREQ(0, bp, 1621 "Synchronization request failed (error=%d).", 1622 bp->bio_error); 1623 g_destroy_bio(bp); 1624 return; 1625 } 1626 G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1627 atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); 1628 dst = src = bp->bio_data; 1629 if (disk->d_no == sc->sc_ndisks - 1) { 1630 u_int n; 1631 1632 /* Parity component. */ 1633 for (left = bp->bio_length; left > 0; 1634 left -= sc->sc_sectorsize) { 1635 bcopy(src, dst, atom); 1636 src += atom; 1637 for (n = 1; n < sc->sc_ndisks - 1; n++) { 1638 g_raid3_xor(src, dst, dst, atom); 1639 src += atom; 1640 } 1641 dst += atom; 1642 } 1643 } else { 1644 /* Regular component. */ 1645 src += atom * disk->d_no; 1646 for (left = bp->bio_length; left > 0; 1647 left -= sc->sc_sectorsize) { 1648 bcopy(src, dst, atom); 1649 src += sc->sc_sectorsize; 1650 dst += atom; 1651 } 1652 } 1653 bp->bio_driver1 = bp->bio_driver2 = NULL; 1654 bp->bio_pflags = 0; 1655 bp->bio_offset /= sc->sc_ndisks - 1; 1656 bp->bio_length /= sc->sc_ndisks - 1; 1657 bp->bio_cmd = BIO_WRITE; 1658 bp->bio_cflags = 0; 1659 bp->bio_children = bp->bio_inbed = 0; 1660 cp = disk->d_consumer; 1661 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1662 ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, 1663 cp->acr, cp->acw, cp->ace)); 1664 cp->index++; 1665 g_io_request(bp, cp); 1666 return; 1667 } 1668 case BIO_WRITE: 1669 { 1670 struct g_raid3_disk_sync *sync; 1671 off_t boffset, moffset; 1672 void *data; 1673 int i; 1674 1675 if (bp->bio_error != 0) { 1676 G_RAID3_LOGREQ(0, bp, 1677 "Synchronization request failed (error=%d).", 1678 bp->bio_error); 1679 g_destroy_bio(bp); 1680 sc->sc_bump_id |= G_RAID3_BUMP_GENID; 1681 g_raid3_event_send(disk, 1682 G_RAID3_DISK_STATE_DISCONNECTED, 1683 G_RAID3_EVENT_DONTWAIT); 1684 return; 1685 } 1686 G_RAID3_LOGREQ(3, bp, "Synchronization request finished."); 1687 sync = &disk->d_sync; 1688 if (sync->ds_offset == sc->sc_mediasize / (sc->sc_ndisks - 1) || 1689 sync->ds_consumer == NULL || 1690 (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1691 /* Don't send more synchronization requests. */ 1692 sync->ds_inflight--; 1693 if (sync->ds_bios != NULL) { 1694 i = (int)(uintptr_t)bp->bio_caller1; 1695 sync->ds_bios[i] = NULL; 1696 } 1697 free(bp->bio_data, M_RAID3); 1698 g_destroy_bio(bp); 1699 if (sync->ds_inflight > 0) 1700 return; 1701 if (sync->ds_consumer == NULL || 1702 (sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 1703 return; 1704 } 1705 /* 1706 * Disk up-to-date, activate it. 1707 */ 1708 g_raid3_event_send(disk, G_RAID3_DISK_STATE_ACTIVE, 1709 G_RAID3_EVENT_DONTWAIT); 1710 return; 1711 } 1712 1713 /* Send next synchronization request. */ 1714 data = bp->bio_data; 1715 bzero(bp, sizeof(*bp)); 1716 bp->bio_cmd = BIO_READ; 1717 bp->bio_offset = sync->ds_offset * (sc->sc_ndisks - 1); 1718 bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 1719 sync->ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 1720 bp->bio_done = g_raid3_sync_done; 1721 bp->bio_data = data; 1722 bp->bio_from = sync->ds_consumer; 1723 bp->bio_to = sc->sc_provider; 1724 G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 1725 sync->ds_consumer->index++; 1726 /* 1727 * Delay the request if it is colliding with a regular request. 1728 */ 1729 if (g_raid3_regular_collision(sc, bp)) 1730 g_raid3_sync_delay(sc, bp); 1731 else 1732 g_io_request(bp, sync->ds_consumer); 1733 1734 /* Release delayed requests if possible. */ 1735 g_raid3_regular_release(sc); 1736 1737 /* Find the smallest offset. */ 1738 moffset = sc->sc_mediasize; 1739 for (i = 0; i < g_raid3_syncreqs; i++) { 1740 bp = sync->ds_bios[i]; 1741 boffset = bp->bio_offset; 1742 if (bp->bio_cmd == BIO_WRITE) 1743 boffset *= sc->sc_ndisks - 1; 1744 if (boffset < moffset) 1745 moffset = boffset; 1746 } 1747 if (sync->ds_offset_done + (MAXPHYS * 100) < moffset) { 1748 /* Update offset_done on every 100 blocks. */ 1749 sync->ds_offset_done = moffset; 1750 g_raid3_update_metadata(disk); 1751 } 1752 return; 1753 } 1754 default: 1755 KASSERT(1 == 0, ("Invalid command here: %u (device=%s)", 1756 bp->bio_cmd, sc->sc_name)); 1757 break; 1758 } 1759 } 1760 1761 static int 1762 g_raid3_register_request(struct bio *pbp) 1763 { 1764 struct g_raid3_softc *sc; 1765 struct g_raid3_disk *disk; 1766 struct g_consumer *cp; 1767 struct bio *cbp, *tmpbp; 1768 off_t offset, length; 1769 u_int n, ndisks; 1770 int round_robin, verify; 1771 1772 ndisks = 0; 1773 sc = pbp->bio_to->geom->softc; 1774 if ((pbp->bio_cflags & G_RAID3_BIO_CFLAG_REGSYNC) != 0 && 1775 sc->sc_syncdisk == NULL) { 1776 g_io_deliver(pbp, EIO); 1777 return (0); 1778 } 1779 g_raid3_init_bio(pbp); 1780 length = pbp->bio_length / (sc->sc_ndisks - 1); 1781 offset = pbp->bio_offset / (sc->sc_ndisks - 1); 1782 round_robin = verify = 0; 1783 switch (pbp->bio_cmd) { 1784 case BIO_READ: 1785 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 1786 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1787 pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; 1788 verify = 1; 1789 ndisks = sc->sc_ndisks; 1790 } else { 1791 verify = 0; 1792 ndisks = sc->sc_ndisks - 1; 1793 } 1794 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && 1795 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 1796 round_robin = 1; 1797 } else { 1798 round_robin = 0; 1799 } 1800 KASSERT(!round_robin || !verify, 1801 ("ROUND-ROBIN and VERIFY are mutually exclusive.")); 1802 pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; 1803 break; 1804 case BIO_WRITE: 1805 case BIO_DELETE: 1806 /* 1807 * Delay the request if it is colliding with a synchronization 1808 * request. 1809 */ 1810 if (g_raid3_sync_collision(sc, pbp)) { 1811 g_raid3_regular_delay(sc, pbp); 1812 return (0); 1813 } 1814 1815 if (sc->sc_idle) 1816 g_raid3_unidle(sc); 1817 else 1818 sc->sc_last_write = time_uptime; 1819 1820 ndisks = sc->sc_ndisks; 1821 break; 1822 } 1823 for (n = 0; n < ndisks; n++) { 1824 disk = &sc->sc_disks[n]; 1825 cbp = g_raid3_clone_bio(sc, pbp); 1826 if (cbp == NULL) { 1827 while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) 1828 g_raid3_destroy_bio(sc, cbp); 1829 /* 1830 * To prevent deadlock, we must run back up 1831 * with the ENOMEM for failed requests of any 1832 * of our consumers. Our own sync requests 1833 * can stick around, as they are finite. 1834 */ 1835 if ((pbp->bio_cflags & 1836 G_RAID3_BIO_CFLAG_REGULAR) != 0) { 1837 g_io_deliver(pbp, ENOMEM); 1838 return (0); 1839 } 1840 return (ENOMEM); 1841 } 1842 cbp->bio_offset = offset; 1843 cbp->bio_length = length; 1844 cbp->bio_done = g_raid3_done; 1845 switch (pbp->bio_cmd) { 1846 case BIO_READ: 1847 if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { 1848 /* 1849 * Replace invalid component with the parity 1850 * component. 1851 */ 1852 disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1853 cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1854 pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1855 } else if (round_robin && 1856 disk->d_no == sc->sc_round_robin) { 1857 /* 1858 * In round-robin mode skip one data component 1859 * and use parity component when reading. 1860 */ 1861 pbp->bio_driver2 = disk; 1862 disk = &sc->sc_disks[sc->sc_ndisks - 1]; 1863 cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1864 sc->sc_round_robin++; 1865 round_robin = 0; 1866 } else if (verify && disk->d_no == sc->sc_ndisks - 1) { 1867 cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; 1868 } 1869 break; 1870 case BIO_WRITE: 1871 case BIO_DELETE: 1872 if (disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 1873 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 1874 if (n == ndisks - 1) { 1875 /* 1876 * Active parity component, mark it as such. 1877 */ 1878 cbp->bio_cflags |= 1879 G_RAID3_BIO_CFLAG_PARITY; 1880 } 1881 } else { 1882 pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; 1883 if (n == ndisks - 1) { 1884 /* 1885 * Parity component is not connected, 1886 * so destroy its request. 1887 */ 1888 pbp->bio_pflags |= 1889 G_RAID3_BIO_PFLAG_NOPARITY; 1890 g_raid3_destroy_bio(sc, cbp); 1891 cbp = NULL; 1892 } else { 1893 cbp->bio_cflags |= 1894 G_RAID3_BIO_CFLAG_NODISK; 1895 disk = NULL; 1896 } 1897 } 1898 break; 1899 } 1900 if (cbp != NULL) 1901 cbp->bio_caller2 = disk; 1902 } 1903 switch (pbp->bio_cmd) { 1904 case BIO_READ: 1905 if (round_robin) { 1906 /* 1907 * If we are in round-robin mode and 'round_robin' is 1908 * still 1, it means, that we skipped parity component 1909 * for this read and must reset sc_round_robin field. 1910 */ 1911 sc->sc_round_robin = 0; 1912 } 1913 G_RAID3_FOREACH_SAFE_BIO(pbp, cbp, tmpbp) { 1914 disk = cbp->bio_caller2; 1915 cp = disk->d_consumer; 1916 cbp->bio_to = cp->provider; 1917 G_RAID3_LOGREQ(3, cbp, "Sending request."); 1918 KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1, 1919 ("Consumer %s not opened (r%dw%de%d).", 1920 cp->provider->name, cp->acr, cp->acw, cp->ace)); 1921 cp->index++; 1922 g_io_request(cbp, cp); 1923 } 1924 break; 1925 case BIO_WRITE: 1926 case BIO_DELETE: 1927 /* 1928 * Put request onto inflight queue, so we can check if new 1929 * synchronization requests don't collide with it. 1930 */ 1931 bioq_insert_tail(&sc->sc_inflight, pbp); 1932 1933 /* 1934 * Bump syncid on first write. 1935 */ 1936 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) { 1937 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 1938 g_raid3_bump_syncid(sc); 1939 } 1940 g_raid3_scatter(pbp); 1941 break; 1942 } 1943 return (0); 1944 } 1945 1946 static int 1947 g_raid3_can_destroy(struct g_raid3_softc *sc) 1948 { 1949 struct g_geom *gp; 1950 struct g_consumer *cp; 1951 1952 g_topology_assert(); 1953 gp = sc->sc_geom; 1954 if (gp->softc == NULL) 1955 return (1); 1956 LIST_FOREACH(cp, &gp->consumer, consumer) { 1957 if (g_raid3_is_busy(sc, cp)) 1958 return (0); 1959 } 1960 gp = sc->sc_sync.ds_geom; 1961 LIST_FOREACH(cp, &gp->consumer, consumer) { 1962 if (g_raid3_is_busy(sc, cp)) 1963 return (0); 1964 } 1965 G_RAID3_DEBUG(2, "No I/O requests for %s, it can be destroyed.", 1966 sc->sc_name); 1967 return (1); 1968 } 1969 1970 static int 1971 g_raid3_try_destroy(struct g_raid3_softc *sc) 1972 { 1973 1974 g_topology_assert_not(); 1975 sx_assert(&sc->sc_lock, SX_XLOCKED); 1976 1977 if (sc->sc_rootmount != NULL) { 1978 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 1979 sc->sc_rootmount); 1980 root_mount_rel(sc->sc_rootmount); 1981 sc->sc_rootmount = NULL; 1982 } 1983 1984 g_topology_lock(); 1985 if (!g_raid3_can_destroy(sc)) { 1986 g_topology_unlock(); 1987 return (0); 1988 } 1989 sc->sc_geom->softc = NULL; 1990 sc->sc_sync.ds_geom->softc = NULL; 1991 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_WAIT) != 0) { 1992 g_topology_unlock(); 1993 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 1994 &sc->sc_worker); 1995 /* Unlock sc_lock here, as it can be destroyed after wakeup. */ 1996 sx_xunlock(&sc->sc_lock); 1997 wakeup(&sc->sc_worker); 1998 sc->sc_worker = NULL; 1999 } else { 2000 g_topology_unlock(); 2001 g_raid3_destroy_device(sc); 2002 free(sc->sc_disks, M_RAID3); 2003 free(sc, M_RAID3); 2004 } 2005 return (1); 2006 } 2007 2008 /* 2009 * Worker thread. 2010 */ 2011 static void 2012 g_raid3_worker(void *arg) 2013 { 2014 struct g_raid3_softc *sc; 2015 struct g_raid3_event *ep; 2016 struct bio *bp; 2017 int timeout; 2018 2019 sc = arg; 2020 thread_lock(curthread); 2021 sched_prio(curthread, PRIBIO); 2022 thread_unlock(curthread); 2023 2024 sx_xlock(&sc->sc_lock); 2025 for (;;) { 2026 G_RAID3_DEBUG(5, "%s: Let's see...", __func__); 2027 /* 2028 * First take a look at events. 2029 * This is important to handle events before any I/O requests. 2030 */ 2031 ep = g_raid3_event_get(sc); 2032 if (ep != NULL) { 2033 g_raid3_event_remove(sc, ep); 2034 if ((ep->e_flags & G_RAID3_EVENT_DEVICE) != 0) { 2035 /* Update only device status. */ 2036 G_RAID3_DEBUG(3, 2037 "Running event for device %s.", 2038 sc->sc_name); 2039 ep->e_error = 0; 2040 g_raid3_update_device(sc, 1); 2041 } else { 2042 /* Update disk status. */ 2043 G_RAID3_DEBUG(3, "Running event for disk %s.", 2044 g_raid3_get_diskname(ep->e_disk)); 2045 ep->e_error = g_raid3_update_disk(ep->e_disk, 2046 ep->e_state); 2047 if (ep->e_error == 0) 2048 g_raid3_update_device(sc, 0); 2049 } 2050 if ((ep->e_flags & G_RAID3_EVENT_DONTWAIT) != 0) { 2051 KASSERT(ep->e_error == 0, 2052 ("Error cannot be handled.")); 2053 g_raid3_event_free(ep); 2054 } else { 2055 ep->e_flags |= G_RAID3_EVENT_DONE; 2056 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, 2057 ep); 2058 mtx_lock(&sc->sc_events_mtx); 2059 wakeup(ep); 2060 mtx_unlock(&sc->sc_events_mtx); 2061 } 2062 if ((sc->sc_flags & 2063 G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2064 if (g_raid3_try_destroy(sc)) { 2065 curthread->td_pflags &= ~TDP_GEOM; 2066 G_RAID3_DEBUG(1, "Thread exiting."); 2067 kproc_exit(0); 2068 } 2069 } 2070 G_RAID3_DEBUG(5, "%s: I'm here 1.", __func__); 2071 continue; 2072 } 2073 /* 2074 * Check if we can mark array as CLEAN and if we can't take 2075 * how much seconds should we wait. 2076 */ 2077 timeout = g_raid3_idle(sc, -1); 2078 /* 2079 * Now I/O requests. 2080 */ 2081 /* Get first request from the queue. */ 2082 mtx_lock(&sc->sc_queue_mtx); 2083 bp = bioq_first(&sc->sc_queue); 2084 if (bp == NULL) { 2085 if ((sc->sc_flags & 2086 G_RAID3_DEVICE_FLAG_DESTROY) != 0) { 2087 mtx_unlock(&sc->sc_queue_mtx); 2088 if (g_raid3_try_destroy(sc)) { 2089 curthread->td_pflags &= ~TDP_GEOM; 2090 G_RAID3_DEBUG(1, "Thread exiting."); 2091 kproc_exit(0); 2092 } 2093 mtx_lock(&sc->sc_queue_mtx); 2094 } 2095 sx_xunlock(&sc->sc_lock); 2096 /* 2097 * XXX: We can miss an event here, because an event 2098 * can be added without sx-device-lock and without 2099 * mtx-queue-lock. Maybe I should just stop using 2100 * dedicated mutex for events synchronization and 2101 * stick with the queue lock? 2102 * The event will hang here until next I/O request 2103 * or next event is received. 2104 */ 2105 MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "r3:w1", 2106 timeout * hz); 2107 sx_xlock(&sc->sc_lock); 2108 G_RAID3_DEBUG(5, "%s: I'm here 4.", __func__); 2109 continue; 2110 } 2111 process: 2112 bioq_remove(&sc->sc_queue, bp); 2113 mtx_unlock(&sc->sc_queue_mtx); 2114 2115 if (bp->bio_from->geom == sc->sc_sync.ds_geom && 2116 (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) { 2117 g_raid3_sync_request(bp); /* READ */ 2118 } else if (bp->bio_to != sc->sc_provider) { 2119 if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) != 0) 2120 g_raid3_regular_request(bp); 2121 else if ((bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) != 0) 2122 g_raid3_sync_request(bp); /* WRITE */ 2123 else { 2124 KASSERT(0, 2125 ("Invalid request cflags=0x%hhx to=%s.", 2126 bp->bio_cflags, bp->bio_to->name)); 2127 } 2128 } else if (g_raid3_register_request(bp) != 0) { 2129 mtx_lock(&sc->sc_queue_mtx); 2130 bioq_insert_head(&sc->sc_queue, bp); 2131 /* 2132 * We are short in memory, let see if there are finished 2133 * request we can free. 2134 */ 2135 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2136 if (bp->bio_cflags & G_RAID3_BIO_CFLAG_REGULAR) 2137 goto process; 2138 } 2139 /* 2140 * No finished regular request, so at least keep 2141 * synchronization running. 2142 */ 2143 TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) { 2144 if (bp->bio_cflags & G_RAID3_BIO_CFLAG_SYNC) 2145 goto process; 2146 } 2147 sx_xunlock(&sc->sc_lock); 2148 MSLEEP(&sc->sc_queue, &sc->sc_queue_mtx, PRIBIO | PDROP, 2149 "r3:lowmem", hz / 10); 2150 sx_xlock(&sc->sc_lock); 2151 } 2152 G_RAID3_DEBUG(5, "%s: I'm here 9.", __func__); 2153 } 2154 } 2155 2156 static void 2157 g_raid3_update_idle(struct g_raid3_softc *sc, struct g_raid3_disk *disk) 2158 { 2159 2160 sx_assert(&sc->sc_lock, SX_LOCKED); 2161 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) != 0) 2162 return; 2163 if (!sc->sc_idle && (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) == 0) { 2164 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as dirty.", 2165 g_raid3_get_diskname(disk), sc->sc_name); 2166 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2167 } else if (sc->sc_idle && 2168 (disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) { 2169 G_RAID3_DEBUG(1, "Disk %s (device %s) marked as clean.", 2170 g_raid3_get_diskname(disk), sc->sc_name); 2171 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2172 } 2173 } 2174 2175 static void 2176 g_raid3_sync_start(struct g_raid3_softc *sc) 2177 { 2178 struct g_raid3_disk *disk; 2179 struct g_consumer *cp; 2180 struct bio *bp; 2181 int error; 2182 u_int n; 2183 2184 g_topology_assert_not(); 2185 sx_assert(&sc->sc_lock, SX_XLOCKED); 2186 2187 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2188 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2189 sc->sc_state)); 2190 KASSERT(sc->sc_syncdisk == NULL, ("Syncdisk is not NULL (%s, %u).", 2191 sc->sc_name, sc->sc_state)); 2192 disk = NULL; 2193 for (n = 0; n < sc->sc_ndisks; n++) { 2194 if (sc->sc_disks[n].d_state != G_RAID3_DISK_STATE_SYNCHRONIZING) 2195 continue; 2196 disk = &sc->sc_disks[n]; 2197 break; 2198 } 2199 if (disk == NULL) 2200 return; 2201 2202 sx_xunlock(&sc->sc_lock); 2203 g_topology_lock(); 2204 cp = g_new_consumer(sc->sc_sync.ds_geom); 2205 error = g_attach(cp, sc->sc_provider); 2206 KASSERT(error == 0, 2207 ("Cannot attach to %s (error=%d).", sc->sc_name, error)); 2208 error = g_access(cp, 1, 0, 0); 2209 KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error)); 2210 g_topology_unlock(); 2211 sx_xlock(&sc->sc_lock); 2212 2213 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name, 2214 g_raid3_get_diskname(disk)); 2215 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOFAILSYNC) == 0) 2216 disk->d_flags |= G_RAID3_DISK_FLAG_DIRTY; 2217 KASSERT(disk->d_sync.ds_consumer == NULL, 2218 ("Sync consumer already exists (device=%s, disk=%s).", 2219 sc->sc_name, g_raid3_get_diskname(disk))); 2220 2221 disk->d_sync.ds_consumer = cp; 2222 disk->d_sync.ds_consumer->private = disk; 2223 disk->d_sync.ds_consumer->index = 0; 2224 sc->sc_syncdisk = disk; 2225 2226 /* 2227 * Allocate memory for synchronization bios and initialize them. 2228 */ 2229 disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_raid3_syncreqs, 2230 M_RAID3, M_WAITOK); 2231 for (n = 0; n < g_raid3_syncreqs; n++) { 2232 bp = g_alloc_bio(); 2233 disk->d_sync.ds_bios[n] = bp; 2234 bp->bio_parent = NULL; 2235 bp->bio_cmd = BIO_READ; 2236 bp->bio_data = malloc(MAXPHYS, M_RAID3, M_WAITOK); 2237 bp->bio_cflags = 0; 2238 bp->bio_offset = disk->d_sync.ds_offset * (sc->sc_ndisks - 1); 2239 bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset); 2240 disk->d_sync.ds_offset += bp->bio_length / (sc->sc_ndisks - 1); 2241 bp->bio_done = g_raid3_sync_done; 2242 bp->bio_from = disk->d_sync.ds_consumer; 2243 bp->bio_to = sc->sc_provider; 2244 bp->bio_caller1 = (void *)(uintptr_t)n; 2245 } 2246 2247 /* Set the number of in-flight synchronization requests. */ 2248 disk->d_sync.ds_inflight = g_raid3_syncreqs; 2249 2250 /* 2251 * Fire off first synchronization requests. 2252 */ 2253 for (n = 0; n < g_raid3_syncreqs; n++) { 2254 bp = disk->d_sync.ds_bios[n]; 2255 G_RAID3_LOGREQ(3, bp, "Sending synchronization request."); 2256 disk->d_sync.ds_consumer->index++; 2257 /* 2258 * Delay the request if it is colliding with a regular request. 2259 */ 2260 if (g_raid3_regular_collision(sc, bp)) 2261 g_raid3_sync_delay(sc, bp); 2262 else 2263 g_io_request(bp, disk->d_sync.ds_consumer); 2264 } 2265 } 2266 2267 /* 2268 * Stop synchronization process. 2269 * type: 0 - synchronization finished 2270 * 1 - synchronization stopped 2271 */ 2272 static void 2273 g_raid3_sync_stop(struct g_raid3_softc *sc, int type) 2274 { 2275 struct g_raid3_disk *disk; 2276 struct g_consumer *cp; 2277 2278 g_topology_assert_not(); 2279 sx_assert(&sc->sc_lock, SX_LOCKED); 2280 2281 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED, 2282 ("Device not in DEGRADED state (%s, %u).", sc->sc_name, 2283 sc->sc_state)); 2284 disk = sc->sc_syncdisk; 2285 sc->sc_syncdisk = NULL; 2286 KASSERT(disk != NULL, ("No disk was synchronized (%s).", sc->sc_name)); 2287 KASSERT(disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2288 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2289 g_raid3_disk_state2str(disk->d_state))); 2290 if (disk->d_sync.ds_consumer == NULL) 2291 return; 2292 2293 if (type == 0) { 2294 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s finished.", 2295 sc->sc_name, g_raid3_get_diskname(disk)); 2296 } else /* if (type == 1) */ { 2297 G_RAID3_DEBUG(0, "Device %s: rebuilding provider %s stopped.", 2298 sc->sc_name, g_raid3_get_diskname(disk)); 2299 } 2300 free(disk->d_sync.ds_bios, M_RAID3); 2301 disk->d_sync.ds_bios = NULL; 2302 cp = disk->d_sync.ds_consumer; 2303 disk->d_sync.ds_consumer = NULL; 2304 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2305 sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */ 2306 g_topology_lock(); 2307 g_raid3_kill_consumer(sc, cp); 2308 g_topology_unlock(); 2309 sx_xlock(&sc->sc_lock); 2310 } 2311 2312 static void 2313 g_raid3_launch_provider(struct g_raid3_softc *sc) 2314 { 2315 struct g_provider *pp; 2316 2317 sx_assert(&sc->sc_lock, SX_LOCKED); 2318 2319 g_topology_lock(); 2320 pp = g_new_providerf(sc->sc_geom, "raid3/%s", sc->sc_name); 2321 pp->mediasize = sc->sc_mediasize; 2322 pp->sectorsize = sc->sc_sectorsize; 2323 sc->sc_provider = pp; 2324 g_error_provider(pp, 0); 2325 g_topology_unlock(); 2326 G_RAID3_DEBUG(0, "Device %s launched (%u/%u).", pp->name, 2327 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE), sc->sc_ndisks); 2328 2329 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED) 2330 g_raid3_sync_start(sc); 2331 } 2332 2333 static void 2334 g_raid3_destroy_provider(struct g_raid3_softc *sc) 2335 { 2336 struct bio *bp; 2337 2338 g_topology_assert_not(); 2339 KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).", 2340 sc->sc_name)); 2341 2342 g_topology_lock(); 2343 g_error_provider(sc->sc_provider, ENXIO); 2344 mtx_lock(&sc->sc_queue_mtx); 2345 while ((bp = bioq_first(&sc->sc_queue)) != NULL) { 2346 bioq_remove(&sc->sc_queue, bp); 2347 g_io_deliver(bp, ENXIO); 2348 } 2349 mtx_unlock(&sc->sc_queue_mtx); 2350 G_RAID3_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, 2351 sc->sc_provider->name); 2352 sc->sc_provider->flags |= G_PF_WITHER; 2353 g_orphan_provider(sc->sc_provider, ENXIO); 2354 g_topology_unlock(); 2355 sc->sc_provider = NULL; 2356 if (sc->sc_syncdisk != NULL) 2357 g_raid3_sync_stop(sc, 1); 2358 } 2359 2360 static void 2361 g_raid3_go(void *arg) 2362 { 2363 struct g_raid3_softc *sc; 2364 2365 sc = arg; 2366 G_RAID3_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name); 2367 g_raid3_event_send(sc, 0, 2368 G_RAID3_EVENT_DONTWAIT | G_RAID3_EVENT_DEVICE); 2369 } 2370 2371 static u_int 2372 g_raid3_determine_state(struct g_raid3_disk *disk) 2373 { 2374 struct g_raid3_softc *sc; 2375 u_int state; 2376 2377 sc = disk->d_softc; 2378 if (sc->sc_syncid == disk->d_sync.ds_syncid) { 2379 if ((disk->d_flags & 2380 G_RAID3_DISK_FLAG_SYNCHRONIZING) == 0) { 2381 /* Disk does not need synchronization. */ 2382 state = G_RAID3_DISK_STATE_ACTIVE; 2383 } else { 2384 if ((sc->sc_flags & 2385 G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2386 (disk->d_flags & 2387 G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2388 /* 2389 * We can start synchronization from 2390 * the stored offset. 2391 */ 2392 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2393 } else { 2394 state = G_RAID3_DISK_STATE_STALE; 2395 } 2396 } 2397 } else if (disk->d_sync.ds_syncid < sc->sc_syncid) { 2398 /* 2399 * Reset all synchronization data for this disk, 2400 * because if it even was synchronized, it was 2401 * synchronized to disks with different syncid. 2402 */ 2403 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2404 disk->d_sync.ds_offset = 0; 2405 disk->d_sync.ds_offset_done = 0; 2406 disk->d_sync.ds_syncid = sc->sc_syncid; 2407 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) == 0 || 2408 (disk->d_flags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0) { 2409 state = G_RAID3_DISK_STATE_SYNCHRONIZING; 2410 } else { 2411 state = G_RAID3_DISK_STATE_STALE; 2412 } 2413 } else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ { 2414 /* 2415 * Not good, NOT GOOD! 2416 * It means that device was started on stale disks 2417 * and more fresh disk just arrive. 2418 * If there were writes, device is broken, sorry. 2419 * I think the best choice here is don't touch 2420 * this disk and inform the user loudly. 2421 */ 2422 G_RAID3_DEBUG(0, "Device %s was started before the freshest " 2423 "disk (%s) arrives!! It will not be connected to the " 2424 "running device.", sc->sc_name, 2425 g_raid3_get_diskname(disk)); 2426 g_raid3_destroy_disk(disk); 2427 state = G_RAID3_DISK_STATE_NONE; 2428 /* Return immediately, because disk was destroyed. */ 2429 return (state); 2430 } 2431 G_RAID3_DEBUG(3, "State for %s disk: %s.", 2432 g_raid3_get_diskname(disk), g_raid3_disk_state2str(state)); 2433 return (state); 2434 } 2435 2436 /* 2437 * Update device state. 2438 */ 2439 static void 2440 g_raid3_update_device(struct g_raid3_softc *sc, boolean_t force) 2441 { 2442 struct g_raid3_disk *disk; 2443 u_int state; 2444 2445 sx_assert(&sc->sc_lock, SX_XLOCKED); 2446 2447 switch (sc->sc_state) { 2448 case G_RAID3_DEVICE_STATE_STARTING: 2449 { 2450 u_int n, ndirty, ndisks, genid, syncid; 2451 2452 KASSERT(sc->sc_provider == NULL, 2453 ("Non-NULL provider in STARTING state (%s).", sc->sc_name)); 2454 /* 2455 * Are we ready? We are, if all disks are connected or 2456 * one disk is missing and 'force' is true. 2457 */ 2458 if (g_raid3_ndisks(sc, -1) + force == sc->sc_ndisks) { 2459 if (!force) 2460 callout_drain(&sc->sc_callout); 2461 } else { 2462 if (force) { 2463 /* 2464 * Timeout expired, so destroy device. 2465 */ 2466 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2467 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", 2468 __LINE__, sc->sc_rootmount); 2469 root_mount_rel(sc->sc_rootmount); 2470 sc->sc_rootmount = NULL; 2471 } 2472 return; 2473 } 2474 2475 /* 2476 * Find the biggest genid. 2477 */ 2478 genid = 0; 2479 for (n = 0; n < sc->sc_ndisks; n++) { 2480 disk = &sc->sc_disks[n]; 2481 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2482 continue; 2483 if (disk->d_genid > genid) 2484 genid = disk->d_genid; 2485 } 2486 sc->sc_genid = genid; 2487 /* 2488 * Remove all disks without the biggest genid. 2489 */ 2490 for (n = 0; n < sc->sc_ndisks; n++) { 2491 disk = &sc->sc_disks[n]; 2492 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2493 continue; 2494 if (disk->d_genid < genid) { 2495 G_RAID3_DEBUG(0, 2496 "Component %s (device %s) broken, skipping.", 2497 g_raid3_get_diskname(disk), sc->sc_name); 2498 g_raid3_destroy_disk(disk); 2499 } 2500 } 2501 2502 /* 2503 * There must be at least 'sc->sc_ndisks - 1' components 2504 * with the same syncid and without SYNCHRONIZING flag. 2505 */ 2506 2507 /* 2508 * Find the biggest syncid, number of valid components and 2509 * number of dirty components. 2510 */ 2511 ndirty = ndisks = syncid = 0; 2512 for (n = 0; n < sc->sc_ndisks; n++) { 2513 disk = &sc->sc_disks[n]; 2514 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2515 continue; 2516 if ((disk->d_flags & G_RAID3_DISK_FLAG_DIRTY) != 0) 2517 ndirty++; 2518 if (disk->d_sync.ds_syncid > syncid) { 2519 syncid = disk->d_sync.ds_syncid; 2520 ndisks = 0; 2521 } else if (disk->d_sync.ds_syncid < syncid) { 2522 continue; 2523 } 2524 if ((disk->d_flags & 2525 G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0) { 2526 continue; 2527 } 2528 ndisks++; 2529 } 2530 /* 2531 * Do we have enough valid components? 2532 */ 2533 if (ndisks + 1 < sc->sc_ndisks) { 2534 G_RAID3_DEBUG(0, 2535 "Device %s is broken, too few valid components.", 2536 sc->sc_name); 2537 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2538 return; 2539 } 2540 /* 2541 * If there is one DIRTY component and all disks are present, 2542 * mark it for synchronization. If there is more than one DIRTY 2543 * component, mark parity component for synchronization. 2544 */ 2545 if (ndisks == sc->sc_ndisks && ndirty == 1) { 2546 for (n = 0; n < sc->sc_ndisks; n++) { 2547 disk = &sc->sc_disks[n]; 2548 if ((disk->d_flags & 2549 G_RAID3_DISK_FLAG_DIRTY) == 0) { 2550 continue; 2551 } 2552 disk->d_flags |= 2553 G_RAID3_DISK_FLAG_SYNCHRONIZING; 2554 } 2555 } else if (ndisks == sc->sc_ndisks && ndirty > 1) { 2556 disk = &sc->sc_disks[sc->sc_ndisks - 1]; 2557 disk->d_flags |= G_RAID3_DISK_FLAG_SYNCHRONIZING; 2558 } 2559 2560 sc->sc_syncid = syncid; 2561 if (force) { 2562 /* Remember to bump syncid on first write. */ 2563 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2564 } 2565 if (ndisks == sc->sc_ndisks) 2566 state = G_RAID3_DEVICE_STATE_COMPLETE; 2567 else /* if (ndisks == sc->sc_ndisks - 1) */ 2568 state = G_RAID3_DEVICE_STATE_DEGRADED; 2569 G_RAID3_DEBUG(1, "Device %s state changed from %s to %s.", 2570 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2571 g_raid3_device_state2str(state)); 2572 sc->sc_state = state; 2573 for (n = 0; n < sc->sc_ndisks; n++) { 2574 disk = &sc->sc_disks[n]; 2575 if (disk->d_state == G_RAID3_DISK_STATE_NODISK) 2576 continue; 2577 state = g_raid3_determine_state(disk); 2578 g_raid3_event_send(disk, state, G_RAID3_EVENT_DONTWAIT); 2579 if (state == G_RAID3_DISK_STATE_STALE) 2580 sc->sc_bump_id |= G_RAID3_BUMP_SYNCID; 2581 } 2582 break; 2583 } 2584 case G_RAID3_DEVICE_STATE_DEGRADED: 2585 /* 2586 * Genid need to be bumped immediately, so do it here. 2587 */ 2588 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2589 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2590 g_raid3_bump_genid(sc); 2591 } 2592 2593 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2594 return; 2595 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < 2596 sc->sc_ndisks - 1) { 2597 if (sc->sc_provider != NULL) 2598 g_raid3_destroy_provider(sc); 2599 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 2600 return; 2601 } 2602 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2603 sc->sc_ndisks) { 2604 state = G_RAID3_DEVICE_STATE_COMPLETE; 2605 G_RAID3_DEBUG(1, 2606 "Device %s state changed from %s to %s.", 2607 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2608 g_raid3_device_state2str(state)); 2609 sc->sc_state = state; 2610 } 2611 if (sc->sc_provider == NULL) 2612 g_raid3_launch_provider(sc); 2613 if (sc->sc_rootmount != NULL) { 2614 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2615 sc->sc_rootmount); 2616 root_mount_rel(sc->sc_rootmount); 2617 sc->sc_rootmount = NULL; 2618 } 2619 break; 2620 case G_RAID3_DEVICE_STATE_COMPLETE: 2621 /* 2622 * Genid need to be bumped immediately, so do it here. 2623 */ 2624 if ((sc->sc_bump_id & G_RAID3_BUMP_GENID) != 0) { 2625 sc->sc_bump_id &= ~G_RAID3_BUMP_GENID; 2626 g_raid3_bump_genid(sc); 2627 } 2628 2629 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NEW) > 0) 2630 return; 2631 KASSERT(g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) >= 2632 sc->sc_ndisks - 1, 2633 ("Too few ACTIVE components in COMPLETE state (device %s).", 2634 sc->sc_name)); 2635 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) == 2636 sc->sc_ndisks - 1) { 2637 state = G_RAID3_DEVICE_STATE_DEGRADED; 2638 G_RAID3_DEBUG(1, 2639 "Device %s state changed from %s to %s.", 2640 sc->sc_name, g_raid3_device_state2str(sc->sc_state), 2641 g_raid3_device_state2str(state)); 2642 sc->sc_state = state; 2643 } 2644 if (sc->sc_provider == NULL) 2645 g_raid3_launch_provider(sc); 2646 if (sc->sc_rootmount != NULL) { 2647 G_RAID3_DEBUG(1, "root_mount_rel[%u] %p", __LINE__, 2648 sc->sc_rootmount); 2649 root_mount_rel(sc->sc_rootmount); 2650 sc->sc_rootmount = NULL; 2651 } 2652 break; 2653 default: 2654 KASSERT(1 == 0, ("Wrong device state (%s, %s).", sc->sc_name, 2655 g_raid3_device_state2str(sc->sc_state))); 2656 break; 2657 } 2658 } 2659 2660 /* 2661 * Update disk state and device state if needed. 2662 */ 2663 #define DISK_STATE_CHANGED() G_RAID3_DEBUG(1, \ 2664 "Disk %s state changed from %s to %s (device %s).", \ 2665 g_raid3_get_diskname(disk), \ 2666 g_raid3_disk_state2str(disk->d_state), \ 2667 g_raid3_disk_state2str(state), sc->sc_name) 2668 static int 2669 g_raid3_update_disk(struct g_raid3_disk *disk, u_int state) 2670 { 2671 struct g_raid3_softc *sc; 2672 2673 sc = disk->d_softc; 2674 sx_assert(&sc->sc_lock, SX_XLOCKED); 2675 2676 again: 2677 G_RAID3_DEBUG(3, "Changing disk %s state from %s to %s.", 2678 g_raid3_get_diskname(disk), g_raid3_disk_state2str(disk->d_state), 2679 g_raid3_disk_state2str(state)); 2680 switch (state) { 2681 case G_RAID3_DISK_STATE_NEW: 2682 /* 2683 * Possible scenarios: 2684 * 1. New disk arrive. 2685 */ 2686 /* Previous state should be NONE. */ 2687 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NONE, 2688 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2689 g_raid3_disk_state2str(disk->d_state))); 2690 DISK_STATE_CHANGED(); 2691 2692 disk->d_state = state; 2693 G_RAID3_DEBUG(1, "Device %s: provider %s detected.", 2694 sc->sc_name, g_raid3_get_diskname(disk)); 2695 if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) 2696 break; 2697 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2698 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2699 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2700 g_raid3_device_state2str(sc->sc_state), 2701 g_raid3_get_diskname(disk), 2702 g_raid3_disk_state2str(disk->d_state))); 2703 state = g_raid3_determine_state(disk); 2704 if (state != G_RAID3_DISK_STATE_NONE) 2705 goto again; 2706 break; 2707 case G_RAID3_DISK_STATE_ACTIVE: 2708 /* 2709 * Possible scenarios: 2710 * 1. New disk does not need synchronization. 2711 * 2. Synchronization process finished successfully. 2712 */ 2713 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2714 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2715 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2716 g_raid3_device_state2str(sc->sc_state), 2717 g_raid3_get_diskname(disk), 2718 g_raid3_disk_state2str(disk->d_state))); 2719 /* Previous state should be NEW or SYNCHRONIZING. */ 2720 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW || 2721 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2722 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2723 g_raid3_disk_state2str(disk->d_state))); 2724 DISK_STATE_CHANGED(); 2725 2726 if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 2727 disk->d_flags &= ~G_RAID3_DISK_FLAG_SYNCHRONIZING; 2728 disk->d_flags &= ~G_RAID3_DISK_FLAG_FORCE_SYNC; 2729 g_raid3_sync_stop(sc, 0); 2730 } 2731 disk->d_state = state; 2732 disk->d_sync.ds_offset = 0; 2733 disk->d_sync.ds_offset_done = 0; 2734 g_raid3_update_idle(sc, disk); 2735 g_raid3_update_metadata(disk); 2736 G_RAID3_DEBUG(1, "Device %s: provider %s activated.", 2737 sc->sc_name, g_raid3_get_diskname(disk)); 2738 break; 2739 case G_RAID3_DISK_STATE_STALE: 2740 /* 2741 * Possible scenarios: 2742 * 1. Stale disk was connected. 2743 */ 2744 /* Previous state should be NEW. */ 2745 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2746 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2747 g_raid3_disk_state2str(disk->d_state))); 2748 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2749 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2750 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2751 g_raid3_device_state2str(sc->sc_state), 2752 g_raid3_get_diskname(disk), 2753 g_raid3_disk_state2str(disk->d_state))); 2754 /* 2755 * STALE state is only possible if device is marked 2756 * NOAUTOSYNC. 2757 */ 2758 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0, 2759 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2760 g_raid3_device_state2str(sc->sc_state), 2761 g_raid3_get_diskname(disk), 2762 g_raid3_disk_state2str(disk->d_state))); 2763 DISK_STATE_CHANGED(); 2764 2765 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2766 disk->d_state = state; 2767 g_raid3_update_metadata(disk); 2768 G_RAID3_DEBUG(0, "Device %s: provider %s is stale.", 2769 sc->sc_name, g_raid3_get_diskname(disk)); 2770 break; 2771 case G_RAID3_DISK_STATE_SYNCHRONIZING: 2772 /* 2773 * Possible scenarios: 2774 * 1. Disk which needs synchronization was connected. 2775 */ 2776 /* Previous state should be NEW. */ 2777 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2778 ("Wrong disk state (%s, %s).", g_raid3_get_diskname(disk), 2779 g_raid3_disk_state2str(disk->d_state))); 2780 KASSERT(sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2781 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE, 2782 ("Wrong device state (%s, %s, %s, %s).", sc->sc_name, 2783 g_raid3_device_state2str(sc->sc_state), 2784 g_raid3_get_diskname(disk), 2785 g_raid3_disk_state2str(disk->d_state))); 2786 DISK_STATE_CHANGED(); 2787 2788 if (disk->d_state == G_RAID3_DISK_STATE_NEW) 2789 disk->d_flags &= ~G_RAID3_DISK_FLAG_DIRTY; 2790 disk->d_state = state; 2791 if (sc->sc_provider != NULL) { 2792 g_raid3_sync_start(sc); 2793 g_raid3_update_metadata(disk); 2794 } 2795 break; 2796 case G_RAID3_DISK_STATE_DISCONNECTED: 2797 /* 2798 * Possible scenarios: 2799 * 1. Device wasn't running yet, but disk disappear. 2800 * 2. Disk was active and disapppear. 2801 * 3. Disk disappear during synchronization process. 2802 */ 2803 if (sc->sc_state == G_RAID3_DEVICE_STATE_DEGRADED || 2804 sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { 2805 /* 2806 * Previous state should be ACTIVE, STALE or 2807 * SYNCHRONIZING. 2808 */ 2809 KASSERT(disk->d_state == G_RAID3_DISK_STATE_ACTIVE || 2810 disk->d_state == G_RAID3_DISK_STATE_STALE || 2811 disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING, 2812 ("Wrong disk state (%s, %s).", 2813 g_raid3_get_diskname(disk), 2814 g_raid3_disk_state2str(disk->d_state))); 2815 } else if (sc->sc_state == G_RAID3_DEVICE_STATE_STARTING) { 2816 /* Previous state should be NEW. */ 2817 KASSERT(disk->d_state == G_RAID3_DISK_STATE_NEW, 2818 ("Wrong disk state (%s, %s).", 2819 g_raid3_get_diskname(disk), 2820 g_raid3_disk_state2str(disk->d_state))); 2821 /* 2822 * Reset bumping syncid if disk disappeared in STARTING 2823 * state. 2824 */ 2825 if ((sc->sc_bump_id & G_RAID3_BUMP_SYNCID) != 0) 2826 sc->sc_bump_id &= ~G_RAID3_BUMP_SYNCID; 2827 #ifdef INVARIANTS 2828 } else { 2829 KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).", 2830 sc->sc_name, 2831 g_raid3_device_state2str(sc->sc_state), 2832 g_raid3_get_diskname(disk), 2833 g_raid3_disk_state2str(disk->d_state))); 2834 #endif 2835 } 2836 DISK_STATE_CHANGED(); 2837 G_RAID3_DEBUG(0, "Device %s: provider %s disconnected.", 2838 sc->sc_name, g_raid3_get_diskname(disk)); 2839 2840 g_raid3_destroy_disk(disk); 2841 break; 2842 default: 2843 KASSERT(1 == 0, ("Unknown state (%u).", state)); 2844 break; 2845 } 2846 return (0); 2847 } 2848 #undef DISK_STATE_CHANGED 2849 2850 int 2851 g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md) 2852 { 2853 struct g_provider *pp; 2854 u_char *buf; 2855 int error; 2856 2857 g_topology_assert(); 2858 2859 error = g_access(cp, 1, 0, 0); 2860 if (error != 0) 2861 return (error); 2862 pp = cp->provider; 2863 g_topology_unlock(); 2864 /* Metadata are stored on last sector. */ 2865 buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize, 2866 &error); 2867 g_topology_lock(); 2868 g_access(cp, -1, 0, 0); 2869 if (buf == NULL) { 2870 G_RAID3_DEBUG(1, "Cannot read metadata from %s (error=%d).", 2871 cp->provider->name, error); 2872 return (error); 2873 } 2874 2875 /* Decode metadata. */ 2876 error = raid3_metadata_decode(buf, md); 2877 g_free(buf); 2878 if (strcmp(md->md_magic, G_RAID3_MAGIC) != 0) 2879 return (EINVAL); 2880 if (md->md_version > G_RAID3_VERSION) { 2881 G_RAID3_DEBUG(0, 2882 "Kernel module is too old to handle metadata from %s.", 2883 cp->provider->name); 2884 return (EINVAL); 2885 } 2886 if (error != 0) { 2887 G_RAID3_DEBUG(1, "MD5 metadata hash mismatch for provider %s.", 2888 cp->provider->name); 2889 return (error); 2890 } 2891 2892 return (0); 2893 } 2894 2895 static int 2896 g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, 2897 struct g_raid3_metadata *md) 2898 { 2899 2900 if (md->md_no >= sc->sc_ndisks) { 2901 G_RAID3_DEBUG(1, "Invalid disk %s number (no=%u), skipping.", 2902 pp->name, md->md_no); 2903 return (EINVAL); 2904 } 2905 if (sc->sc_disks[md->md_no].d_state != G_RAID3_DISK_STATE_NODISK) { 2906 G_RAID3_DEBUG(1, "Disk %s (no=%u) already exists, skipping.", 2907 pp->name, md->md_no); 2908 return (EEXIST); 2909 } 2910 if (md->md_all != sc->sc_ndisks) { 2911 G_RAID3_DEBUG(1, 2912 "Invalid '%s' field on disk %s (device %s), skipping.", 2913 "md_all", pp->name, sc->sc_name); 2914 return (EINVAL); 2915 } 2916 if ((md->md_mediasize % md->md_sectorsize) != 0) { 2917 G_RAID3_DEBUG(1, "Invalid metadata (mediasize %% sectorsize != " 2918 "0) on disk %s (device %s), skipping.", pp->name, 2919 sc->sc_name); 2920 return (EINVAL); 2921 } 2922 if (md->md_mediasize != sc->sc_mediasize) { 2923 G_RAID3_DEBUG(1, 2924 "Invalid '%s' field on disk %s (device %s), skipping.", 2925 "md_mediasize", pp->name, sc->sc_name); 2926 return (EINVAL); 2927 } 2928 if ((md->md_mediasize % (sc->sc_ndisks - 1)) != 0) { 2929 G_RAID3_DEBUG(1, 2930 "Invalid '%s' field on disk %s (device %s), skipping.", 2931 "md_mediasize", pp->name, sc->sc_name); 2932 return (EINVAL); 2933 } 2934 if ((sc->sc_mediasize / (sc->sc_ndisks - 1)) > pp->mediasize) { 2935 G_RAID3_DEBUG(1, 2936 "Invalid size of disk %s (device %s), skipping.", pp->name, 2937 sc->sc_name); 2938 return (EINVAL); 2939 } 2940 if ((md->md_sectorsize / pp->sectorsize) < sc->sc_ndisks - 1) { 2941 G_RAID3_DEBUG(1, 2942 "Invalid '%s' field on disk %s (device %s), skipping.", 2943 "md_sectorsize", pp->name, sc->sc_name); 2944 return (EINVAL); 2945 } 2946 if (md->md_sectorsize != sc->sc_sectorsize) { 2947 G_RAID3_DEBUG(1, 2948 "Invalid '%s' field on disk %s (device %s), skipping.", 2949 "md_sectorsize", pp->name, sc->sc_name); 2950 return (EINVAL); 2951 } 2952 if ((sc->sc_sectorsize % pp->sectorsize) != 0) { 2953 G_RAID3_DEBUG(1, 2954 "Invalid sector size of disk %s (device %s), skipping.", 2955 pp->name, sc->sc_name); 2956 return (EINVAL); 2957 } 2958 if ((md->md_mflags & ~G_RAID3_DEVICE_FLAG_MASK) != 0) { 2959 G_RAID3_DEBUG(1, 2960 "Invalid device flags on disk %s (device %s), skipping.", 2961 pp->name, sc->sc_name); 2962 return (EINVAL); 2963 } 2964 if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && 2965 (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { 2966 /* 2967 * VERIFY and ROUND-ROBIN options are mutally exclusive. 2968 */ 2969 G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " 2970 "disk %s (device %s), skipping.", pp->name, sc->sc_name); 2971 return (EINVAL); 2972 } 2973 if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { 2974 G_RAID3_DEBUG(1, 2975 "Invalid disk flags on disk %s (device %s), skipping.", 2976 pp->name, sc->sc_name); 2977 return (EINVAL); 2978 } 2979 return (0); 2980 } 2981 2982 int 2983 g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp, 2984 struct g_raid3_metadata *md) 2985 { 2986 struct g_raid3_disk *disk; 2987 int error; 2988 2989 g_topology_assert_not(); 2990 G_RAID3_DEBUG(2, "Adding disk %s.", pp->name); 2991 2992 error = g_raid3_check_metadata(sc, pp, md); 2993 if (error != 0) 2994 return (error); 2995 if (sc->sc_state != G_RAID3_DEVICE_STATE_STARTING && 2996 md->md_genid < sc->sc_genid) { 2997 G_RAID3_DEBUG(0, "Component %s (device %s) broken, skipping.", 2998 pp->name, sc->sc_name); 2999 return (EINVAL); 3000 } 3001 disk = g_raid3_init_disk(sc, pp, md, &error); 3002 if (disk == NULL) 3003 return (error); 3004 error = g_raid3_event_send(disk, G_RAID3_DISK_STATE_NEW, 3005 G_RAID3_EVENT_WAIT); 3006 if (error != 0) 3007 return (error); 3008 if (md->md_version < G_RAID3_VERSION) { 3009 G_RAID3_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).", 3010 pp->name, md->md_version, G_RAID3_VERSION); 3011 g_raid3_update_metadata(disk); 3012 } 3013 return (0); 3014 } 3015 3016 static void 3017 g_raid3_destroy_delayed(void *arg, int flag) 3018 { 3019 struct g_raid3_softc *sc; 3020 int error; 3021 3022 if (flag == EV_CANCEL) { 3023 G_RAID3_DEBUG(1, "Destroying canceled."); 3024 return; 3025 } 3026 sc = arg; 3027 g_topology_unlock(); 3028 sx_xlock(&sc->sc_lock); 3029 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) == 0, 3030 ("DESTROY flag set on %s.", sc->sc_name)); 3031 KASSERT((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0, 3032 ("DESTROYING flag not set on %s.", sc->sc_name)); 3033 G_RAID3_DEBUG(0, "Destroying %s (delayed).", sc->sc_name); 3034 error = g_raid3_destroy(sc, G_RAID3_DESTROY_SOFT); 3035 if (error != 0) { 3036 G_RAID3_DEBUG(0, "Cannot destroy %s.", sc->sc_name); 3037 sx_xunlock(&sc->sc_lock); 3038 } 3039 g_topology_lock(); 3040 } 3041 3042 static int 3043 g_raid3_access(struct g_provider *pp, int acr, int acw, int ace) 3044 { 3045 struct g_raid3_softc *sc; 3046 int dcr, dcw, dce, error = 0; 3047 3048 g_topology_assert(); 3049 G_RAID3_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr, 3050 acw, ace); 3051 3052 sc = pp->geom->softc; 3053 if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0) 3054 return (0); 3055 KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name)); 3056 3057 dcr = pp->acr + acr; 3058 dcw = pp->acw + acw; 3059 dce = pp->ace + ace; 3060 3061 g_topology_unlock(); 3062 sx_xlock(&sc->sc_lock); 3063 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROY) != 0 || 3064 g_raid3_ndisks(sc, G_RAID3_DISK_STATE_ACTIVE) < sc->sc_ndisks - 1) { 3065 if (acr > 0 || acw > 0 || ace > 0) 3066 error = ENXIO; 3067 goto end; 3068 } 3069 if (dcw == 0 && !sc->sc_idle) 3070 g_raid3_idle(sc, dcw); 3071 if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_DESTROYING) != 0) { 3072 if (acr > 0 || acw > 0 || ace > 0) { 3073 error = ENXIO; 3074 goto end; 3075 } 3076 if (dcr == 0 && dcw == 0 && dce == 0) { 3077 g_post_event(g_raid3_destroy_delayed, sc, M_WAITOK, 3078 sc, NULL); 3079 } 3080 } 3081 end: 3082 sx_xunlock(&sc->sc_lock); 3083 g_topology_lock(); 3084 return (error); 3085 } 3086 3087 static struct g_geom * 3088 g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) 3089 { 3090 struct g_raid3_softc *sc; 3091 struct g_geom *gp; 3092 int error, timeout; 3093 u_int n; 3094 3095 g_topology_assert(); 3096 G_RAID3_DEBUG(1, "Creating device %s (id=%u).", md->md_name, md->md_id); 3097 3098 /* One disk is minimum. */ 3099 if (md->md_all < 1) 3100 return (NULL); 3101 /* 3102 * Action geom. 3103 */ 3104 gp = g_new_geomf(mp, "%s", md->md_name); 3105 sc = malloc(sizeof(*sc), M_RAID3, M_WAITOK | M_ZERO); 3106 sc->sc_disks = malloc(sizeof(struct g_raid3_disk) * md->md_all, M_RAID3, 3107 M_WAITOK | M_ZERO); 3108 gp->start = g_raid3_start; 3109 gp->orphan = g_raid3_orphan; 3110 gp->access = g_raid3_access; 3111 gp->dumpconf = g_raid3_dumpconf; 3112 3113 sc->sc_id = md->md_id; 3114 sc->sc_mediasize = md->md_mediasize; 3115 sc->sc_sectorsize = md->md_sectorsize; 3116 sc->sc_ndisks = md->md_all; 3117 sc->sc_round_robin = 0; 3118 sc->sc_flags = md->md_mflags; 3119 sc->sc_bump_id = 0; 3120 sc->sc_idle = 1; 3121 sc->sc_last_write = time_uptime; 3122 sc->sc_writes = 0; 3123 for (n = 0; n < sc->sc_ndisks; n++) { 3124 sc->sc_disks[n].d_softc = sc; 3125 sc->sc_disks[n].d_no = n; 3126 sc->sc_disks[n].d_state = G_RAID3_DISK_STATE_NODISK; 3127 } 3128 sx_init(&sc->sc_lock, "graid3:lock"); 3129 bioq_init(&sc->sc_queue); 3130 mtx_init(&sc->sc_queue_mtx, "graid3:queue", NULL, MTX_DEF); 3131 bioq_init(&sc->sc_regular_delayed); 3132 bioq_init(&sc->sc_inflight); 3133 bioq_init(&sc->sc_sync_delayed); 3134 TAILQ_INIT(&sc->sc_events); 3135 mtx_init(&sc->sc_events_mtx, "graid3:events", NULL, MTX_DEF); 3136 callout_init(&sc->sc_callout, CALLOUT_MPSAFE); 3137 sc->sc_state = G_RAID3_DEVICE_STATE_STARTING; 3138 gp->softc = sc; 3139 sc->sc_geom = gp; 3140 sc->sc_provider = NULL; 3141 /* 3142 * Synchronization geom. 3143 */ 3144 gp = g_new_geomf(mp, "%s.sync", md->md_name); 3145 gp->softc = sc; 3146 gp->orphan = g_raid3_orphan; 3147 sc->sc_sync.ds_geom = gp; 3148 3149 if (!g_raid3_use_malloc) { 3150 sc->sc_zones[G_RAID3_ZONE_64K].sz_zone = uma_zcreate("gr3:64k", 3151 65536, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3152 UMA_ALIGN_PTR, 0); 3153 sc->sc_zones[G_RAID3_ZONE_64K].sz_inuse = 0; 3154 sc->sc_zones[G_RAID3_ZONE_64K].sz_max = g_raid3_n64k; 3155 sc->sc_zones[G_RAID3_ZONE_64K].sz_requested = 3156 sc->sc_zones[G_RAID3_ZONE_64K].sz_failed = 0; 3157 sc->sc_zones[G_RAID3_ZONE_16K].sz_zone = uma_zcreate("gr3:16k", 3158 16384, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3159 UMA_ALIGN_PTR, 0); 3160 sc->sc_zones[G_RAID3_ZONE_16K].sz_inuse = 0; 3161 sc->sc_zones[G_RAID3_ZONE_16K].sz_max = g_raid3_n16k; 3162 sc->sc_zones[G_RAID3_ZONE_16K].sz_requested = 3163 sc->sc_zones[G_RAID3_ZONE_16K].sz_failed = 0; 3164 sc->sc_zones[G_RAID3_ZONE_4K].sz_zone = uma_zcreate("gr3:4k", 3165 4096, g_raid3_uma_ctor, g_raid3_uma_dtor, NULL, NULL, 3166 UMA_ALIGN_PTR, 0); 3167 sc->sc_zones[G_RAID3_ZONE_4K].sz_inuse = 0; 3168 sc->sc_zones[G_RAID3_ZONE_4K].sz_max = g_raid3_n4k; 3169 sc->sc_zones[G_RAID3_ZONE_4K].sz_requested = 3170 sc->sc_zones[G_RAID3_ZONE_4K].sz_failed = 0; 3171 } 3172 3173 error = kproc_create(g_raid3_worker, sc, &sc->sc_worker, 0, 0, 3174 "g_raid3 %s", md->md_name); 3175 if (error != 0) { 3176 G_RAID3_DEBUG(1, "Cannot create kernel thread for %s.", 3177 sc->sc_name); 3178 if (!g_raid3_use_malloc) { 3179 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_64K].sz_zone); 3180 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_16K].sz_zone); 3181 uma_zdestroy(sc->sc_zones[G_RAID3_ZONE_4K].sz_zone); 3182 } 3183 g_destroy_geom(sc->sc_sync.ds_geom); 3184 mtx_destroy(&sc->sc_events_mtx); 3185 mtx_destroy(&sc->sc_queue_mtx); 3186 sx_destroy(&sc->sc_lock); 3187 g_destroy_geom(sc->sc_geom); 3188 free(sc->sc_disks, M_RAID3); 3189 free(sc, M_RAID3); 3190 return (NULL); 3191 } 3192 3193 G_RAID3_DEBUG(1, "Device %s created (%u components, id=%u).", 3194 sc->sc_name, sc->sc_ndisks, sc->sc_id); 3195 3196 sc->sc_rootmount = root_mount_hold("GRAID3"); 3197 G_RAID3_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount); 3198 3199 /* 3200 * Run timeout. 3201 */ 3202 timeout = atomic_load_acq_int(&g_raid3_timeout); 3203 callout_reset(&sc->sc_callout, timeout * hz, g_raid3_go, sc); 3204 return (sc->sc_geom); 3205 } 3206 3207 int 3208 g_raid3_destroy(struct g_raid3_softc *sc, int how) 3209 { 3210 struct g_provider *pp; 3211 3212 g_topology_assert_not(); 3213 if (sc == NULL) 3214 return (ENXIO); 3215 sx_assert(&sc->sc_lock, SX_XLOCKED); 3216 3217 pp = sc->sc_provider; 3218 if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { 3219 switch (how) { 3220 case G_RAID3_DESTROY_SOFT: 3221 G_RAID3_DEBUG(1, 3222 "Device %s is still open (r%dw%de%d).", pp->name, 3223 pp->acr, pp->acw, pp->ace); 3224 return (EBUSY); 3225 case G_RAID3_DESTROY_DELAYED: 3226 G_RAID3_DEBUG(1, 3227 "Device %s will be destroyed on last close.", 3228 pp->name); 3229 if (sc->sc_syncdisk != NULL) 3230 g_raid3_sync_stop(sc, 1); 3231 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROYING; 3232 return (EBUSY); 3233 case G_RAID3_DESTROY_HARD: 3234 G_RAID3_DEBUG(1, "Device %s is still open, so it " 3235 "can't be definitely removed.", pp->name); 3236 break; 3237 } 3238 } 3239 3240 g_topology_lock(); 3241 if (sc->sc_geom->softc == NULL) { 3242 g_topology_unlock(); 3243 return (0); 3244 } 3245 sc->sc_geom->softc = NULL; 3246 sc->sc_sync.ds_geom->softc = NULL; 3247 g_topology_unlock(); 3248 3249 sc->sc_flags |= G_RAID3_DEVICE_FLAG_DESTROY; 3250 sc->sc_flags |= G_RAID3_DEVICE_FLAG_WAIT; 3251 G_RAID3_DEBUG(4, "%s: Waking up %p.", __func__, sc); 3252 sx_xunlock(&sc->sc_lock); 3253 mtx_lock(&sc->sc_queue_mtx); 3254 wakeup(sc); 3255 wakeup(&sc->sc_queue); 3256 mtx_unlock(&sc->sc_queue_mtx); 3257 G_RAID3_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker); 3258 while (sc->sc_worker != NULL) 3259 tsleep(&sc->sc_worker, PRIBIO, "r3:destroy", hz / 5); 3260 G_RAID3_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker); 3261 sx_xlock(&sc->sc_lock); 3262 g_raid3_destroy_device(sc); 3263 free(sc->sc_disks, M_RAID3); 3264 free(sc, M_RAID3); 3265 return (0); 3266 } 3267 3268 static void 3269 g_raid3_taste_orphan(struct g_consumer *cp) 3270 { 3271 3272 KASSERT(1 == 0, ("%s called while tasting %s.", __func__, 3273 cp->provider->name)); 3274 } 3275 3276 static struct g_geom * 3277 g_raid3_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 3278 { 3279 struct g_raid3_metadata md; 3280 struct g_raid3_softc *sc; 3281 struct g_consumer *cp; 3282 struct g_geom *gp; 3283 int error; 3284 3285 g_topology_assert(); 3286 g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name); 3287 G_RAID3_DEBUG(2, "Tasting %s.", pp->name); 3288 3289 gp = g_new_geomf(mp, "raid3:taste"); 3290 /* This orphan function should be never called. */ 3291 gp->orphan = g_raid3_taste_orphan; 3292 cp = g_new_consumer(gp); 3293 g_attach(cp, pp); 3294 error = g_raid3_read_metadata(cp, &md); 3295 g_detach(cp); 3296 g_destroy_consumer(cp); 3297 g_destroy_geom(gp); 3298 if (error != 0) 3299 return (NULL); 3300 gp = NULL; 3301 3302 if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0) 3303 return (NULL); 3304 if (md.md_provsize != 0 && md.md_provsize != pp->mediasize) 3305 return (NULL); 3306 if (g_raid3_debug >= 2) 3307 raid3_metadata_dump(&md); 3308 3309 /* 3310 * Let's check if device already exists. 3311 */ 3312 sc = NULL; 3313 LIST_FOREACH(gp, &mp->geom, geom) { 3314 sc = gp->softc; 3315 if (sc == NULL) 3316 continue; 3317 if (sc->sc_sync.ds_geom == gp) 3318 continue; 3319 if (strcmp(md.md_name, sc->sc_name) != 0) 3320 continue; 3321 if (md.md_id != sc->sc_id) { 3322 G_RAID3_DEBUG(0, "Device %s already configured.", 3323 sc->sc_name); 3324 return (NULL); 3325 } 3326 break; 3327 } 3328 if (gp == NULL) { 3329 gp = g_raid3_create(mp, &md); 3330 if (gp == NULL) { 3331 G_RAID3_DEBUG(0, "Cannot create device %s.", 3332 md.md_name); 3333 return (NULL); 3334 } 3335 sc = gp->softc; 3336 } 3337 G_RAID3_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name); 3338 g_topology_unlock(); 3339 sx_xlock(&sc->sc_lock); 3340 error = g_raid3_add_disk(sc, pp, &md); 3341 if (error != 0) { 3342 G_RAID3_DEBUG(0, "Cannot add disk %s to %s (error=%d).", 3343 pp->name, gp->name, error); 3344 if (g_raid3_ndisks(sc, G_RAID3_DISK_STATE_NODISK) == 3345 sc->sc_ndisks) { 3346 g_cancel_event(sc); 3347 g_raid3_destroy(sc, G_RAID3_DESTROY_HARD); 3348 g_topology_lock(); 3349 return (NULL); 3350 } 3351 gp = NULL; 3352 } 3353 sx_xunlock(&sc->sc_lock); 3354 g_topology_lock(); 3355 return (gp); 3356 } 3357 3358 static int 3359 g_raid3_destroy_geom(struct gctl_req *req __unused, struct g_class *mp __unused, 3360 struct g_geom *gp) 3361 { 3362 struct g_raid3_softc *sc; 3363 int error; 3364 3365 g_topology_unlock(); 3366 sc = gp->softc; 3367 sx_xlock(&sc->sc_lock); 3368 g_cancel_event(sc); 3369 error = g_raid3_destroy(gp->softc, G_RAID3_DESTROY_SOFT); 3370 if (error != 0) 3371 sx_xunlock(&sc->sc_lock); 3372 g_topology_lock(); 3373 return (error); 3374 } 3375 3376 static void 3377 g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 3378 struct g_consumer *cp, struct g_provider *pp) 3379 { 3380 struct g_raid3_softc *sc; 3381 3382 g_topology_assert(); 3383 3384 sc = gp->softc; 3385 if (sc == NULL) 3386 return; 3387 /* Skip synchronization geom. */ 3388 if (gp == sc->sc_sync.ds_geom) 3389 return; 3390 if (pp != NULL) { 3391 /* Nothing here. */ 3392 } else if (cp != NULL) { 3393 struct g_raid3_disk *disk; 3394 3395 disk = cp->private; 3396 if (disk == NULL) 3397 return; 3398 g_topology_unlock(); 3399 sx_xlock(&sc->sc_lock); 3400 sbuf_printf(sb, "%s<Type>", indent); 3401 if (disk->d_no == sc->sc_ndisks - 1) 3402 sbuf_printf(sb, "PARITY"); 3403 else 3404 sbuf_printf(sb, "DATA"); 3405 sbuf_printf(sb, "</Type>\n"); 3406 sbuf_printf(sb, "%s<Number>%u</Number>\n", indent, 3407 (u_int)disk->d_no); 3408 if (disk->d_state == G_RAID3_DISK_STATE_SYNCHRONIZING) { 3409 sbuf_printf(sb, "%s<Synchronized>", indent); 3410 if (disk->d_sync.ds_offset == 0) 3411 sbuf_printf(sb, "0%%"); 3412 else { 3413 sbuf_printf(sb, "%u%%", 3414 (u_int)((disk->d_sync.ds_offset * 100) / 3415 (sc->sc_mediasize / (sc->sc_ndisks - 1)))); 3416 } 3417 sbuf_printf(sb, "</Synchronized>\n"); 3418 } 3419 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, 3420 disk->d_sync.ds_syncid); 3421 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, disk->d_genid); 3422 sbuf_printf(sb, "%s<Flags>", indent); 3423 if (disk->d_flags == 0) 3424 sbuf_printf(sb, "NONE"); 3425 else { 3426 int first = 1; 3427 3428 #define ADD_FLAG(flag, name) do { \ 3429 if ((disk->d_flags & (flag)) != 0) { \ 3430 if (!first) \ 3431 sbuf_printf(sb, ", "); \ 3432 else \ 3433 first = 0; \ 3434 sbuf_printf(sb, name); \ 3435 } \ 3436 } while (0) 3437 ADD_FLAG(G_RAID3_DISK_FLAG_DIRTY, "DIRTY"); 3438 ADD_FLAG(G_RAID3_DISK_FLAG_HARDCODED, "HARDCODED"); 3439 ADD_FLAG(G_RAID3_DISK_FLAG_SYNCHRONIZING, 3440 "SYNCHRONIZING"); 3441 ADD_FLAG(G_RAID3_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC"); 3442 ADD_FLAG(G_RAID3_DISK_FLAG_BROKEN, "BROKEN"); 3443 #undef ADD_FLAG 3444 } 3445 sbuf_printf(sb, "</Flags>\n"); 3446 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3447 g_raid3_disk_state2str(disk->d_state)); 3448 sx_xunlock(&sc->sc_lock); 3449 g_topology_lock(); 3450 } else { 3451 g_topology_unlock(); 3452 sx_xlock(&sc->sc_lock); 3453 if (!g_raid3_use_malloc) { 3454 sbuf_printf(sb, 3455 "%s<Zone4kRequested>%u</Zone4kRequested>\n", indent, 3456 sc->sc_zones[G_RAID3_ZONE_4K].sz_requested); 3457 sbuf_printf(sb, 3458 "%s<Zone4kFailed>%u</Zone4kFailed>\n", indent, 3459 sc->sc_zones[G_RAID3_ZONE_4K].sz_failed); 3460 sbuf_printf(sb, 3461 "%s<Zone16kRequested>%u</Zone16kRequested>\n", indent, 3462 sc->sc_zones[G_RAID3_ZONE_16K].sz_requested); 3463 sbuf_printf(sb, 3464 "%s<Zone16kFailed>%u</Zone16kFailed>\n", indent, 3465 sc->sc_zones[G_RAID3_ZONE_16K].sz_failed); 3466 sbuf_printf(sb, 3467 "%s<Zone64kRequested>%u</Zone64kRequested>\n", indent, 3468 sc->sc_zones[G_RAID3_ZONE_64K].sz_requested); 3469 sbuf_printf(sb, 3470 "%s<Zone64kFailed>%u</Zone64kFailed>\n", indent, 3471 sc->sc_zones[G_RAID3_ZONE_64K].sz_failed); 3472 } 3473 sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id); 3474 sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid); 3475 sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid); 3476 sbuf_printf(sb, "%s<Flags>", indent); 3477 if (sc->sc_flags == 0) 3478 sbuf_printf(sb, "NONE"); 3479 else { 3480 int first = 1; 3481 3482 #define ADD_FLAG(flag, name) do { \ 3483 if ((sc->sc_flags & (flag)) != 0) { \ 3484 if (!first) \ 3485 sbuf_printf(sb, ", "); \ 3486 else \ 3487 first = 0; \ 3488 sbuf_printf(sb, name); \ 3489 } \ 3490 } while (0) 3491 ADD_FLAG(G_RAID3_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC"); 3492 ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); 3493 ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, 3494 "ROUND-ROBIN"); 3495 ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); 3496 #undef ADD_FLAG 3497 } 3498 sbuf_printf(sb, "</Flags>\n"); 3499 sbuf_printf(sb, "%s<Components>%u</Components>\n", indent, 3500 sc->sc_ndisks); 3501 sbuf_printf(sb, "%s<State>%s</State>\n", indent, 3502 g_raid3_device_state2str(sc->sc_state)); 3503 sx_xunlock(&sc->sc_lock); 3504 g_topology_lock(); 3505 } 3506 } 3507 3508 static void 3509 g_raid3_shutdown_pre_sync(void *arg, int howto) 3510 { 3511 struct g_class *mp; 3512 struct g_geom *gp, *gp2; 3513 struct g_raid3_softc *sc; 3514 int error; 3515 3516 mp = arg; 3517 DROP_GIANT(); 3518 g_topology_lock(); 3519 LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) { 3520 if ((sc = gp->softc) == NULL) 3521 continue; 3522 /* Skip synchronization geom. */ 3523 if (gp == sc->sc_sync.ds_geom) 3524 continue; 3525 g_topology_unlock(); 3526 sx_xlock(&sc->sc_lock); 3527 g_cancel_event(sc); 3528 error = g_raid3_destroy(sc, G_RAID3_DESTROY_DELAYED); 3529 if (error != 0) 3530 sx_xunlock(&sc->sc_lock); 3531 g_topology_lock(); 3532 } 3533 g_topology_unlock(); 3534 PICKUP_GIANT(); 3535 } 3536 3537 static void 3538 g_raid3_init(struct g_class *mp) 3539 { 3540 3541 g_raid3_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync, 3542 g_raid3_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST); 3543 if (g_raid3_pre_sync == NULL) 3544 G_RAID3_DEBUG(0, "Warning! Cannot register shutdown event."); 3545 } 3546 3547 static void 3548 g_raid3_fini(struct g_class *mp) 3549 { 3550 3551 if (g_raid3_pre_sync != NULL) 3552 EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_raid3_pre_sync); 3553 } 3554 3555 DECLARE_GEOM_CLASS(g_raid3_class, g_raid3); 3556