xref: /freebsd/sys/geom/mirror/g_mirror.c (revision cddbc3b40812213ff00041f79174cac0be360a2a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/eventhandler.h>
36 #include <sys/fail.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sbuf.h>
45 #include <sys/sched.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48 
49 #include <geom/geom.h>
50 #include <geom/mirror/g_mirror.h>
51 
52 FEATURE(geom_mirror, "GEOM mirroring support");
53 
54 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
55 
56 SYSCTL_DECL(_kern_geom);
57 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
58     "GEOM_MIRROR stuff");
59 int g_mirror_debug = 0;
60 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
61     "Debug level");
62 bool g_launch_mirror_before_timeout = true;
63 SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
64     CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
65     "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
66     "before launching mirrors");
67 static u_int g_mirror_timeout = 4;
68 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
69     0, "Time to wait on all mirror components");
70 static u_int g_mirror_idletime = 5;
71 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
72     &g_mirror_idletime, 0, "Mark components as clean when idling");
73 static u_int g_mirror_disconnect_on_failure = 1;
74 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
75     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
76 static u_int g_mirror_syncreqs = 2;
77 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
78     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
79 static u_int g_mirror_sync_period = 5;
80 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
81     &g_mirror_sync_period, 0,
82     "Metadata update period during synchronization, in seconds");
83 
/*
 * msleep() wrapper that emits a level-4 debug message right before going to
 * sleep on the channel and another one right after msleep() returns.  The
 * return value of msleep() is discarded.
 */
#define	MSLEEP(chan, lock, prio, msg, timo)	do {			\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (chan));	\
	msleep((chan), (lock), (prio), (msg), (timo));			\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (chan));	\
} while (0)
89 
90 static eventhandler_tag g_mirror_post_sync = NULL;
91 static int g_mirror_shutdown = 0;
92 
93 static g_ctl_destroy_geom_t g_mirror_destroy_geom;
94 static g_taste_t g_mirror_taste;
95 static g_init_t g_mirror_init;
96 static g_fini_t g_mirror_fini;
97 static g_provgone_t g_mirror_providergone;
98 static g_resize_t g_mirror_resize;
99 
100 struct g_class g_mirror_class = {
101 	.name = G_MIRROR_CLASS_NAME,
102 	.version = G_VERSION,
103 	.ctlreq = g_mirror_config,
104 	.taste = g_mirror_taste,
105 	.destroy_geom = g_mirror_destroy_geom,
106 	.init = g_mirror_init,
107 	.fini = g_mirror_fini,
108 	.providergone = g_mirror_providergone,
109 	.resize = g_mirror_resize
110 };
111 
112 
113 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
114 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
115 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
116 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
117     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
118 static int g_mirror_refresh_device(struct g_mirror_softc *sc,
119     const struct g_provider *pp, const struct g_mirror_metadata *md);
120 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
121     struct bio *bp, off_t offset);
122 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
123 static void g_mirror_register_request(struct g_mirror_softc *sc,
124     struct bio *bp);
125 static void g_mirror_sync_release(struct g_mirror_softc *sc);
126 
127 
128 static const char *
129 g_mirror_disk_state2str(int state)
130 {
131 
132 	switch (state) {
133 	case G_MIRROR_DISK_STATE_NONE:
134 		return ("NONE");
135 	case G_MIRROR_DISK_STATE_NEW:
136 		return ("NEW");
137 	case G_MIRROR_DISK_STATE_ACTIVE:
138 		return ("ACTIVE");
139 	case G_MIRROR_DISK_STATE_STALE:
140 		return ("STALE");
141 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
142 		return ("SYNCHRONIZING");
143 	case G_MIRROR_DISK_STATE_DISCONNECTED:
144 		return ("DISCONNECTED");
145 	case G_MIRROR_DISK_STATE_DESTROY:
146 		return ("DESTROY");
147 	default:
148 		return ("INVALID");
149 	}
150 }
151 
152 static const char *
153 g_mirror_device_state2str(int state)
154 {
155 
156 	switch (state) {
157 	case G_MIRROR_DEVICE_STATE_STARTING:
158 		return ("STARTING");
159 	case G_MIRROR_DEVICE_STATE_RUNNING:
160 		return ("RUNNING");
161 	default:
162 		return ("INVALID");
163 	}
164 }
165 
166 static const char *
167 g_mirror_get_diskname(struct g_mirror_disk *disk)
168 {
169 
170 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
171 		return ("[unknown]");
172 	return (disk->d_name);
173 }
174 
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single thread, which simplifies locking.
 */
180 static void
181 g_mirror_event_free(struct g_mirror_event *ep)
182 {
183 
184 	free(ep, M_MIRROR);
185 }
186 
187 int
188 g_mirror_event_send(void *arg, int state, int flags)
189 {
190 	struct g_mirror_softc *sc;
191 	struct g_mirror_disk *disk;
192 	struct g_mirror_event *ep;
193 	int error;
194 
195 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
196 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
197 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
198 		disk = NULL;
199 		sc = arg;
200 	} else {
201 		disk = arg;
202 		sc = disk->d_softc;
203 	}
204 	ep->e_disk = disk;
205 	ep->e_state = state;
206 	ep->e_flags = flags;
207 	ep->e_error = 0;
208 	mtx_lock(&sc->sc_events_mtx);
209 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
210 	mtx_unlock(&sc->sc_events_mtx);
211 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
212 	mtx_lock(&sc->sc_queue_mtx);
213 	wakeup(sc);
214 	mtx_unlock(&sc->sc_queue_mtx);
215 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
216 		return (0);
217 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
218 	sx_xunlock(&sc->sc_lock);
219 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
220 		mtx_lock(&sc->sc_events_mtx);
221 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
222 		    hz * 5);
223 	}
224 	error = ep->e_error;
225 	g_mirror_event_free(ep);
226 	sx_xlock(&sc->sc_lock);
227 	return (error);
228 }
229 
230 static struct g_mirror_event *
231 g_mirror_event_first(struct g_mirror_softc *sc)
232 {
233 	struct g_mirror_event *ep;
234 
235 	mtx_lock(&sc->sc_events_mtx);
236 	ep = TAILQ_FIRST(&sc->sc_events);
237 	mtx_unlock(&sc->sc_events_mtx);
238 	return (ep);
239 }
240 
241 static void
242 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
243 {
244 
245 	mtx_lock(&sc->sc_events_mtx);
246 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
247 	mtx_unlock(&sc->sc_events_mtx);
248 }
249 
250 static void
251 g_mirror_event_cancel(struct g_mirror_disk *disk)
252 {
253 	struct g_mirror_softc *sc;
254 	struct g_mirror_event *ep, *tmpep;
255 
256 	sc = disk->d_softc;
257 	sx_assert(&sc->sc_lock, SX_XLOCKED);
258 
259 	mtx_lock(&sc->sc_events_mtx);
260 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
261 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
262 			continue;
263 		if (ep->e_disk != disk)
264 			continue;
265 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
266 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
267 			g_mirror_event_free(ep);
268 		else {
269 			ep->e_error = ECANCELED;
270 			wakeup(ep);
271 		}
272 	}
273 	mtx_unlock(&sc->sc_events_mtx);
274 }
275 
276 /*
277  * Return the number of disks in given state.
278  * If state is equal to -1, count all connected disks.
279  */
280 u_int
281 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
282 {
283 	struct g_mirror_disk *disk;
284 	u_int n = 0;
285 
286 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
287 		if (state == -1 || disk->d_state == state)
288 			n++;
289 	}
290 	return (n);
291 }
292 
293 /*
294  * Find a disk in mirror by its disk ID.
295  */
296 static struct g_mirror_disk *
297 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
298 {
299 	struct g_mirror_disk *disk;
300 
301 	sx_assert(&sc->sc_lock, SX_XLOCKED);
302 
303 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
304 		if (disk->d_id == id)
305 			return (disk);
306 	}
307 	return (NULL);
308 }
309 
310 static u_int
311 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
312 {
313 	struct bio *bp;
314 	u_int nreqs = 0;
315 
316 	mtx_lock(&sc->sc_queue_mtx);
317 	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
318 		if (bp->bio_from == cp)
319 			nreqs++;
320 	}
321 	mtx_unlock(&sc->sc_queue_mtx);
322 	return (nreqs);
323 }
324 
325 static int
326 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
327 {
328 
329 	if (cp->index > 0) {
330 		G_MIRROR_DEBUG(2,
331 		    "I/O requests for %s exist, can't destroy it now.",
332 		    cp->provider->name);
333 		return (1);
334 	}
335 	if (g_mirror_nrequests(sc, cp) > 0) {
336 		G_MIRROR_DEBUG(2,
337 		    "I/O requests for %s in queue, can't destroy it now.",
338 		    cp->provider->name);
339 		return (1);
340 	}
341 	return (0);
342 }
343 
344 static void
345 g_mirror_destroy_consumer(void *arg, int flags __unused)
346 {
347 	struct g_consumer *cp;
348 
349 	g_topology_assert();
350 
351 	cp = arg;
352 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
353 	g_detach(cp);
354 	g_destroy_consumer(cp);
355 }
356 
357 static void
358 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
359 {
360 	struct g_provider *pp;
361 	int retaste_wait;
362 
363 	g_topology_assert();
364 
365 	cp->private = NULL;
366 	if (g_mirror_is_busy(sc, cp))
367 		return;
368 	pp = cp->provider;
369 	retaste_wait = 0;
370 	if (cp->acw == 1) {
371 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
372 			retaste_wait = 1;
373 	}
374 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
375 	    -cp->acw, -cp->ace, 0);
376 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
377 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
378 	if (retaste_wait) {
379 		/*
380 		 * After retaste event was send (inside g_access()), we can send
381 		 * event to detach and destroy consumer.
382 		 * A class, which has consumer to the given provider connected
383 		 * will not receive retaste event for the provider.
384 		 * This is the way how I ignore retaste events when I close
385 		 * consumers opened for write: I detach and destroy consumer
386 		 * after retaste event is sent.
387 		 */
388 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
389 		return;
390 	}
391 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
392 	g_detach(cp);
393 	g_destroy_consumer(cp);
394 }
395 
396 static int
397 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
398 {
399 	struct g_consumer *cp;
400 	int error;
401 
402 	g_topology_assert_not();
403 	KASSERT(disk->d_consumer == NULL,
404 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
405 
406 	g_topology_lock();
407 	cp = g_new_consumer(disk->d_softc->sc_geom);
408 	cp->flags |= G_CF_DIRECT_RECEIVE;
409 	error = g_attach(cp, pp);
410 	if (error != 0) {
411 		g_destroy_consumer(cp);
412 		g_topology_unlock();
413 		return (error);
414 	}
415 	error = g_access(cp, 1, 1, 1);
416 	if (error != 0) {
417 		g_detach(cp);
418 		g_destroy_consumer(cp);
419 		g_topology_unlock();
420 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
421 		    pp->name, error);
422 		return (error);
423 	}
424 	g_topology_unlock();
425 	disk->d_consumer = cp;
426 	disk->d_consumer->private = disk;
427 	disk->d_consumer->index = 0;
428 
429 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
430 	return (0);
431 }
432 
433 static void
434 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
435 {
436 
437 	g_topology_assert();
438 
439 	if (cp == NULL)
440 		return;
441 	if (cp->provider != NULL)
442 		g_mirror_kill_consumer(sc, cp);
443 	else
444 		g_destroy_consumer(cp);
445 }
446 
447 /*
448  * Initialize disk. This means allocate memory, create consumer, attach it
449  * to the provider and open access (r1w1e1) to it.
450  */
451 static struct g_mirror_disk *
452 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
453     struct g_mirror_metadata *md, int *errorp)
454 {
455 	struct g_mirror_disk *disk;
456 	int i, error;
457 
458 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
459 	if (disk == NULL) {
460 		error = ENOMEM;
461 		goto fail;
462 	}
463 	disk->d_softc = sc;
464 	error = g_mirror_connect_disk(disk, pp);
465 	if (error != 0)
466 		goto fail;
467 	disk->d_id = md->md_did;
468 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
469 	disk->d_priority = md->md_priority;
470 	disk->d_flags = md->md_dflags;
471 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
472 	if (error == 0 && i != 0)
473 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
474 	if (md->md_provider[0] != '\0')
475 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
476 	disk->d_sync.ds_consumer = NULL;
477 	disk->d_sync.ds_offset = md->md_sync_offset;
478 	disk->d_sync.ds_offset_done = md->md_sync_offset;
479 	disk->d_sync.ds_update_ts = time_uptime;
480 	disk->d_genid = md->md_genid;
481 	disk->d_sync.ds_syncid = md->md_syncid;
482 	disk->d_init_ndisks = md->md_all;
483 	disk->d_init_slice = md->md_slice;
484 	disk->d_init_balance = md->md_balance;
485 	disk->d_init_mediasize = md->md_mediasize;
486 	if (errorp != NULL)
487 		*errorp = 0;
488 	return (disk);
489 fail:
490 	if (errorp != NULL)
491 		*errorp = error;
492 	if (disk != NULL)
493 		free(disk, M_MIRROR);
494 	return (NULL);
495 }
496 
497 static void
498 g_mirror_destroy_disk(struct g_mirror_disk *disk)
499 {
500 	struct g_mirror_softc *sc;
501 
502 	g_topology_assert_not();
503 	sc = disk->d_softc;
504 	sx_assert(&sc->sc_lock, SX_XLOCKED);
505 
506 	g_topology_lock();
507 	LIST_REMOVE(disk, d_next);
508 	g_topology_unlock();
509 	g_mirror_event_cancel(disk);
510 	if (sc->sc_hint == disk)
511 		sc->sc_hint = NULL;
512 	switch (disk->d_state) {
513 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
514 		g_mirror_sync_stop(disk, 1);
515 		/* FALLTHROUGH */
516 	case G_MIRROR_DISK_STATE_NEW:
517 	case G_MIRROR_DISK_STATE_STALE:
518 	case G_MIRROR_DISK_STATE_ACTIVE:
519 		g_topology_lock();
520 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
521 		g_topology_unlock();
522 		free(disk, M_MIRROR);
523 		break;
524 	default:
525 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
526 		    g_mirror_get_diskname(disk),
527 		    g_mirror_disk_state2str(disk->d_state)));
528 	}
529 }
530 
531 static void
532 g_mirror_free_device(struct g_mirror_softc *sc)
533 {
534 
535 	g_topology_assert();
536 
537 	mtx_destroy(&sc->sc_queue_mtx);
538 	mtx_destroy(&sc->sc_events_mtx);
539 	mtx_destroy(&sc->sc_done_mtx);
540 	sx_destroy(&sc->sc_lock);
541 	free(sc, M_MIRROR);
542 }
543 
544 static void
545 g_mirror_providergone(struct g_provider *pp)
546 {
547 	struct g_mirror_softc *sc = pp->private;
548 
549 	if ((--sc->sc_refcnt) == 0)
550 		g_mirror_free_device(sc);
551 }
552 
553 static void
554 g_mirror_destroy_device(struct g_mirror_softc *sc)
555 {
556 	struct g_mirror_disk *disk;
557 	struct g_mirror_event *ep;
558 	struct g_geom *gp;
559 	struct g_consumer *cp, *tmpcp;
560 
561 	g_topology_assert_not();
562 	sx_assert(&sc->sc_lock, SX_XLOCKED);
563 
564 	gp = sc->sc_geom;
565 	if (sc->sc_provider != NULL)
566 		g_mirror_destroy_provider(sc);
567 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
568 	    disk = LIST_FIRST(&sc->sc_disks)) {
569 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
570 		g_mirror_update_metadata(disk);
571 		g_mirror_destroy_disk(disk);
572 	}
573 	while ((ep = g_mirror_event_first(sc)) != NULL) {
574 		g_mirror_event_remove(sc, ep);
575 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
576 			g_mirror_event_free(ep);
577 		else {
578 			ep->e_error = ECANCELED;
579 			ep->e_flags |= G_MIRROR_EVENT_DONE;
580 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
581 			mtx_lock(&sc->sc_events_mtx);
582 			wakeup(ep);
583 			mtx_unlock(&sc->sc_events_mtx);
584 		}
585 	}
586 	callout_drain(&sc->sc_callout);
587 
588 	g_topology_lock();
589 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
590 		g_mirror_disconnect_consumer(sc, cp);
591 	}
592 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
593 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
594 	g_wither_geom(gp, ENXIO);
595 	sx_xunlock(&sc->sc_lock);
596 	if ((--sc->sc_refcnt) == 0)
597 		g_mirror_free_device(sc);
598 	g_topology_unlock();
599 }
600 
601 static void
602 g_mirror_orphan(struct g_consumer *cp)
603 {
604 	struct g_mirror_disk *disk;
605 
606 	g_topology_assert();
607 
608 	disk = cp->private;
609 	if (disk == NULL)
610 		return;
611 	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
612 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
613 	    G_MIRROR_EVENT_DONTWAIT);
614 }
615 
616 /*
617  * Function should return the next active disk on the list.
618  * It is possible that it will be the same disk as given.
619  * If there are no active disks on list, NULL is returned.
620  */
621 static __inline struct g_mirror_disk *
622 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
623 {
624 	struct g_mirror_disk *dp;
625 
626 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
627 	    dp = LIST_NEXT(dp, d_next)) {
628 		if (dp == NULL)
629 			dp = LIST_FIRST(&sc->sc_disks);
630 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
631 			break;
632 	}
633 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
634 		return (NULL);
635 	return (dp);
636 }
637 
638 static struct g_mirror_disk *
639 g_mirror_get_disk(struct g_mirror_softc *sc)
640 {
641 	struct g_mirror_disk *disk;
642 
643 	if (sc->sc_hint == NULL) {
644 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
645 		if (sc->sc_hint == NULL)
646 			return (NULL);
647 	}
648 	disk = sc->sc_hint;
649 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
650 		disk = g_mirror_find_next(sc, disk);
651 		if (disk == NULL)
652 			return (NULL);
653 	}
654 	sc->sc_hint = g_mirror_find_next(sc, disk);
655 	return (disk);
656 }
657 
658 static int
659 g_mirror_write_metadata(struct g_mirror_disk *disk,
660     struct g_mirror_metadata *md)
661 {
662 	struct g_mirror_softc *sc;
663 	struct g_consumer *cp;
664 	off_t offset, length;
665 	u_char *sector;
666 	int error = 0;
667 
668 	g_topology_assert_not();
669 	sc = disk->d_softc;
670 	sx_assert(&sc->sc_lock, SX_LOCKED);
671 
672 	cp = disk->d_consumer;
673 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
674 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
675 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
676 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
677 	    cp->acw, cp->ace));
678 	length = cp->provider->sectorsize;
679 	offset = cp->provider->mediasize - length;
680 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
681 	if (md != NULL &&
682 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
683 		/*
684 		 * Handle the case, when the size of parent provider reduced.
685 		 */
686 		if (offset < md->md_mediasize)
687 			error = ENOSPC;
688 		else
689 			mirror_metadata_encode(md, sector);
690 	}
691 	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
692 	if (error == 0)
693 		error = g_write_data(cp, offset, sector, length);
694 	free(sector, M_MIRROR);
695 	if (error != 0) {
696 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
697 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
698 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
699 			    "(device=%s, error=%d).",
700 			    g_mirror_get_diskname(disk), sc->sc_name, error);
701 		} else {
702 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
703 			    "(device=%s, error=%d).",
704 			    g_mirror_get_diskname(disk), sc->sc_name, error);
705 		}
706 		if (g_mirror_disconnect_on_failure &&
707 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
708 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
709 			g_mirror_event_send(disk,
710 			    G_MIRROR_DISK_STATE_DISCONNECTED,
711 			    G_MIRROR_EVENT_DONTWAIT);
712 		}
713 	}
714 	return (error);
715 }
716 
717 static int
718 g_mirror_clear_metadata(struct g_mirror_disk *disk)
719 {
720 	int error;
721 
722 	g_topology_assert_not();
723 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
724 
725 	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
726 		return (0);
727 	error = g_mirror_write_metadata(disk, NULL);
728 	if (error == 0) {
729 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
730 		    g_mirror_get_diskname(disk));
731 	} else {
732 		G_MIRROR_DEBUG(0,
733 		    "Cannot clear metadata on disk %s (error=%d).",
734 		    g_mirror_get_diskname(disk), error);
735 	}
736 	return (error);
737 }
738 
739 void
740 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
741     struct g_mirror_metadata *md)
742 {
743 
744 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
745 	md->md_version = G_MIRROR_VERSION;
746 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
747 	md->md_mid = sc->sc_id;
748 	md->md_all = sc->sc_ndisks;
749 	md->md_slice = sc->sc_slice;
750 	md->md_balance = sc->sc_balance;
751 	md->md_genid = sc->sc_genid;
752 	md->md_mediasize = sc->sc_mediasize;
753 	md->md_sectorsize = sc->sc_sectorsize;
754 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
755 	bzero(md->md_provider, sizeof(md->md_provider));
756 	if (disk == NULL) {
757 		md->md_did = arc4random();
758 		md->md_priority = 0;
759 		md->md_syncid = 0;
760 		md->md_dflags = 0;
761 		md->md_sync_offset = 0;
762 		md->md_provsize = 0;
763 	} else {
764 		md->md_did = disk->d_id;
765 		md->md_priority = disk->d_priority;
766 		md->md_syncid = disk->d_sync.ds_syncid;
767 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
768 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
769 			md->md_sync_offset = disk->d_sync.ds_offset_done;
770 		else
771 			md->md_sync_offset = 0;
772 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
773 			strlcpy(md->md_provider,
774 			    disk->d_consumer->provider->name,
775 			    sizeof(md->md_provider));
776 		}
777 		md->md_provsize = disk->d_consumer->provider->mediasize;
778 	}
779 }
780 
781 void
782 g_mirror_update_metadata(struct g_mirror_disk *disk)
783 {
784 	struct g_mirror_softc *sc;
785 	struct g_mirror_metadata md;
786 	int error;
787 
788 	g_topology_assert_not();
789 	sc = disk->d_softc;
790 	sx_assert(&sc->sc_lock, SX_LOCKED);
791 
792 	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
793 		return;
794 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
795 		g_mirror_fill_metadata(sc, disk, &md);
796 	error = g_mirror_write_metadata(disk, &md);
797 	if (error == 0) {
798 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
799 		    g_mirror_get_diskname(disk));
800 	} else {
801 		G_MIRROR_DEBUG(0,
802 		    "Cannot update metadata on disk %s (error=%d).",
803 		    g_mirror_get_diskname(disk), error);
804 	}
805 }
806 
807 static void
808 g_mirror_bump_syncid(struct g_mirror_softc *sc)
809 {
810 	struct g_mirror_disk *disk;
811 
812 	g_topology_assert_not();
813 	sx_assert(&sc->sc_lock, SX_XLOCKED);
814 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
815 	    ("%s called with no active disks (device=%s).", __func__,
816 	    sc->sc_name));
817 
818 	sc->sc_syncid++;
819 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
820 	    sc->sc_syncid);
821 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
822 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
823 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
824 			disk->d_sync.ds_syncid = sc->sc_syncid;
825 			g_mirror_update_metadata(disk);
826 		}
827 	}
828 }
829 
830 static void
831 g_mirror_bump_genid(struct g_mirror_softc *sc)
832 {
833 	struct g_mirror_disk *disk;
834 
835 	g_topology_assert_not();
836 	sx_assert(&sc->sc_lock, SX_XLOCKED);
837 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
838 	    ("%s called with no active disks (device=%s).", __func__,
839 	    sc->sc_name));
840 
841 	sc->sc_genid++;
842 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
843 	    sc->sc_genid);
844 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
845 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
846 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
847 			disk->d_genid = sc->sc_genid;
848 			g_mirror_update_metadata(disk);
849 		}
850 	}
851 }
852 
853 static int
854 g_mirror_idle(struct g_mirror_softc *sc, int acw)
855 {
856 	struct g_mirror_disk *disk;
857 	int timeout;
858 
859 	g_topology_assert_not();
860 	sx_assert(&sc->sc_lock, SX_XLOCKED);
861 
862 	if (sc->sc_provider == NULL)
863 		return (0);
864 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
865 		return (0);
866 	if (sc->sc_idle)
867 		return (0);
868 	if (sc->sc_writes > 0)
869 		return (0);
870 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
871 		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
872 		if (!g_mirror_shutdown && timeout > 0)
873 			return (timeout);
874 	}
875 	sc->sc_idle = 1;
876 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
877 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
878 			continue;
879 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
880 		    g_mirror_get_diskname(disk), sc->sc_name);
881 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
882 		g_mirror_update_metadata(disk);
883 	}
884 	return (0);
885 }
886 
887 static void
888 g_mirror_unidle(struct g_mirror_softc *sc)
889 {
890 	struct g_mirror_disk *disk;
891 
892 	g_topology_assert_not();
893 	sx_assert(&sc->sc_lock, SX_XLOCKED);
894 
895 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
896 		return;
897 	sc->sc_idle = 0;
898 	sc->sc_last_write = time_uptime;
899 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
900 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
901 			continue;
902 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
903 		    g_mirror_get_diskname(disk), sc->sc_name);
904 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
905 		g_mirror_update_metadata(disk);
906 	}
907 }
908 
909 static void
910 g_mirror_done(struct bio *bp)
911 {
912 	struct g_mirror_softc *sc;
913 
914 	sc = bp->bio_from->geom->softc;
915 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
916 	mtx_lock(&sc->sc_queue_mtx);
917 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
918 	mtx_unlock(&sc->sc_queue_mtx);
919 	wakeup(sc);
920 }
921 
922 static void
923 g_mirror_regular_request_error(struct g_mirror_softc *sc,
924     struct g_mirror_disk *disk, struct bio *bp)
925 {
926 
927 	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP)
928 		return;
929 
930 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
931 		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
932 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
933 		    bp->bio_error);
934 	} else {
935 		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
936 		    bp->bio_error);
937 	}
938 	if (g_mirror_disconnect_on_failure &&
939 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
940 		if (bp->bio_error == ENXIO &&
941 		    bp->bio_cmd == BIO_READ)
942 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
943 		else if (bp->bio_error == ENXIO)
944 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
945 		else
946 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
947 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
948 		    G_MIRROR_EVENT_DONTWAIT);
949 	}
950 }
951 
952 static void
953 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
954 {
955 	struct g_mirror_disk *disk;
956 	struct bio *pbp;
957 
958 	g_topology_assert_not();
959 	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
960 	    ("regular request %p with unexpected origin", bp));
961 
962 	pbp = bp->bio_parent;
963 	bp->bio_from->index--;
964 	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
965 		sc->sc_writes--;
966 	disk = bp->bio_from->private;
967 	if (disk == NULL) {
968 		g_topology_lock();
969 		g_mirror_kill_consumer(sc, bp->bio_from);
970 		g_topology_unlock();
971 	}
972 
973 	switch (bp->bio_cmd) {
974 	case BIO_READ:
975 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
976 		    bp->bio_error);
977 		break;
978 	case BIO_WRITE:
979 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
980 		    bp->bio_error);
981 		break;
982 	case BIO_DELETE:
983 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
984 		    bp->bio_error);
985 		break;
986 	case BIO_FLUSH:
987 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
988 		    bp->bio_error);
989 		break;
990 	}
991 
992 	pbp->bio_inbed++;
993 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
994 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
995 	    pbp->bio_children));
996 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
997 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
998 		g_destroy_bio(bp);
999 		if (pbp->bio_children == pbp->bio_inbed) {
1000 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
1001 			pbp->bio_completed = pbp->bio_length;
1002 			if (pbp->bio_cmd == BIO_WRITE ||
1003 			    pbp->bio_cmd == BIO_DELETE) {
1004 				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1005 				/* Release delayed sync requests if possible. */
1006 				g_mirror_sync_release(sc);
1007 			}
1008 			g_io_deliver(pbp, pbp->bio_error);
1009 		}
1010 		return;
1011 	} else if (bp->bio_error != 0) {
1012 		if (pbp->bio_error == 0)
1013 			pbp->bio_error = bp->bio_error;
1014 		if (disk != NULL)
1015 			g_mirror_regular_request_error(sc, disk, bp);
1016 		switch (pbp->bio_cmd) {
1017 		case BIO_DELETE:
1018 		case BIO_WRITE:
1019 		case BIO_FLUSH:
1020 			pbp->bio_inbed--;
1021 			pbp->bio_children--;
1022 			break;
1023 		}
1024 	}
1025 	g_destroy_bio(bp);
1026 
1027 	switch (pbp->bio_cmd) {
1028 	case BIO_READ:
1029 		if (pbp->bio_inbed < pbp->bio_children)
1030 			break;
1031 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
1032 			g_io_deliver(pbp, pbp->bio_error);
1033 		else {
1034 			pbp->bio_error = 0;
1035 			mtx_lock(&sc->sc_queue_mtx);
1036 			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
1037 			mtx_unlock(&sc->sc_queue_mtx);
1038 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1039 			wakeup(sc);
1040 		}
1041 		break;
1042 	case BIO_DELETE:
1043 	case BIO_WRITE:
1044 	case BIO_FLUSH:
1045 		if (pbp->bio_children == 0) {
1046 			/*
1047 			 * All requests failed.
1048 			 */
1049 		} else if (pbp->bio_inbed < pbp->bio_children) {
1050 			/* Do nothing. */
1051 			break;
1052 		} else if (pbp->bio_children == pbp->bio_inbed) {
1053 			/* Some requests succeeded. */
1054 			pbp->bio_error = 0;
1055 			pbp->bio_completed = pbp->bio_length;
1056 		}
1057 		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
1058 			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1059 			/* Release delayed sync requests if possible. */
1060 			g_mirror_sync_release(sc);
1061 		}
1062 		g_io_deliver(pbp, pbp->bio_error);
1063 		break;
1064 	default:
1065 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
1066 		break;
1067 	}
1068 }
1069 
1070 static void
1071 g_mirror_sync_done(struct bio *bp)
1072 {
1073 	struct g_mirror_softc *sc;
1074 
1075 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1076 	sc = bp->bio_from->geom->softc;
1077 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1078 	mtx_lock(&sc->sc_queue_mtx);
1079 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1080 	mtx_unlock(&sc->sc_queue_mtx);
1081 	wakeup(sc);
1082 }
1083 
1084 static void
1085 g_mirror_candelete(struct bio *bp)
1086 {
1087 	struct g_mirror_softc *sc;
1088 	struct g_mirror_disk *disk;
1089 	int val;
1090 
1091 	sc = bp->bio_to->private;
1092 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1093 		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1094 			break;
1095 	}
1096 	val = disk != NULL;
1097 	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
1098 }
1099 
1100 static void
1101 g_mirror_kernel_dump(struct bio *bp)
1102 {
1103 	struct g_mirror_softc *sc;
1104 	struct g_mirror_disk *disk;
1105 	struct bio *cbp;
1106 	struct g_kerneldump *gkd;
1107 
1108 	/*
1109 	 * We configure dumping to the first component, because this component
1110 	 * will be used for reading with 'prefer' balance algorithm.
1111 	 * If the component with the highest priority is currently disconnected
1112 	 * we will not be able to read the dump after the reboot if it will be
1113 	 * connected and synchronized later. Can we do something better?
1114 	 */
1115 	sc = bp->bio_to->private;
1116 	disk = LIST_FIRST(&sc->sc_disks);
1117 
1118 	gkd = (struct g_kerneldump *)bp->bio_data;
1119 	if (gkd->length > bp->bio_to->mediasize)
1120 		gkd->length = bp->bio_to->mediasize;
1121 	cbp = g_clone_bio(bp);
1122 	if (cbp == NULL) {
1123 		g_io_deliver(bp, ENOMEM);
1124 		return;
1125 	}
1126 	cbp->bio_done = g_std_done;
1127 	g_io_request(cbp, disk->d_consumer);
1128 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1129 	    g_mirror_get_diskname(disk));
1130 }
1131 
1132 static void
1133 g_mirror_start(struct bio *bp)
1134 {
1135 	struct g_mirror_softc *sc;
1136 
1137 	sc = bp->bio_to->private;
1138 	/*
1139 	 * If sc == NULL or there are no valid disks, provider's error
1140 	 * should be set and g_mirror_start() should not be called at all.
1141 	 */
1142 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1143 	    ("Provider's error should be set (error=%d)(mirror=%s).",
1144 	    bp->bio_to->error, bp->bio_to->name));
1145 	G_MIRROR_LOGREQ(3, bp, "Request received.");
1146 
1147 	switch (bp->bio_cmd) {
1148 	case BIO_READ:
1149 	case BIO_WRITE:
1150 	case BIO_DELETE:
1151 	case BIO_FLUSH:
1152 		break;
1153 	case BIO_GETATTR:
1154 		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1155 			g_mirror_candelete(bp);
1156 			return;
1157 		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1158 			g_mirror_kernel_dump(bp);
1159 			return;
1160 		}
1161 		/* FALLTHROUGH */
1162 	default:
1163 		g_io_deliver(bp, EOPNOTSUPP);
1164 		return;
1165 	}
1166 	mtx_lock(&sc->sc_queue_mtx);
1167 	if (bp->bio_to->error != 0) {
1168 		mtx_unlock(&sc->sc_queue_mtx);
1169 		g_io_deliver(bp, bp->bio_to->error);
1170 		return;
1171 	}
1172 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1173 	mtx_unlock(&sc->sc_queue_mtx);
1174 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1175 	wakeup(sc);
1176 }
1177 
1178 /*
1179  * Return TRUE if the given request is colliding with a in-progress
1180  * synchronization request.
1181  */
1182 static bool
1183 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1184 {
1185 	struct g_mirror_disk *disk;
1186 	struct bio *sbp;
1187 	off_t rstart, rend, sstart, send;
1188 	u_int i;
1189 
1190 	if (sc->sc_sync.ds_ndisks == 0)
1191 		return (false);
1192 	rstart = bp->bio_offset;
1193 	rend = bp->bio_offset + bp->bio_length;
1194 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1195 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1196 			continue;
1197 		for (i = 0; i < g_mirror_syncreqs; i++) {
1198 			sbp = disk->d_sync.ds_bios[i];
1199 			if (sbp == NULL)
1200 				continue;
1201 			sstart = sbp->bio_offset;
1202 			send = sbp->bio_offset + sbp->bio_length;
1203 			if (rend > sstart && rstart < send)
1204 				return (true);
1205 		}
1206 	}
1207 	return (false);
1208 }
1209 
1210 /*
1211  * Return TRUE if the given sync request is colliding with a in-progress regular
1212  * request.
1213  */
1214 static bool
1215 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1216 {
1217 	off_t rstart, rend, sstart, send;
1218 	struct bio *bp;
1219 
1220 	if (sc->sc_sync.ds_ndisks == 0)
1221 		return (false);
1222 	sstart = sbp->bio_offset;
1223 	send = sbp->bio_offset + sbp->bio_length;
1224 	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
1225 		rstart = bp->bio_offset;
1226 		rend = bp->bio_offset + bp->bio_length;
1227 		if (rend > sstart && rstart < send)
1228 			return (true);
1229 	}
1230 	return (false);
1231 }
1232 
1233 /*
1234  * Puts regular request onto delayed queue.
1235  */
1236 static void
1237 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1238 {
1239 
1240 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1241 	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
1242 }
1243 
1244 /*
1245  * Puts synchronization request onto delayed queue.
1246  */
1247 static void
1248 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1249 {
1250 
1251 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1252 	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
1253 }
1254 
1255 /*
1256  * Requeue delayed regular requests.
1257  */
1258 static void
1259 g_mirror_regular_release(struct g_mirror_softc *sc)
1260 {
1261 	struct bio *bp;
1262 
1263 	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
1264 		return;
1265 	if (g_mirror_sync_collision(sc, bp))
1266 		return;
1267 
1268 	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
1269 	mtx_lock(&sc->sc_queue_mtx);
1270 	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
1271 	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
1272 	mtx_unlock(&sc->sc_queue_mtx);
1273 }
1274 
1275 /*
1276  * Releases delayed sync requests which don't collide anymore with regular
1277  * requests.
1278  */
1279 static void
1280 g_mirror_sync_release(struct g_mirror_softc *sc)
1281 {
1282 	struct bio *bp, *bp2;
1283 
1284 	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
1285 		if (g_mirror_regular_collision(sc, bp))
1286 			continue;
1287 		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
1288 		G_MIRROR_LOGREQ(2, bp,
1289 		    "Releasing delayed synchronization request.");
1290 		g_io_request(bp, bp->bio_from);
1291 	}
1292 }
1293 
1294 /*
1295  * Free a synchronization request and clear its slot in the array.
1296  */
1297 static void
1298 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
1299 {
1300 	int idx;
1301 
1302 	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
1303 		idx = (int)(uintptr_t)bp->bio_caller1;
1304 		KASSERT(disk->d_sync.ds_bios[idx] == bp,
1305 		    ("unexpected sync BIO at %p:%d", disk, idx));
1306 		disk->d_sync.ds_bios[idx] = NULL;
1307 	}
1308 	free(bp->bio_data, M_MIRROR);
1309 	g_destroy_bio(bp);
1310 }
1311 
1312 /*
1313  * Handle synchronization requests.
1314  * Every synchronization request is a two-step process: first, a read request is
1315  * sent to the mirror provider via the sync consumer. If that request completes
1316  * successfully, it is converted to a write and sent to the disk being
1317  * synchronized. If the write also completes successfully, the synchronization
1318  * offset is advanced and a new read request is submitted.
1319  */
1320 static void
1321 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
1322 {
1323 	struct g_mirror_disk *disk;
1324 	struct g_mirror_disk_sync *sync;
1325 
1326 	KASSERT((bp->bio_cmd == BIO_READ &&
1327 	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
1328 	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
1329 	    ("Sync BIO %p with unexpected origin", bp));
1330 
1331 	bp->bio_from->index--;
1332 	disk = bp->bio_from->private;
1333 	if (disk == NULL) {
1334 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1335 		g_topology_lock();
1336 		g_mirror_kill_consumer(sc, bp->bio_from);
1337 		g_topology_unlock();
1338 		g_mirror_sync_request_free(NULL, bp);
1339 		sx_xlock(&sc->sc_lock);
1340 		return;
1341 	}
1342 
1343 	sync = &disk->d_sync;
1344 
1345 	/*
1346 	 * Synchronization request.
1347 	 */
1348 	switch (bp->bio_cmd) {
1349 	case BIO_READ: {
1350 		struct g_consumer *cp;
1351 
1352 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1353 		    bp->bio_error);
1354 
1355 		if (bp->bio_error != 0) {
1356 			G_MIRROR_LOGREQ(0, bp,
1357 			    "Synchronization request failed (error=%d).",
1358 			    bp->bio_error);
1359 
1360 			/*
1361 			 * The read error will trigger a syncid bump, so there's
1362 			 * no need to do that here.
1363 			 *
1364 			 * The read error handling for regular requests will
1365 			 * retry the read from all active mirrors before passing
1366 			 * the error back up, so there's no need to retry here.
1367 			 */
1368 			g_mirror_sync_request_free(disk, bp);
1369 			g_mirror_event_send(disk,
1370 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1371 			    G_MIRROR_EVENT_DONTWAIT);
1372 			return;
1373 		}
1374 		G_MIRROR_LOGREQ(3, bp,
1375 		    "Synchronization request half-finished.");
1376 		bp->bio_cmd = BIO_WRITE;
1377 		bp->bio_cflags = 0;
1378 		cp = disk->d_consumer;
1379 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1380 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1381 		    cp->acr, cp->acw, cp->ace));
1382 		cp->index++;
1383 		g_io_request(bp, cp);
1384 		return;
1385 	}
1386 	case BIO_WRITE: {
1387 		off_t offset;
1388 		int i;
1389 
1390 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1391 		    bp->bio_error);
1392 
1393 		if (bp->bio_error != 0) {
1394 			G_MIRROR_LOGREQ(0, bp,
1395 			    "Synchronization request failed (error=%d).",
1396 			    bp->bio_error);
1397 			g_mirror_sync_request_free(disk, bp);
1398 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1399 			g_mirror_event_send(disk,
1400 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1401 			    G_MIRROR_EVENT_DONTWAIT);
1402 			return;
1403 		}
1404 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1405 		if (sync->ds_offset >= sc->sc_mediasize ||
1406 		    sync->ds_consumer == NULL ||
1407 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1408 			/* Don't send more synchronization requests. */
1409 			sync->ds_inflight--;
1410 			g_mirror_sync_request_free(disk, bp);
1411 			if (sync->ds_inflight > 0)
1412 				return;
1413 			if (sync->ds_consumer == NULL ||
1414 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1415 				return;
1416 			}
1417 			/* Disk up-to-date, activate it. */
1418 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1419 			    G_MIRROR_EVENT_DONTWAIT);
1420 			return;
1421 		}
1422 
1423 		/* Send next synchronization request. */
1424 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
1425 		sync->ds_offset += bp->bio_length;
1426 
1427 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1428 		sync->ds_consumer->index++;
1429 
1430 		/*
1431 		 * Delay the request if it is colliding with a regular request.
1432 		 */
1433 		if (g_mirror_regular_collision(sc, bp))
1434 			g_mirror_sync_delay(sc, bp);
1435 		else
1436 			g_io_request(bp, sync->ds_consumer);
1437 
1438 		/* Requeue delayed requests if possible. */
1439 		g_mirror_regular_release(sc);
1440 
1441 		/* Find the smallest offset */
1442 		offset = sc->sc_mediasize;
1443 		for (i = 0; i < g_mirror_syncreqs; i++) {
1444 			bp = sync->ds_bios[i];
1445 			if (bp != NULL && bp->bio_offset < offset)
1446 				offset = bp->bio_offset;
1447 		}
1448 		if (g_mirror_sync_period > 0 &&
1449 		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
1450 			sync->ds_offset_done = offset;
1451 			g_mirror_update_metadata(disk);
1452 			sync->ds_update_ts = time_uptime;
1453 		}
1454 		return;
1455 	}
1456 	default:
1457 		panic("Invalid I/O request %p", bp);
1458 	}
1459 }
1460 
1461 static void
1462 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1463 {
1464 	struct g_mirror_disk *disk;
1465 	struct g_consumer *cp;
1466 	struct bio *cbp;
1467 
1468 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1469 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1470 			break;
1471 	}
1472 	if (disk == NULL) {
1473 		if (bp->bio_error == 0)
1474 			bp->bio_error = ENXIO;
1475 		g_io_deliver(bp, bp->bio_error);
1476 		return;
1477 	}
1478 	cbp = g_clone_bio(bp);
1479 	if (cbp == NULL) {
1480 		if (bp->bio_error == 0)
1481 			bp->bio_error = ENOMEM;
1482 		g_io_deliver(bp, bp->bio_error);
1483 		return;
1484 	}
1485 	/*
1486 	 * Fill in the component buf structure.
1487 	 */
1488 	cp = disk->d_consumer;
1489 	cbp->bio_done = g_mirror_done;
1490 	cbp->bio_to = cp->provider;
1491 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1492 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1493 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1494 	    cp->acw, cp->ace));
1495 	cp->index++;
1496 	g_io_request(cbp, cp);
1497 }
1498 
1499 static void
1500 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1501 {
1502 	struct g_mirror_disk *disk;
1503 	struct g_consumer *cp;
1504 	struct bio *cbp;
1505 
1506 	disk = g_mirror_get_disk(sc);
1507 	if (disk == NULL) {
1508 		if (bp->bio_error == 0)
1509 			bp->bio_error = ENXIO;
1510 		g_io_deliver(bp, bp->bio_error);
1511 		return;
1512 	}
1513 	cbp = g_clone_bio(bp);
1514 	if (cbp == NULL) {
1515 		if (bp->bio_error == 0)
1516 			bp->bio_error = ENOMEM;
1517 		g_io_deliver(bp, bp->bio_error);
1518 		return;
1519 	}
1520 	/*
1521 	 * Fill in the component buf structure.
1522 	 */
1523 	cp = disk->d_consumer;
1524 	cbp->bio_done = g_mirror_done;
1525 	cbp->bio_to = cp->provider;
1526 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1527 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1528 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1529 	    cp->acw, cp->ace));
1530 	cp->index++;
1531 	g_io_request(cbp, cp);
1532 }
1533 
/* Offsets closer than this many bytes count as "close" for load balancing. */
#define	TRACK_SIZE	(1024 * 1024)
/* Weight of one outstanding request in the per-disk load estimate. */
#define	LOAD_SCALE	256
/* Absolute value; note that the argument is evaluated more than once. */
#define	ABS(x)		(((x) < 0) ? (-(x)) : (x))
1537 
1538 static void
1539 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1540 {
1541 	struct g_mirror_disk *disk, *dp;
1542 	struct g_consumer *cp;
1543 	struct bio *cbp;
1544 	int prio, best;
1545 
1546 	/* Find a disk with the smallest load. */
1547 	disk = NULL;
1548 	best = INT_MAX;
1549 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1550 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1551 			continue;
1552 		prio = dp->load;
1553 		/* If disk head is precisely in position - highly prefer it. */
1554 		if (dp->d_last_offset == bp->bio_offset)
1555 			prio -= 2 * LOAD_SCALE;
1556 		else
1557 		/* If disk head is close to position - prefer it. */
1558 		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1559 			prio -= 1 * LOAD_SCALE;
1560 		if (prio <= best) {
1561 			disk = dp;
1562 			best = prio;
1563 		}
1564 	}
1565 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1566 	cbp = g_clone_bio(bp);
1567 	if (cbp == NULL) {
1568 		if (bp->bio_error == 0)
1569 			bp->bio_error = ENOMEM;
1570 		g_io_deliver(bp, bp->bio_error);
1571 		return;
1572 	}
1573 	/*
1574 	 * Fill in the component buf structure.
1575 	 */
1576 	cp = disk->d_consumer;
1577 	cbp->bio_done = g_mirror_done;
1578 	cbp->bio_to = cp->provider;
1579 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1580 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1581 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1582 	    cp->acw, cp->ace));
1583 	cp->index++;
1584 	/* Remember last head position */
1585 	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1586 	/* Update loads. */
1587 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1588 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1589 		    dp->load * 7) / 8;
1590 	}
1591 	g_io_request(cbp, cp);
1592 }
1593 
1594 static void
1595 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1596 {
1597 	struct bio_queue queue;
1598 	struct g_mirror_disk *disk;
1599 	struct g_consumer *cp;
1600 	struct bio *cbp;
1601 	off_t left, mod, offset, slice;
1602 	u_char *data;
1603 	u_int ndisks;
1604 
1605 	if (bp->bio_length <= sc->sc_slice) {
1606 		g_mirror_request_round_robin(sc, bp);
1607 		return;
1608 	}
1609 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1610 	slice = bp->bio_length / ndisks;
1611 	mod = slice % sc->sc_provider->sectorsize;
1612 	if (mod != 0)
1613 		slice += sc->sc_provider->sectorsize - mod;
1614 	/*
1615 	 * Allocate all bios before sending any request, so we can
1616 	 * return ENOMEM in nice and clean way.
1617 	 */
1618 	left = bp->bio_length;
1619 	offset = bp->bio_offset;
1620 	data = bp->bio_data;
1621 	TAILQ_INIT(&queue);
1622 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1623 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1624 			continue;
1625 		cbp = g_clone_bio(bp);
1626 		if (cbp == NULL) {
1627 			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1628 				TAILQ_REMOVE(&queue, cbp, bio_queue);
1629 				g_destroy_bio(cbp);
1630 			}
1631 			if (bp->bio_error == 0)
1632 				bp->bio_error = ENOMEM;
1633 			g_io_deliver(bp, bp->bio_error);
1634 			return;
1635 		}
1636 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1637 		cbp->bio_done = g_mirror_done;
1638 		cbp->bio_caller1 = disk;
1639 		cbp->bio_to = disk->d_consumer->provider;
1640 		cbp->bio_offset = offset;
1641 		cbp->bio_data = data;
1642 		cbp->bio_length = MIN(left, slice);
1643 		left -= cbp->bio_length;
1644 		if (left == 0)
1645 			break;
1646 		offset += cbp->bio_length;
1647 		data += cbp->bio_length;
1648 	}
1649 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1650 		TAILQ_REMOVE(&queue, cbp, bio_queue);
1651 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1652 		disk = cbp->bio_caller1;
1653 		cbp->bio_caller1 = NULL;
1654 		cp = disk->d_consumer;
1655 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1656 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1657 		    cp->acr, cp->acw, cp->ace));
1658 		disk->d_consumer->index++;
1659 		g_io_request(cbp, disk->d_consumer);
1660 	}
1661 }
1662 
1663 static void
1664 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1665 {
1666 	struct bio_queue queue;
1667 	struct bio *cbp;
1668 	struct g_consumer *cp;
1669 	struct g_mirror_disk *disk;
1670 
1671 	sx_assert(&sc->sc_lock, SA_XLOCKED);
1672 
1673 	/*
1674 	 * To avoid ordering issues, if a write is deferred because of a
1675 	 * collision with a sync request, all I/O is deferred until that
1676 	 * write is initiated.
1677 	 */
1678 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1679 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1680 		g_mirror_regular_delay(sc, bp);
1681 		return;
1682 	}
1683 
1684 	switch (bp->bio_cmd) {
1685 	case BIO_READ:
1686 		switch (sc->sc_balance) {
1687 		case G_MIRROR_BALANCE_LOAD:
1688 			g_mirror_request_load(sc, bp);
1689 			break;
1690 		case G_MIRROR_BALANCE_PREFER:
1691 			g_mirror_request_prefer(sc, bp);
1692 			break;
1693 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1694 			g_mirror_request_round_robin(sc, bp);
1695 			break;
1696 		case G_MIRROR_BALANCE_SPLIT:
1697 			g_mirror_request_split(sc, bp);
1698 			break;
1699 		}
1700 		return;
1701 	case BIO_WRITE:
1702 	case BIO_DELETE:
1703 		/*
1704 		 * Delay the request if it is colliding with a synchronization
1705 		 * request.
1706 		 */
1707 		if (g_mirror_sync_collision(sc, bp)) {
1708 			g_mirror_regular_delay(sc, bp);
1709 			return;
1710 		}
1711 
1712 		if (sc->sc_idle)
1713 			g_mirror_unidle(sc);
1714 		else
1715 			sc->sc_last_write = time_uptime;
1716 
1717 		/*
1718 		 * Bump syncid on first write.
1719 		 */
1720 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1721 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1722 			g_mirror_bump_syncid(sc);
1723 		}
1724 
1725 		/*
1726 		 * Allocate all bios before sending any request, so we can
1727 		 * return ENOMEM in nice and clean way.
1728 		 */
1729 		TAILQ_INIT(&queue);
1730 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1731 			switch (disk->d_state) {
1732 			case G_MIRROR_DISK_STATE_ACTIVE:
1733 				break;
1734 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1735 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1736 					continue;
1737 				break;
1738 			default:
1739 				continue;
1740 			}
1741 			if (bp->bio_cmd == BIO_DELETE &&
1742 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1743 				continue;
1744 			cbp = g_clone_bio(bp);
1745 			if (cbp == NULL) {
1746 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1747 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1748 					g_destroy_bio(cbp);
1749 				}
1750 				if (bp->bio_error == 0)
1751 					bp->bio_error = ENOMEM;
1752 				g_io_deliver(bp, bp->bio_error);
1753 				return;
1754 			}
1755 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1756 			cbp->bio_done = g_mirror_done;
1757 			cp = disk->d_consumer;
1758 			cbp->bio_caller1 = cp;
1759 			cbp->bio_to = cp->provider;
1760 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1761 			    ("Consumer %s not opened (r%dw%de%d).",
1762 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1763 		}
1764 		if (TAILQ_EMPTY(&queue)) {
1765 			KASSERT(bp->bio_cmd == BIO_DELETE,
1766 			    ("No consumers for regular request %p", bp));
1767 			g_io_deliver(bp, EOPNOTSUPP);
1768 			return;
1769 		}
1770 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1771 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1772 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1773 			cp = cbp->bio_caller1;
1774 			cbp->bio_caller1 = NULL;
1775 			cp->index++;
1776 			sc->sc_writes++;
1777 			g_io_request(cbp, cp);
1778 		}
1779 		/*
1780 		 * Put request onto inflight queue, so we can check if new
1781 		 * synchronization requests don't collide with it.
1782 		 */
1783 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1784 		return;
1785 	case BIO_FLUSH:
1786 		TAILQ_INIT(&queue);
1787 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1788 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1789 				continue;
1790 			cbp = g_clone_bio(bp);
1791 			if (cbp == NULL) {
1792 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1793 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1794 					g_destroy_bio(cbp);
1795 				}
1796 				if (bp->bio_error == 0)
1797 					bp->bio_error = ENOMEM;
1798 				g_io_deliver(bp, bp->bio_error);
1799 				return;
1800 			}
1801 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1802 			cbp->bio_done = g_mirror_done;
1803 			cbp->bio_caller1 = disk;
1804 			cbp->bio_to = disk->d_consumer->provider;
1805 		}
1806 		KASSERT(!TAILQ_EMPTY(&queue),
1807 		    ("No consumers for regular request %p", bp));
1808 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1809 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1810 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1811 			disk = cbp->bio_caller1;
1812 			cbp->bio_caller1 = NULL;
1813 			cp = disk->d_consumer;
1814 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1815 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1816 			    cp->acr, cp->acw, cp->ace));
1817 			cp->index++;
1818 			g_io_request(cbp, cp);
1819 		}
1820 		break;
1821 	default:
1822 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1823 		    bp->bio_cmd, sc->sc_name));
1824 		break;
1825 	}
1826 }
1827 
1828 static int
1829 g_mirror_can_destroy(struct g_mirror_softc *sc)
1830 {
1831 	struct g_geom *gp;
1832 	struct g_consumer *cp;
1833 
1834 	g_topology_assert();
1835 	gp = sc->sc_geom;
1836 	if (gp->softc == NULL)
1837 		return (1);
1838 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1839 		return (0);
1840 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1841 		if (g_mirror_is_busy(sc, cp))
1842 			return (0);
1843 	}
1844 	gp = sc->sc_sync.ds_geom;
1845 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1846 		if (g_mirror_is_busy(sc, cp))
1847 			return (0);
1848 	}
1849 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1850 	    sc->sc_name);
1851 	return (1);
1852 }
1853 
1854 static int
1855 g_mirror_try_destroy(struct g_mirror_softc *sc)
1856 {
1857 
1858 	if (sc->sc_rootmount != NULL) {
1859 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1860 		    sc->sc_rootmount);
1861 		root_mount_rel(sc->sc_rootmount);
1862 		sc->sc_rootmount = NULL;
1863 	}
1864 	g_topology_lock();
1865 	if (!g_mirror_can_destroy(sc)) {
1866 		g_topology_unlock();
1867 		return (0);
1868 	}
1869 	sc->sc_geom->softc = NULL;
1870 	sc->sc_sync.ds_geom->softc = NULL;
1871 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1872 		g_topology_unlock();
1873 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1874 		    &sc->sc_worker);
1875 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1876 		sx_xunlock(&sc->sc_lock);
1877 		wakeup(&sc->sc_worker);
1878 		sc->sc_worker = NULL;
1879 	} else {
1880 		g_topology_unlock();
1881 		g_mirror_destroy_device(sc);
1882 	}
1883 	return (1);
1884 }
1885 
1886 /*
1887  * Worker thread.
1888  */
1889 static void
1890 g_mirror_worker(void *arg)
1891 {
1892 	struct g_mirror_softc *sc;
1893 	struct g_mirror_event *ep;
1894 	struct bio *bp;
1895 	int timeout;
1896 
1897 	sc = arg;
1898 	thread_lock(curthread);
1899 	sched_prio(curthread, PRIBIO);
1900 	thread_unlock(curthread);
1901 
1902 	sx_xlock(&sc->sc_lock);
1903 	for (;;) {
1904 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1905 		/*
1906 		 * First take a look at events.
1907 		 * This is important to handle events before any I/O requests.
1908 		 */
1909 		ep = g_mirror_event_first(sc);
1910 		if (ep != NULL) {
1911 			g_mirror_event_remove(sc, ep);
1912 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1913 				/* Update only device status. */
1914 				G_MIRROR_DEBUG(3,
1915 				    "Running event for device %s.",
1916 				    sc->sc_name);
1917 				ep->e_error = 0;
1918 				g_mirror_update_device(sc, true);
1919 			} else {
1920 				/* Update disk status. */
1921 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1922 				     g_mirror_get_diskname(ep->e_disk));
1923 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1924 				    ep->e_state);
1925 				if (ep->e_error == 0)
1926 					g_mirror_update_device(sc, false);
1927 			}
1928 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1929 				KASSERT(ep->e_error == 0,
1930 				    ("Error cannot be handled."));
1931 				g_mirror_event_free(ep);
1932 			} else {
1933 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1934 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1935 				    ep);
1936 				mtx_lock(&sc->sc_events_mtx);
1937 				wakeup(ep);
1938 				mtx_unlock(&sc->sc_events_mtx);
1939 			}
1940 			if ((sc->sc_flags &
1941 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1942 				if (g_mirror_try_destroy(sc)) {
1943 					curthread->td_pflags &= ~TDP_GEOM;
1944 					G_MIRROR_DEBUG(1, "Thread exiting.");
1945 					kproc_exit(0);
1946 				}
1947 			}
1948 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1949 			continue;
1950 		}
1951 
1952 		/*
1953 		 * Check if we can mark array as CLEAN and if we can't take
1954 		 * how much seconds should we wait.
1955 		 */
1956 		timeout = g_mirror_idle(sc, -1);
1957 
1958 		/*
1959 		 * Handle I/O requests.
1960 		 */
1961 		mtx_lock(&sc->sc_queue_mtx);
1962 		bp = TAILQ_FIRST(&sc->sc_queue);
1963 		if (bp != NULL)
1964 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1965 		else {
1966 			if ((sc->sc_flags &
1967 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1968 				mtx_unlock(&sc->sc_queue_mtx);
1969 				if (g_mirror_try_destroy(sc)) {
1970 					curthread->td_pflags &= ~TDP_GEOM;
1971 					G_MIRROR_DEBUG(1, "Thread exiting.");
1972 					kproc_exit(0);
1973 				}
1974 				mtx_lock(&sc->sc_queue_mtx);
1975 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1976 					mtx_unlock(&sc->sc_queue_mtx);
1977 					continue;
1978 				}
1979 			}
1980 			if (g_mirror_event_first(sc) != NULL) {
1981 				mtx_unlock(&sc->sc_queue_mtx);
1982 				continue;
1983 			}
1984 			sx_xunlock(&sc->sc_lock);
1985 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1986 			    timeout * hz);
1987 			sx_xlock(&sc->sc_lock);
1988 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1989 			continue;
1990 		}
1991 		mtx_unlock(&sc->sc_queue_mtx);
1992 
1993 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1994 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1995 			/*
1996 			 * Handle completion of the first half (the read) of a
1997 			 * block synchronization operation.
1998 			 */
1999 			g_mirror_sync_request(sc, bp);
2000 		} else if (bp->bio_to != sc->sc_provider) {
2001 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2002 				/*
2003 				 * Handle completion of a regular I/O request.
2004 				 */
2005 				g_mirror_regular_request(sc, bp);
2006 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2007 				/*
2008 				 * Handle completion of the second half (the
2009 				 * write) of a block synchronization operation.
2010 				 */
2011 				g_mirror_sync_request(sc, bp);
2012 			else {
2013 				KASSERT(0,
2014 				    ("Invalid request cflags=0x%hx to=%s.",
2015 				    bp->bio_cflags, bp->bio_to->name));
2016 			}
2017 		} else {
2018 			/*
2019 			 * Initiate an I/O request.
2020 			 */
2021 			g_mirror_register_request(sc, bp);
2022 		}
2023 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2024 	}
2025 }
2026 
2027 static void
2028 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2029 {
2030 
2031 	sx_assert(&sc->sc_lock, SX_LOCKED);
2032 
2033 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2034 		return;
2035 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2036 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2037 		    g_mirror_get_diskname(disk), sc->sc_name);
2038 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2039 	} else if (sc->sc_idle &&
2040 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2041 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2042 		    g_mirror_get_diskname(disk), sc->sc_name);
2043 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2044 	}
2045 }
2046 
2047 static void
2048 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2049     off_t offset)
2050 {
2051 	void *data;
2052 	int idx;
2053 
2054 	data = bp->bio_data;
2055 	idx = (int)(uintptr_t)bp->bio_caller1;
2056 	g_reset_bio(bp);
2057 
2058 	bp->bio_cmd = BIO_READ;
2059 	bp->bio_data = data;
2060 	bp->bio_done = g_mirror_sync_done;
2061 	bp->bio_from = disk->d_sync.ds_consumer;
2062 	bp->bio_to = disk->d_softc->sc_provider;
2063 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2064 	bp->bio_offset = offset;
2065 	bp->bio_length = MIN(MAXPHYS,
2066 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2067 }
2068 
/*
 * Start the synchronization process for a disk.
 *
 * A new consumer is created under the device's synchronization geom, attached
 * to the mirror provider and opened for reading.  Then g_mirror_syncreqs
 * bios, each with a MAXPHYS-sized buffer, are allocated and the first batch
 * of synchronization reads is issued (or delayed when colliding with regular
 * I/O).
 *
 * Must be called with sc_lock held and without the topology lock; sc_lock is
 * dropped while the topology lock is held and re-acquired afterwards.
 * The disk must be in the SYNCHRONIZING state and the device RUNNING.
 */
static void
g_mirror_sync_start(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk_sync *sync;
	struct g_consumer *cp;
	struct bio *bp;
	int error, i;

	g_topology_assert_not();
	sc = disk->d_softc;
	sync = &disk->d_sync;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
	    ("Disk %s is not marked for synchronization.",
	    g_mirror_get_diskname(disk)));
	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
	    sc->sc_state));

	/*
	 * Release sc_lock before taking the topology lock, then create the
	 * synchronization consumer and open the mirror provider for reading.
	 */
	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	cp = g_new_consumer(sc->sc_sync.ds_geom);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, sc->sc_provider);
	KASSERT(error == 0,
	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
	error = g_access(cp, 1, 0, 0);
	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
	    g_mirror_get_diskname(disk));
	/* The disk is marked dirty unless the device is flagged NOFAILSYNC. */
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
	KASSERT(sync->ds_consumer == NULL,
	    ("Sync consumer already exists (device=%s, disk=%s).",
	    sc->sc_name, g_mirror_get_diskname(disk)));

	sync->ds_consumer = cp;
	sync->ds_consumer->private = disk;
	sync->ds_consumer->index = 0;

	/*
	 * Allocate memory for synchronization bios and initialize them.
	 * Each bio remembers its slot number in bio_caller1 and covers the
	 * next chunk of the media, starting at the stored ds_offset.
	 */
	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
	    M_MIRROR, M_WAITOK);
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = g_alloc_bio();
		sync->ds_bios[i] = bp;

		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
		bp->bio_caller1 = (void *)(uintptr_t)i;
		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
		sync->ds_offset += bp->bio_length;
	}

	/* Increase the number of disks in SYNCHRONIZING state. */
	sc->sc_sync.ds_ndisks++;
	/* Set the number of in-flight synchronization requests. */
	sync->ds_inflight = g_mirror_syncreqs;

	/*
	 * Fire off first synchronization requests.
	 */
	for (i = 0; i < g_mirror_syncreqs; i++) {
		bp = sync->ds_bios[i];
		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		/* The consumer index is bumped once per request sent. */
		sync->ds_consumer->index++;
		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);
	}
}
2150 
2151 /*
2152  * Stop synchronization process.
2153  * type: 0 - synchronization finished
2154  *       1 - synchronization stopped
2155  */
2156 static void
2157 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2158 {
2159 	struct g_mirror_softc *sc;
2160 	struct g_consumer *cp;
2161 
2162 	g_topology_assert_not();
2163 	sc = disk->d_softc;
2164 	sx_assert(&sc->sc_lock, SX_LOCKED);
2165 
2166 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2167 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2168 	    g_mirror_disk_state2str(disk->d_state)));
2169 	if (disk->d_sync.ds_consumer == NULL)
2170 		return;
2171 
2172 	if (type == 0) {
2173 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2174 		    sc->sc_name, g_mirror_get_diskname(disk));
2175 	} else /* if (type == 1) */ {
2176 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2177 		    sc->sc_name, g_mirror_get_diskname(disk));
2178 	}
2179 	g_mirror_regular_release(sc);
2180 	free(disk->d_sync.ds_bios, M_MIRROR);
2181 	disk->d_sync.ds_bios = NULL;
2182 	cp = disk->d_sync.ds_consumer;
2183 	disk->d_sync.ds_consumer = NULL;
2184 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2185 	sc->sc_sync.ds_ndisks--;
2186 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2187 	g_topology_lock();
2188 	g_mirror_kill_consumer(sc, cp);
2189 	g_topology_unlock();
2190 	sx_xlock(&sc->sc_lock);
2191 }
2192 
2193 static void
2194 g_mirror_launch_provider(struct g_mirror_softc *sc)
2195 {
2196 	struct g_mirror_disk *disk;
2197 	struct g_provider *pp, *dp;
2198 
2199 	sx_assert(&sc->sc_lock, SX_LOCKED);
2200 
2201 	g_topology_lock();
2202 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2203 	pp->flags |= G_PF_DIRECT_RECEIVE;
2204 	pp->mediasize = sc->sc_mediasize;
2205 	pp->sectorsize = sc->sc_sectorsize;
2206 	pp->stripesize = 0;
2207 	pp->stripeoffset = 0;
2208 
2209 	/* Splitting of unmapped BIO's could work but isn't implemented now */
2210 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2211 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2212 
2213 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2214 		if (disk->d_consumer && disk->d_consumer->provider) {
2215 			dp = disk->d_consumer->provider;
2216 			if (dp->stripesize > pp->stripesize) {
2217 				pp->stripesize = dp->stripesize;
2218 				pp->stripeoffset = dp->stripeoffset;
2219 			}
2220 			/* A provider underneath us doesn't support unmapped */
2221 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2222 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2223 				    "because of %s.", dp->name);
2224 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2225 			}
2226 		}
2227 	}
2228 	pp->private = sc;
2229 	sc->sc_refcnt++;
2230 	sc->sc_provider = pp;
2231 	g_error_provider(pp, 0);
2232 	g_topology_unlock();
2233 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2234 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2235 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2236 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2237 			g_mirror_sync_start(disk);
2238 	}
2239 }
2240 
2241 static void
2242 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2243 {
2244 	struct g_mirror_disk *disk;
2245 	struct bio *bp;
2246 
2247 	g_topology_assert_not();
2248 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2249 	    sc->sc_name));
2250 
2251 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2252 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2253 			g_mirror_sync_stop(disk, 1);
2254 	}
2255 
2256 	g_topology_lock();
2257 	g_error_provider(sc->sc_provider, ENXIO);
2258 	mtx_lock(&sc->sc_queue_mtx);
2259 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2260 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2261 		/*
2262 		 * Abort any pending I/O that wasn't generated by us.
2263 		 * Synchronization requests and requests destined for individual
2264 		 * mirror components can be destroyed immediately.
2265 		 */
2266 		if (bp->bio_to == sc->sc_provider &&
2267 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2268 			g_io_deliver(bp, ENXIO);
2269 		} else {
2270 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2271 				free(bp->bio_data, M_MIRROR);
2272 			g_destroy_bio(bp);
2273 		}
2274 	}
2275 	mtx_unlock(&sc->sc_queue_mtx);
2276 	g_wither_provider(sc->sc_provider, ENXIO);
2277 	sc->sc_provider = NULL;
2278 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2279 	g_topology_unlock();
2280 }
2281 
2282 static void
2283 g_mirror_go(void *arg)
2284 {
2285 	struct g_mirror_softc *sc;
2286 
2287 	sc = arg;
2288 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2289 	g_mirror_event_send(sc, 0,
2290 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2291 }
2292 
2293 static u_int
2294 g_mirror_determine_state(struct g_mirror_disk *disk)
2295 {
2296 	struct g_mirror_softc *sc;
2297 	u_int state;
2298 
2299 	sc = disk->d_softc;
2300 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2301 		if ((disk->d_flags &
2302 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2303 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2304 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2305 			/* Disk does not need synchronization. */
2306 			state = G_MIRROR_DISK_STATE_ACTIVE;
2307 		} else {
2308 			if ((sc->sc_flags &
2309 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2310 			    (disk->d_flags &
2311 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2312 				/*
2313 				 * We can start synchronization from
2314 				 * the stored offset.
2315 				 */
2316 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2317 			} else {
2318 				state = G_MIRROR_DISK_STATE_STALE;
2319 			}
2320 		}
2321 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2322 		/*
2323 		 * Reset all synchronization data for this disk,
2324 		 * because if it even was synchronized, it was
2325 		 * synchronized to disks with different syncid.
2326 		 */
2327 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2328 		disk->d_sync.ds_offset = 0;
2329 		disk->d_sync.ds_offset_done = 0;
2330 		disk->d_sync.ds_syncid = sc->sc_syncid;
2331 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2332 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2333 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2334 		} else {
2335 			state = G_MIRROR_DISK_STATE_STALE;
2336 		}
2337 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2338 		/*
2339 		 * Not good, NOT GOOD!
2340 		 * It means that mirror was started on stale disks
2341 		 * and more fresh disk just arrive.
2342 		 * If there were writes, mirror is broken, sorry.
2343 		 * I think the best choice here is don't touch
2344 		 * this disk and inform the user loudly.
2345 		 */
2346 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2347 		    "disk (%s) arrives!! It will not be connected to the "
2348 		    "running device.", sc->sc_name,
2349 		    g_mirror_get_diskname(disk));
2350 		g_mirror_destroy_disk(disk);
2351 		state = G_MIRROR_DISK_STATE_NONE;
2352 		/* Return immediately, because disk was destroyed. */
2353 		return (state);
2354 	}
2355 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2356 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2357 	return (state);
2358 }
2359 
/*
 * Update device state.
 *
 * STARTING: stale or mismatching components are kicked out, then it is
 * decided whether the device may be launched.  If so, dirty disks are
 * selected for resynchronization, the device moves to RUNNING and a state
 * event is queued for every remaining disk.
 * RUNNING: the device is flagged for destruction when it has neither ACTIVE
 * nor NEW disks; the provider is launched when there are ACTIVE disks and no
 * NEW ones; pending genid/syncid bumps are carried out.
 *
 * 'force' is true when the function is called because the start timeout has
 * expired.  The caller must hold sc_lock exclusively.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, bool force)
{
	struct g_mirror_disk *disk;
	u_int state;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk, *tdisk;
		const char *mismatch;
		uintmax_t found, newest;
		u_int dirty, ndisks;

		/*
		 * Pre-flight checks.  The _SAFE iterator is needed because
		 * disks may be destroyed inside the loop.
		 */
		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
			/*
			 * Confirm we already detected the newest genid.
			 */
			KASSERT(sc->sc_genid >= disk->d_genid,
			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
			    disk->d_genid, sc, sc->sc_genid));

			/* Kick out any previously tasted stale components. */
			if (disk->d_genid < sc->sc_genid) {
				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
				    "(device %s) (component=%u latest=%u), skipping.",
				    g_mirror_get_diskname(disk), sc->sc_name,
				    disk->d_genid, sc->sc_genid);
				g_mirror_destroy_disk(disk);
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				continue;
			}

			/*
			 * Confirm we already detected the newest syncid.
			 */
			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
			    ("%s: found newer syncid %u (sc:%p had %u).",
			     __func__, disk->d_sync.ds_syncid, sc,
			     sc->sc_syncid));

			/*
			 * Compare the values the disk was initialized with
			 * against the device's; only the first mismatching
			 * field is recorded.
			 */
#define DETECT_MISMATCH(field, name) \
			if (mismatch == NULL &&					\
			    disk->d_init_ ## field != sc->sc_ ## field) {	\
				mismatch = name;				\
				found = (intmax_t)disk->d_init_ ## field;	\
				newest = (intmax_t)sc->sc_ ## field;		\
			}
			mismatch = NULL;
			DETECT_MISMATCH(ndisks, "md_all");
			DETECT_MISMATCH(balance, "md_balance");
			DETECT_MISMATCH(slice, "md_slice");
			DETECT_MISMATCH(mediasize, "md_mediasize");
#undef DETECT_MISMATCH
			if (mismatch != NULL) {
				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
				    "field on %s (device %s) (found=%ju "
				    "newest=%ju).", mismatch,
				    g_mirror_get_diskname(disk), sc->sc_name,
				    found, newest);
				g_mirror_destroy_disk(disk);
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
				continue;
			}
		}

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready? If the timeout (force is true) has expired, and
		 * any disks are present, then yes. If we're permitted to launch
		 * before the timeout has expired and the expected number of
		 * current-generation mirror disks have been tasted, then yes.
		 */
		ndisks = g_mirror_ndisks(sc, -1);
		if ((force && ndisks > 0) ||
		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
			;
		} else if (ndisks == 0) {
			/*
			 * Disks went down in starting phase, so destroy
			 * device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
			    sc->sc_rootmount);
			root_mount_rel(sc->sc_rootmount);
			sc->sc_rootmount = NULL;
			return;
		} else {
			/* Not ready yet; keep waiting for more disks. */
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling timeout.
			 */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 */
		/*
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != sc->sc_syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except one
			 * with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != sc->sc_syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				/*
				 * NOTE(review): zeroing the syncid presumably
				 * makes g_mirror_determine_state() see the disk
				 * as older than the device - confirm.
				 */
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != sc->sc_syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		/*
		 * Queue a state-change event for every disk; 'state' is reused
		 * here to hold the per-disk state.
		 */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE)
				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No usable disks, so destroy the device.
			 */
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, launch provider if it doesn't
			 * exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
			if (sc->sc_rootmount != NULL) {
				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
				    __LINE__, sc->sc_rootmount);
				root_mount_rel(sc->sc_rootmount);
				sc->sc_rootmount = NULL;
			}
		}
		/*
		 * Genid should be bumped immediately, so do it here.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
			g_mirror_bump_genid(sc);
		}
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
			g_mirror_bump_syncid(sc);
		}
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}
2626 
/*
 * Update disk state and device state if needed.
 *
 * Moves 'disk' into 'state', performing the work attached to the transition:
 * inserting the disk into the device's list, starting or stopping
 * synchronization, updating or clearing metadata, or destroying the disk.
 * The caller must hold sc_lock exclusively.  The function always returns 0.
 *
 * DISK_STATE_CHANGED() logs the transition; it relies on the local variables
 * 'disk', 'state' and 'sc' and must be used before d_state is overwritten.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
static int
g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
{
	struct g_mirror_softc *sc;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

again:
	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
	    g_mirror_disk_state2str(state));
	switch (state) {
	case G_MIRROR_DISK_STATE_NEW:
		/*
		 * Possible scenarios:
		 * 1. A new disk arrives.
		 */
		/* Previous state should be NONE. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_state = state;
		/*
		 * Insert the disk so that the list stays ordered by
		 * descending priority; a new disk goes in front of existing
		 * disks of equal priority.
		 */
		g_topology_lock();
		if (LIST_EMPTY(&sc->sc_disks))
			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
		else {
			struct g_mirror_disk *dp;

			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
				if (disk->d_priority >= dp->d_priority) {
					LIST_INSERT_BEFORE(dp, disk, d_next);
					/* Inserted; nothing left to do. */
					dp = NULL;
					break;
				}
				/* Stop on the last element, keeping it in dp. */
				if (LIST_NEXT(dp, d_next) == NULL)
					break;
			}
			if (dp != NULL)
				LIST_INSERT_AFTER(dp, disk, d_next);
		}
		g_topology_unlock();
		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
			break;
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * The device is already running: work out the disk's next
		 * state and process it right away.  NONE means the disk was
		 * destroyed by g_mirror_determine_state().
		 */
		state = g_mirror_determine_state(disk);
		if (state != G_MIRROR_DISK_STATE_NONE)
			goto again;
		break;
	case G_MIRROR_DISK_STATE_ACTIVE:
		/*
		 * Possible scenarios:
		 * 1. New disk does not need synchronization.
		 * 2. Synchronization process finished successfully.
		 */
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/* Previous state should be NEW or SYNCHRONIZING. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
			g_mirror_sync_stop(disk, 0);
		}
		disk->d_state = state;
		disk->d_sync.ds_offset = 0;
		disk->d_sync.ds_offset_done = 0;
		g_mirror_update_idle(sc, disk);
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_STALE:
		/*
		 * Possible scenarios:
		 * 1. Stale disk was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * STALE state is only possible if device is marked
		 * NOAUTOSYNC.
		 */
		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		g_mirror_update_metadata(disk);
		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
		    sc->sc_name, g_mirror_get_diskname(disk));
		break;
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		/*
		 * Possible scenarios:
		 * 1. Disk which needs synchronization was connected.
		 */
		/* Previous state should be NEW. */
		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
		    g_mirror_device_state2str(sc->sc_state),
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
		DISK_STATE_CHANGED();

		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		disk->d_state = state;
		/*
		 * Without a provider nothing is started here;
		 * g_mirror_launch_provider() starts synchronization for
		 * SYNCHRONIZING disks once the provider exists.
		 */
		if (sc->sc_provider != NULL) {
			g_mirror_sync_start(disk);
			g_mirror_update_metadata(disk);
		}
		break;
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		/*
		 * Possible scenarios:
		 * 1. Device wasn't running yet, but the disk disappeared.
		 * 2. Disk was active and disappeared.
		 * 3. Disk disappeared during the synchronization process.
		 */
		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
			/*
			 * Previous state should be ACTIVE, STALE or
			 * SYNCHRONIZING.
			 */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
			/* Previous state should be NEW. */
			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
			    ("Wrong disk state (%s, %s).",
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
			/*
			 * Reset bumping syncid if disk disappeared in STARTING
			 * state.
			 */
			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
#ifdef	INVARIANTS
		} else {
			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
			    sc->sc_name,
			    g_mirror_device_state2str(sc->sc_state),
			    g_mirror_get_diskname(disk),
			    g_mirror_disk_state2str(disk->d_state)));
#endif
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		break;
	case G_MIRROR_DISK_STATE_DESTROY:
	    {
		int error;

		/* The disk is kept if its metadata cannot be cleared. */
		error = g_mirror_clear_metadata(disk);
		if (error != 0) {
			G_MIRROR_DEBUG(0,
			    "Device %s: failed to clear metadata on %s: %d.",
			    sc->sc_name, g_mirror_get_diskname(disk), error);
			break;
		}
		DISK_STATE_CHANGED();
		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
		    sc->sc_name, g_mirror_get_diskname(disk));

		g_mirror_destroy_disk(disk);
		sc->sc_ndisks--;
		/*
		 * 'disk' is reused as the iterator: refresh the metadata of
		 * every remaining disk now that sc_ndisks has changed.
		 */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			g_mirror_update_metadata(disk);
		}
		break;
	    }
	default:
		KASSERT(1 == 0, ("Unknown state (%u).", state));
		break;
	}
	return (0);
}
#undef	DISK_STATE_CHANGED
2852 
/*
 * Read and decode mirror metadata from the provider attached to 'cp'.
 *
 * The consumer is opened for reading for the duration of the call.  The
 * topology lock must be held on entry; it is dropped while the last sector
 * is read and re-acquired before returning.
 *
 * Returns 0 with 'md' filled in on success.  Otherwise returns the error
 * from g_access() or g_read_data(), EINVAL when the magic does not match or
 * the metadata version is newer than G_MIRROR_VERSION, or the error reported
 * by mirror_metadata_decode().
 */
int
g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
{
	struct g_provider *pp;
	u_char *buf;
	int error;

	g_topology_assert();

	error = g_access(cp, 1, 0, 0);
	if (error != 0)
		return (error);
	pp = cp->provider;
	g_topology_unlock();
	/* Metadata are stored on last sector. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
	    &error);
	g_topology_lock();
	/* Drop the read reference taken above; the result is not checked. */
	g_access(cp, -1, 0, 0);
	if (buf == NULL) {
		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    cp->provider->name, error);
		return (error);
	}

	/*
	 * Decode metadata.  The decode result is examined only after the
	 * magic and version checks, so a sector with a different magic or a
	 * newer version is reported as EINVAL rather than as a hash mismatch.
	 */
	error = mirror_metadata_decode(buf, md);
	g_free(buf);
	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
		return (EINVAL);
	if (md->md_version > G_MIRROR_VERSION) {
		G_MIRROR_DEBUG(0,
		    "Kernel module is too old to handle metadata from %s.",
		    cp->provider->name);
		return (EINVAL);
	}
	if (error != 0) {
		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
		    cp->provider->name);
		return (error);
	}

	return (0);
}
2897 
2898 static int
2899 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2900     struct g_mirror_metadata *md)
2901 {
2902 
2903 	G_MIRROR_DEBUG(2, "%s: md_did 0x%u disk %s device %s md_all 0x%x "
2904 	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
2905 	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
2906 	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
2907 	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
2908 	    "sc_state 0x%x.",
2909 	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
2910 	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
2911 	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
2912 	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
2913 	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
2914 	    md->md_genid, md->md_priority, sc->sc_state);
2915 
2916 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2917 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2918 		    pp->name, md->md_did);
2919 		return (EEXIST);
2920 	}
2921 	if (sc->sc_mediasize > pp->mediasize) {
2922 		G_MIRROR_DEBUG(1,
2923 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2924 		    sc->sc_name);
2925 		return (EINVAL);
2926 	}
2927 	if (md->md_sectorsize != sc->sc_sectorsize) {
2928 		G_MIRROR_DEBUG(1,
2929 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2930 		    "md_sectorsize", pp->name, sc->sc_name);
2931 		return (EINVAL);
2932 	}
2933 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2934 		G_MIRROR_DEBUG(1,
2935 		    "Invalid sector size of disk %s (device %s), skipping.",
2936 		    pp->name, sc->sc_name);
2937 		return (EINVAL);
2938 	}
2939 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2940 		G_MIRROR_DEBUG(1,
2941 		    "Invalid device flags on disk %s (device %s), skipping.",
2942 		    pp->name, sc->sc_name);
2943 		return (EINVAL);
2944 	}
2945 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2946 		G_MIRROR_DEBUG(1,
2947 		    "Invalid disk flags on disk %s (device %s), skipping.",
2948 		    pp->name, sc->sc_name);
2949 		return (EINVAL);
2950 	}
2951 	return (0);
2952 }
2953 
2954 int
2955 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2956     struct g_mirror_metadata *md)
2957 {
2958 	struct g_mirror_disk *disk;
2959 	int error;
2960 
2961 	g_topology_assert_not();
2962 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2963 
2964 	error = g_mirror_check_metadata(sc, pp, md);
2965 	if (error != 0)
2966 		return (error);
2967 
2968 	if (md->md_genid < sc->sc_genid) {
2969 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2970 		    pp->name, sc->sc_name);
2971 		return (EINVAL);
2972 	}
2973 
2974 	/*
2975 	 * If the component disk we're tasting has newer metadata than the
2976 	 * STARTING gmirror device, refresh the device from the component.
2977 	 */
2978 	error = g_mirror_refresh_device(sc, pp, md);
2979 	if (error != 0)
2980 		return (error);
2981 
2982 	disk = g_mirror_init_disk(sc, pp, md, &error);
2983 	if (disk == NULL)
2984 		return (error);
2985 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2986 	    G_MIRROR_EVENT_WAIT);
2987 	if (error != 0)
2988 		return (error);
2989 	if (md->md_version < G_MIRROR_VERSION) {
2990 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2991 		    pp->name, md->md_version, G_MIRROR_VERSION);
2992 		g_mirror_update_metadata(disk);
2993 	}
2994 	return (0);
2995 }
2996 
2997 static void
2998 g_mirror_destroy_delayed(void *arg, int flag)
2999 {
3000 	struct g_mirror_softc *sc;
3001 	int error;
3002 
3003 	if (flag == EV_CANCEL) {
3004 		G_MIRROR_DEBUG(1, "Destroying canceled.");
3005 		return;
3006 	}
3007 	sc = arg;
3008 	g_topology_unlock();
3009 	sx_xlock(&sc->sc_lock);
3010 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3011 	    ("DESTROY flag set on %s.", sc->sc_name));
3012 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3013 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3014 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3015 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3016 	if (error != 0) {
3017 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3018 		    sc->sc_name, error);
3019 		sx_xunlock(&sc->sc_lock);
3020 	}
3021 	g_topology_lock();
3022 }
3023 
3024 static int
3025 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3026 {
3027 	struct g_mirror_softc *sc;
3028 	int error = 0;
3029 
3030 	g_topology_assert();
3031 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3032 	    acw, ace);
3033 
3034 	sc = pp->private;
3035 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3036 
3037 	g_topology_unlock();
3038 	sx_xlock(&sc->sc_lock);
3039 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3040 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3041 	    LIST_EMPTY(&sc->sc_disks)) {
3042 		if (acr > 0 || acw > 0 || ace > 0)
3043 			error = ENXIO;
3044 		goto end;
3045 	}
3046 	sc->sc_provider_open += acr + acw + ace;
3047 	if (pp->acw + acw == 0)
3048 		g_mirror_idle(sc, 0);
3049 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3050 	    sc->sc_provider_open == 0)
3051 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3052 end:
3053 	sx_xunlock(&sc->sc_lock);
3054 	g_topology_lock();
3055 	return (error);
3056 }
3057 
3058 static void
3059 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3060     const struct g_mirror_metadata *md)
3061 {
3062 
3063 	sc->sc_genid = md->md_genid;
3064 	sc->sc_syncid = md->md_syncid;
3065 
3066 	sc->sc_slice = md->md_slice;
3067 	sc->sc_balance = md->md_balance;
3068 	sc->sc_mediasize = md->md_mediasize;
3069 	sc->sc_ndisks = md->md_all;
3070 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3071 	sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3072 }
3073 
3074 struct g_geom *
3075 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3076     u_int type)
3077 {
3078 	struct g_mirror_softc *sc;
3079 	struct g_geom *gp;
3080 	int error, timeout;
3081 
3082 	g_topology_assert();
3083 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3084 	    md->md_mid);
3085 
3086 	/* One disk is minimum. */
3087 	if (md->md_all < 1)
3088 		return (NULL);
3089 	/*
3090 	 * Action geom.
3091 	 */
3092 	gp = g_new_geomf(mp, "%s", md->md_name);
3093 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3094 	gp->start = g_mirror_start;
3095 	gp->orphan = g_mirror_orphan;
3096 	gp->access = g_mirror_access;
3097 	gp->dumpconf = g_mirror_dumpconf;
3098 
3099 	sc->sc_type = type;
3100 	sc->sc_id = md->md_mid;
3101 	g_mirror_reinit_from_metadata(sc, md);
3102 	sc->sc_sectorsize = md->md_sectorsize;
3103 	sc->sc_bump_id = 0;
3104 	sc->sc_idle = 1;
3105 	sc->sc_last_write = time_uptime;
3106 	sc->sc_writes = 0;
3107 	sc->sc_refcnt = 1;
3108 	sx_init(&sc->sc_lock, "gmirror:lock");
3109 	TAILQ_INIT(&sc->sc_queue);
3110 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3111 	TAILQ_INIT(&sc->sc_regular_delayed);
3112 	TAILQ_INIT(&sc->sc_inflight);
3113 	TAILQ_INIT(&sc->sc_sync_delayed);
3114 	LIST_INIT(&sc->sc_disks);
3115 	TAILQ_INIT(&sc->sc_events);
3116 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3117 	callout_init(&sc->sc_callout, 1);
3118 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3119 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3120 	gp->softc = sc;
3121 	sc->sc_geom = gp;
3122 	sc->sc_provider = NULL;
3123 	sc->sc_provider_open = 0;
3124 	/*
3125 	 * Synchronization geom.
3126 	 */
3127 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3128 	gp->softc = sc;
3129 	gp->orphan = g_mirror_orphan;
3130 	sc->sc_sync.ds_geom = gp;
3131 	sc->sc_sync.ds_ndisks = 0;
3132 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3133 	    "g_mirror %s", md->md_name);
3134 	if (error != 0) {
3135 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3136 		    sc->sc_name);
3137 		g_destroy_geom(sc->sc_sync.ds_geom);
3138 		g_destroy_geom(sc->sc_geom);
3139 		g_mirror_free_device(sc);
3140 		return (NULL);
3141 	}
3142 
3143 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3144 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3145 
3146 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3147 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3148 	/*
3149 	 * Run timeout.
3150 	 */
3151 	timeout = g_mirror_timeout * hz;
3152 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3153 	return (sc->sc_geom);
3154 }
3155 
3156 int
3157 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3158 {
3159 	struct g_mirror_disk *disk;
3160 
3161 	g_topology_assert_not();
3162 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3163 
3164 	if (sc->sc_provider_open != 0) {
3165 		switch (how) {
3166 		case G_MIRROR_DESTROY_SOFT:
3167 			G_MIRROR_DEBUG(1,
3168 			    "Device %s is still open (%d).", sc->sc_name,
3169 			    sc->sc_provider_open);
3170 			return (EBUSY);
3171 		case G_MIRROR_DESTROY_DELAYED:
3172 			G_MIRROR_DEBUG(1,
3173 			    "Device %s will be destroyed on last close.",
3174 			    sc->sc_name);
3175 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3176 				if (disk->d_state ==
3177 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3178 					g_mirror_sync_stop(disk, 1);
3179 				}
3180 			}
3181 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3182 			return (EBUSY);
3183 		case G_MIRROR_DESTROY_HARD:
3184 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3185 			    "can't be definitely removed.", sc->sc_name);
3186 		}
3187 	}
3188 
3189 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3190 		sx_xunlock(&sc->sc_lock);
3191 		return (0);
3192 	}
3193 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3194 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3195 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3196 	sx_xunlock(&sc->sc_lock);
3197 	mtx_lock(&sc->sc_queue_mtx);
3198 	wakeup(sc);
3199 	mtx_unlock(&sc->sc_queue_mtx);
3200 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3201 	while (sc->sc_worker != NULL)
3202 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3203 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3204 	sx_xlock(&sc->sc_lock);
3205 	g_mirror_destroy_device(sc);
3206 	return (0);
3207 }
3208 
3209 static void
3210 g_mirror_taste_orphan(struct g_consumer *cp)
3211 {
3212 
3213 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3214 	    cp->provider->name));
3215 }
3216 
3217 static struct g_geom *
3218 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3219 {
3220 	struct g_mirror_metadata md;
3221 	struct g_mirror_softc *sc;
3222 	struct g_consumer *cp;
3223 	struct g_geom *gp;
3224 	int error;
3225 
3226 	g_topology_assert();
3227 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3228 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3229 
3230 	gp = g_new_geomf(mp, "mirror:taste");
3231 	/*
3232 	 * This orphan function should be never called.
3233 	 */
3234 	gp->orphan = g_mirror_taste_orphan;
3235 	cp = g_new_consumer(gp);
3236 	g_attach(cp, pp);
3237 	error = g_mirror_read_metadata(cp, &md);
3238 	g_detach(cp);
3239 	g_destroy_consumer(cp);
3240 	g_destroy_geom(gp);
3241 	if (error != 0)
3242 		return (NULL);
3243 	gp = NULL;
3244 
3245 	if (md.md_provider[0] != '\0' &&
3246 	    !g_compare_names(md.md_provider, pp->name))
3247 		return (NULL);
3248 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3249 		return (NULL);
3250 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3251 		G_MIRROR_DEBUG(0,
3252 		    "Device %s: provider %s marked as inactive, skipping.",
3253 		    md.md_name, pp->name);
3254 		return (NULL);
3255 	}
3256 	if (g_mirror_debug >= 2)
3257 		mirror_metadata_dump(&md);
3258 
3259 	/*
3260 	 * Let's check if device already exists.
3261 	 */
3262 	sc = NULL;
3263 	LIST_FOREACH(gp, &mp->geom, geom) {
3264 		sc = gp->softc;
3265 		if (sc == NULL)
3266 			continue;
3267 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3268 			continue;
3269 		if (sc->sc_sync.ds_geom == gp)
3270 			continue;
3271 		if (strcmp(md.md_name, sc->sc_name) != 0)
3272 			continue;
3273 		if (md.md_mid != sc->sc_id) {
3274 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3275 			    sc->sc_name);
3276 			return (NULL);
3277 		}
3278 		break;
3279 	}
3280 	if (gp == NULL) {
3281 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3282 		if (gp == NULL) {
3283 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3284 			    md.md_name);
3285 			return (NULL);
3286 		}
3287 		sc = gp->softc;
3288 	}
3289 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3290 	g_topology_unlock();
3291 	sx_xlock(&sc->sc_lock);
3292 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3293 	error = g_mirror_add_disk(sc, pp, &md);
3294 	if (error != 0) {
3295 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3296 		    pp->name, gp->name, error);
3297 		if (LIST_EMPTY(&sc->sc_disks)) {
3298 			g_cancel_event(sc);
3299 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3300 			g_topology_lock();
3301 			return (NULL);
3302 		}
3303 		gp = NULL;
3304 	}
3305 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3306 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3307 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3308 		g_topology_lock();
3309 		return (NULL);
3310 	}
3311 	sx_xunlock(&sc->sc_lock);
3312 	g_topology_lock();
3313 	return (gp);
3314 }
3315 
3316 static void
3317 g_mirror_resize(struct g_consumer *cp)
3318 {
3319 	struct g_mirror_disk *disk;
3320 
3321 	g_topology_assert();
3322 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3323 
3324 	disk = cp->private;
3325 	if (disk == NULL)
3326 		return;
3327 	g_topology_unlock();
3328 	g_mirror_update_metadata(disk);
3329 	g_topology_lock();
3330 }
3331 
3332 static int
3333 g_mirror_destroy_geom(struct gctl_req *req __unused,
3334     struct g_class *mp __unused, struct g_geom *gp)
3335 {
3336 	struct g_mirror_softc *sc;
3337 	int error;
3338 
3339 	g_topology_unlock();
3340 	sc = gp->softc;
3341 	sx_xlock(&sc->sc_lock);
3342 	g_cancel_event(sc);
3343 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3344 	if (error != 0)
3345 		sx_xunlock(&sc->sc_lock);
3346 	g_topology_lock();
3347 	return (error);
3348 }
3349 
3350 static void
3351 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3352     struct g_consumer *cp, struct g_provider *pp)
3353 {
3354 	struct g_mirror_softc *sc;
3355 
3356 	g_topology_assert();
3357 
3358 	sc = gp->softc;
3359 	if (sc == NULL)
3360 		return;
3361 	/* Skip synchronization geom. */
3362 	if (gp == sc->sc_sync.ds_geom)
3363 		return;
3364 	if (pp != NULL) {
3365 		/* Nothing here. */
3366 	} else if (cp != NULL) {
3367 		struct g_mirror_disk *disk;
3368 
3369 		disk = cp->private;
3370 		if (disk == NULL)
3371 			return;
3372 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3373 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3374 			sbuf_printf(sb, "%s<Synchronized>", indent);
3375 			if (disk->d_sync.ds_offset == 0)
3376 				sbuf_printf(sb, "0%%");
3377 			else
3378 				sbuf_printf(sb, "%u%%",
3379 				    (u_int)((disk->d_sync.ds_offset * 100) /
3380 				    sc->sc_mediasize));
3381 			sbuf_printf(sb, "</Synchronized>\n");
3382 			if (disk->d_sync.ds_offset > 0)
3383 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3384 				    "</BytesSynced>\n", indent,
3385 				    (intmax_t)disk->d_sync.ds_offset);
3386 		}
3387 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3388 		    disk->d_sync.ds_syncid);
3389 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3390 		    disk->d_genid);
3391 		sbuf_printf(sb, "%s<Flags>", indent);
3392 		if (disk->d_flags == 0)
3393 			sbuf_printf(sb, "NONE");
3394 		else {
3395 			int first = 1;
3396 
3397 #define	ADD_FLAG(flag, name)	do {					\
3398 	if ((disk->d_flags & (flag)) != 0) {				\
3399 		if (!first)						\
3400 			sbuf_printf(sb, ", ");				\
3401 		else							\
3402 			first = 0;					\
3403 		sbuf_printf(sb, name);					\
3404 	}								\
3405 } while (0)
3406 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3407 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3408 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3409 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3410 			    "SYNCHRONIZING");
3411 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3412 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3413 #undef	ADD_FLAG
3414 		}
3415 		sbuf_printf(sb, "</Flags>\n");
3416 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3417 		    disk->d_priority);
3418 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3419 		    g_mirror_disk_state2str(disk->d_state));
3420 	} else {
3421 		sbuf_printf(sb, "%s<Type>", indent);
3422 		switch (sc->sc_type) {
3423 		case G_MIRROR_TYPE_AUTOMATIC:
3424 			sbuf_printf(sb, "AUTOMATIC");
3425 			break;
3426 		case G_MIRROR_TYPE_MANUAL:
3427 			sbuf_printf(sb, "MANUAL");
3428 			break;
3429 		default:
3430 			sbuf_printf(sb, "UNKNOWN");
3431 			break;
3432 		}
3433 		sbuf_printf(sb, "</Type>\n");
3434 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3435 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3436 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3437 		sbuf_printf(sb, "%s<Flags>", indent);
3438 		if (sc->sc_flags == 0)
3439 			sbuf_printf(sb, "NONE");
3440 		else {
3441 			int first = 1;
3442 
3443 #define	ADD_FLAG(flag, name)	do {					\
3444 	if ((sc->sc_flags & (flag)) != 0) {				\
3445 		if (!first)						\
3446 			sbuf_printf(sb, ", ");				\
3447 		else							\
3448 			first = 0;					\
3449 		sbuf_printf(sb, name);					\
3450 	}								\
3451 } while (0)
3452 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3453 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3454 #undef	ADD_FLAG
3455 		}
3456 		sbuf_printf(sb, "</Flags>\n");
3457 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3458 		    (u_int)sc->sc_slice);
3459 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3460 		    balance_name(sc->sc_balance));
3461 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3462 		    sc->sc_ndisks);
3463 		sbuf_printf(sb, "%s<State>", indent);
3464 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3465 			sbuf_printf(sb, "%s", "STARTING");
3466 		else if (sc->sc_ndisks ==
3467 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3468 			sbuf_printf(sb, "%s", "COMPLETE");
3469 		else
3470 			sbuf_printf(sb, "%s", "DEGRADED");
3471 		sbuf_printf(sb, "</State>\n");
3472 	}
3473 }
3474 
3475 static void
3476 g_mirror_shutdown_post_sync(void *arg, int howto)
3477 {
3478 	struct g_class *mp;
3479 	struct g_geom *gp, *gp2;
3480 	struct g_mirror_softc *sc;
3481 	int error;
3482 
3483 	if (panicstr != NULL)
3484 		return;
3485 
3486 	mp = arg;
3487 	g_topology_lock();
3488 	g_mirror_shutdown = 1;
3489 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3490 		if ((sc = gp->softc) == NULL)
3491 			continue;
3492 		/* Skip synchronization geom. */
3493 		if (gp == sc->sc_sync.ds_geom)
3494 			continue;
3495 		g_topology_unlock();
3496 		sx_xlock(&sc->sc_lock);
3497 		g_mirror_idle(sc, -1);
3498 		g_cancel_event(sc);
3499 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3500 		if (error != 0)
3501 			sx_xunlock(&sc->sc_lock);
3502 		g_topology_lock();
3503 	}
3504 	g_topology_unlock();
3505 }
3506 
3507 static void
3508 g_mirror_init(struct g_class *mp)
3509 {
3510 
3511 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3512 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3513 	if (g_mirror_post_sync == NULL)
3514 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3515 }
3516 
3517 static void
3518 g_mirror_fini(struct g_class *mp)
3519 {
3520 
3521 	if (g_mirror_post_sync != NULL)
3522 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3523 }
3524 
3525 /*
3526  * Refresh the mirror device's metadata when gmirror encounters a newer
3527  * generation as the individual components are being added to the mirror set.
3528  */
3529 static int
3530 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3531     const struct g_mirror_metadata *md)
3532 {
3533 
3534 	g_topology_assert_not();
3535 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3536 
3537 	KASSERT(sc->sc_genid <= md->md_genid,
3538 	    ("%s: attempted to refresh from stale component %s (device %s) "
3539 	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3540 	    sc->sc_genid));
3541 
3542 	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3543 	    sc->sc_syncid >= md->md_syncid))
3544 		return (0);
3545 
3546 	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3547 	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3548 	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3549 	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3550 
3551 	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3552 		/* Probable data corruption detected */
3553 		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3554 		    "(device=%s genid=%u). A stale mirror device was launched.",
3555 		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3556 		    sc->sc_genid);
3557 		return (EINVAL);
3558 	}
3559 
3560 	/* Update softc */
3561 	g_mirror_reinit_from_metadata(sc, md);
3562 
3563 	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3564 	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3565 	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3566 	    md->md_syncid, (unsigned)md->md_all);
3567 
3568 	return (0);
3569 }
3570 
3571 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3572 MODULE_VERSION(geom_mirror, 0);
3573