xref: /freebsd/sys/geom/mirror/g_mirror.c (revision 79ac3c12a714bcd3f2354c52d948aed9575c46d6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/eventhandler.h>
36 #include <sys/fail.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sbuf.h>
45 #include <sys/sched.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48 
49 #include <geom/geom.h>
50 #include <geom/geom_dbg.h>
51 #include <geom/mirror/g_mirror.h>
52 
53 FEATURE(geom_mirror, "GEOM mirroring support");
54 
55 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
56 
57 SYSCTL_DECL(_kern_geom);
58 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
59     "GEOM_MIRROR stuff");
60 int g_mirror_debug = 0;
61 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
62     "Debug level");
63 bool g_launch_mirror_before_timeout = true;
64 SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
65     CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
66     "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
67     "before launching mirrors");
68 static u_int g_mirror_timeout = 4;
69 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
70     0, "Time to wait on all mirror components");
71 static u_int g_mirror_idletime = 5;
72 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
73     &g_mirror_idletime, 0, "Mark components as clean when idling");
74 static u_int g_mirror_disconnect_on_failure = 1;
75 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
76     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
77 static u_int g_mirror_syncreqs = 2;
78 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
79     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
80 static u_int g_mirror_sync_period = 5;
81 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
82     &g_mirror_sync_period, 0,
83     "Metadata update period during synchronization, in seconds");
84 
85 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
86 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
87 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
88 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
89 } while (0)
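
/*
 * Illustrative usage (a sketch; it mirrors the actual call in
 * g_mirror_event_dispatch() below).  The macro simply brackets msleep(9)
 * with debug messages so that sleep/wakeup pairs are visible at debug
 * level 4:
 *
 *	MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event", hz * 5);
 */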
90 
91 static eventhandler_tag g_mirror_post_sync = NULL;
92 static int g_mirror_shutdown = 0;
93 
94 static g_ctl_destroy_geom_t g_mirror_destroy_geom;
95 static g_taste_t g_mirror_taste;
96 static g_init_t g_mirror_init;
97 static g_fini_t g_mirror_fini;
98 static g_provgone_t g_mirror_providergone;
99 static g_resize_t g_mirror_resize;
100 
101 struct g_class g_mirror_class = {
102 	.name = G_MIRROR_CLASS_NAME,
103 	.version = G_VERSION,
104 	.ctlreq = g_mirror_config,
105 	.taste = g_mirror_taste,
106 	.destroy_geom = g_mirror_destroy_geom,
107 	.init = g_mirror_init,
108 	.fini = g_mirror_fini,
109 	.providergone = g_mirror_providergone,
110 	.resize = g_mirror_resize
111 };
112 
113 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
114 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
115 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
116 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
117     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
118 static void g_mirror_timeout_drain(struct g_mirror_softc *sc);
119 static int g_mirror_refresh_device(struct g_mirror_softc *sc,
120     const struct g_provider *pp, const struct g_mirror_metadata *md);
121 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
122     struct bio *bp, off_t offset);
123 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
124 static void g_mirror_register_request(struct g_mirror_softc *sc,
125     struct bio *bp);
126 static void g_mirror_sync_release(struct g_mirror_softc *sc);
127 
128 static const char *
129 g_mirror_disk_state2str(int state)
130 {
131 
132 	switch (state) {
133 	case G_MIRROR_DISK_STATE_NONE:
134 		return ("NONE");
135 	case G_MIRROR_DISK_STATE_NEW:
136 		return ("NEW");
137 	case G_MIRROR_DISK_STATE_ACTIVE:
138 		return ("ACTIVE");
139 	case G_MIRROR_DISK_STATE_STALE:
140 		return ("STALE");
141 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
142 		return ("SYNCHRONIZING");
143 	case G_MIRROR_DISK_STATE_DISCONNECTED:
144 		return ("DISCONNECTED");
145 	case G_MIRROR_DISK_STATE_DESTROY:
146 		return ("DESTROY");
147 	default:
148 		return ("INVALID");
149 	}
150 }
151 
152 static const char *
153 g_mirror_device_state2str(int state)
154 {
155 
156 	switch (state) {
157 	case G_MIRROR_DEVICE_STATE_STARTING:
158 		return ("STARTING");
159 	case G_MIRROR_DEVICE_STATE_RUNNING:
160 		return ("RUNNING");
161 	default:
162 		return ("INVALID");
163 	}
164 }
165 
166 static const char *
167 g_mirror_get_diskname(struct g_mirror_disk *disk)
168 {
169 
170 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
171 		return ("[unknown]");
172 	return (disk->d_name);
173 }
174 
175 /*
176  * --- Event handling functions ---
177  * Events in geom_mirror are used to apply all disk and device state
178  * changes from a single thread, which simplifies locking.
179  */
180 static void
181 g_mirror_event_free(struct g_mirror_event *ep)
182 {
183 
184 	free(ep, M_MIRROR);
185 }
186 
187 static int
188 g_mirror_event_dispatch(struct g_mirror_event *ep, void *arg, int state,
189     int flags)
190 {
191 	struct g_mirror_softc *sc;
192 	struct g_mirror_disk *disk;
193 	int error;
194 
195 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
196 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
197 		disk = NULL;
198 		sc = arg;
199 	} else {
200 		disk = arg;
201 		sc = disk->d_softc;
202 	}
203 	ep->e_disk = disk;
204 	ep->e_state = state;
205 	ep->e_flags = flags;
206 	ep->e_error = 0;
207 	mtx_lock(&sc->sc_events_mtx);
208 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
209 	mtx_unlock(&sc->sc_events_mtx);
210 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
211 	mtx_lock(&sc->sc_queue_mtx);
212 	wakeup(sc);
213 	mtx_unlock(&sc->sc_queue_mtx);
214 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
215 		return (0);
216 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
217 	sx_xunlock(&sc->sc_lock);
218 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
219 		mtx_lock(&sc->sc_events_mtx);
220 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
221 		    hz * 5);
222 	}
223 	error = ep->e_error;
224 	g_mirror_event_free(ep);
225 	sx_xlock(&sc->sc_lock);
226 	return (error);
227 }
228 
229 int
230 g_mirror_event_send(void *arg, int state, int flags)
231 {
232 	struct g_mirror_event *ep;
233 
234 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
235 	return (g_mirror_event_dispatch(ep, arg, state, flags));
236 }
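
/*
 * Editorial sketch (not part of the original source): a typical caller
 * posts a disk event without waiting for it to be processed, as done
 * elsewhere in this file:
 *
 *	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 *	    G_MIRROR_EVENT_DONTWAIT);
 *
 * Without G_MIRROR_EVENT_DONTWAIT, the caller releases sc_lock and
 * sleeps until the worker thread sets G_MIRROR_EVENT_DONE, then gets
 * back the event's e_error value.
 */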
237 
238 static struct g_mirror_event *
239 g_mirror_event_first(struct g_mirror_softc *sc)
240 {
241 	struct g_mirror_event *ep;
242 
243 	mtx_lock(&sc->sc_events_mtx);
244 	ep = TAILQ_FIRST(&sc->sc_events);
245 	mtx_unlock(&sc->sc_events_mtx);
246 	return (ep);
247 }
248 
249 static void
250 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
251 {
252 
253 	mtx_lock(&sc->sc_events_mtx);
254 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
255 	mtx_unlock(&sc->sc_events_mtx);
256 }
257 
258 static void
259 g_mirror_event_cancel(struct g_mirror_disk *disk)
260 {
261 	struct g_mirror_softc *sc;
262 	struct g_mirror_event *ep, *tmpep;
263 
264 	sc = disk->d_softc;
265 	sx_assert(&sc->sc_lock, SX_XLOCKED);
266 
267 	mtx_lock(&sc->sc_events_mtx);
268 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
269 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
270 			continue;
271 		if (ep->e_disk != disk)
272 			continue;
273 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
274 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
275 			g_mirror_event_free(ep);
276 		else {
277 			ep->e_error = ECANCELED;
278 			wakeup(ep);
279 		}
280 	}
281 	mtx_unlock(&sc->sc_events_mtx);
282 }
283 
284 /*
285  * Return the number of disks in the given state.
286  * If state is equal to -1, count all connected disks.
287  */
288 u_int
289 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
290 {
291 	struct g_mirror_disk *disk;
292 	u_int n = 0;
293 
294 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
295 		if (state == -1 || disk->d_state == state)
296 			n++;
297 	}
298 	return (n);
299 }
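
/*
 * Editorial sketch: callers usually compare the ACTIVE count against a
 * threshold, e.g. (as in the error paths later in this file) to decide
 * whether a failing component may be disconnected without losing the
 * last good copy:
 *
 *	if (g_mirror_disconnect_on_failure &&
 *	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
 *		... it is safe to drop the failing component ...
 */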
300 
301 /*
302  * Find a disk in the mirror by its disk ID.
303  */
304 static struct g_mirror_disk *
305 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
306 {
307 	struct g_mirror_disk *disk;
308 
309 	sx_assert(&sc->sc_lock, SX_XLOCKED);
310 
311 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
312 		if (disk->d_id == id)
313 			return (disk);
314 	}
315 	return (NULL);
316 }
317 
318 static u_int
319 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
320 {
321 	struct bio *bp;
322 	u_int nreqs = 0;
323 
324 	mtx_lock(&sc->sc_queue_mtx);
325 	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
326 		if (bp->bio_from == cp)
327 			nreqs++;
328 	}
329 	mtx_unlock(&sc->sc_queue_mtx);
330 	return (nreqs);
331 }
332 
333 static int
334 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
335 {
336 
337 	if (cp->index > 0) {
338 		G_MIRROR_DEBUG(2,
339 		    "I/O requests for %s exist, can't destroy it now.",
340 		    cp->provider->name);
341 		return (1);
342 	}
343 	if (g_mirror_nrequests(sc, cp) > 0) {
344 		G_MIRROR_DEBUG(2,
345 		    "I/O requests for %s in queue, can't destroy it now.",
346 		    cp->provider->name);
347 		return (1);
348 	}
349 	return (0);
350 }
351 
352 static void
353 g_mirror_destroy_consumer(void *arg, int flags __unused)
354 {
355 	struct g_consumer *cp;
356 
357 	g_topology_assert();
358 
359 	cp = arg;
360 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
361 	g_detach(cp);
362 	g_destroy_consumer(cp);
363 }
364 
365 static void
366 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
367 {
368 	struct g_provider *pp;
369 	int retaste_wait;
370 
371 	g_topology_assert();
372 
373 	cp->private = NULL;
374 	if (g_mirror_is_busy(sc, cp))
375 		return;
376 	pp = cp->provider;
377 	retaste_wait = 0;
378 	if (cp->acw == 1) {
379 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
380 			retaste_wait = 1;
381 	}
382 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
383 	    -cp->acw, -cp->ace, 0);
384 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
385 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
386 	if (retaste_wait) {
387 		/*
388 		 * After the retaste event was sent (inside g_access()), we can
389 		 * send an event to detach and destroy the consumer.
390 		 * A class that already has a consumer attached to the given
391 		 * provider will not receive a retaste event for that provider.
392 		 * This is how retaste events are ignored when consumers opened
393 		 * for write are closed: the consumer is detached and destroyed
394 		 * only after the retaste event has been sent.
395 		 */
396 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
397 		return;
398 	}
399 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
400 	g_detach(cp);
401 	g_destroy_consumer(cp);
402 }
403 
404 static int
405 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
406 {
407 	struct g_consumer *cp;
408 	int error;
409 
410 	g_topology_assert_not();
411 	KASSERT(disk->d_consumer == NULL,
412 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
413 
414 	g_topology_lock();
415 	cp = g_new_consumer(disk->d_softc->sc_geom);
416 	cp->flags |= G_CF_DIRECT_RECEIVE;
417 	error = g_attach(cp, pp);
418 	if (error != 0) {
419 		g_destroy_consumer(cp);
420 		g_topology_unlock();
421 		return (error);
422 	}
423 	error = g_access(cp, 1, 1, 1);
424 	if (error != 0) {
425 		g_detach(cp);
426 		g_destroy_consumer(cp);
427 		g_topology_unlock();
428 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
429 		    pp->name, error);
430 		return (error);
431 	}
432 	g_topology_unlock();
433 	disk->d_consumer = cp;
434 	disk->d_consumer->private = disk;
435 	disk->d_consumer->index = 0;
436 
437 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
438 	return (0);
439 }
440 
441 static void
442 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
443 {
444 
445 	g_topology_assert();
446 
447 	if (cp == NULL)
448 		return;
449 	if (cp->provider != NULL)
450 		g_mirror_kill_consumer(sc, cp);
451 	else
452 		g_destroy_consumer(cp);
453 }
454 
455 /*
456  * Initialize a disk: allocate memory, create a consumer, attach it to
457  * the provider, and open access (r1w1e1) to it.
458  */
459 static struct g_mirror_disk *
460 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
461     struct g_mirror_metadata *md, int *errorp)
462 {
463 	struct g_mirror_disk *disk;
464 	int i, error;
465 
466 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
467 	if (disk == NULL) {
468 		error = ENOMEM;
469 		goto fail;
470 	}
471 	disk->d_softc = sc;
472 	error = g_mirror_connect_disk(disk, pp);
473 	if (error != 0)
474 		goto fail;
475 	disk->d_id = md->md_did;
476 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
477 	disk->d_priority = md->md_priority;
478 	disk->d_flags = md->md_dflags;
479 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
480 	if (error == 0 && i != 0)
481 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
482 	if (md->md_provider[0] != '\0')
483 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
484 	disk->d_sync.ds_consumer = NULL;
485 	disk->d_sync.ds_offset = md->md_sync_offset;
486 	disk->d_sync.ds_offset_done = md->md_sync_offset;
487 	disk->d_sync.ds_update_ts = time_uptime;
488 	disk->d_genid = md->md_genid;
489 	disk->d_sync.ds_syncid = md->md_syncid;
490 	disk->d_init_ndisks = md->md_all;
491 	disk->d_init_slice = md->md_slice;
492 	disk->d_init_balance = md->md_balance;
493 	disk->d_init_mediasize = md->md_mediasize;
494 	if (errorp != NULL)
495 		*errorp = 0;
496 	return (disk);
497 fail:
498 	if (errorp != NULL)
499 		*errorp = error;
500 	if (disk != NULL)
501 		free(disk, M_MIRROR);
502 	return (NULL);
503 }
504 
505 static void
506 g_mirror_destroy_disk(struct g_mirror_disk *disk)
507 {
508 	struct g_mirror_softc *sc;
509 
510 	g_topology_assert_not();
511 	sc = disk->d_softc;
512 	sx_assert(&sc->sc_lock, SX_XLOCKED);
513 
514 	g_topology_lock();
515 	LIST_REMOVE(disk, d_next);
516 	g_topology_unlock();
517 	g_mirror_event_cancel(disk);
518 	if (sc->sc_hint == disk)
519 		sc->sc_hint = NULL;
520 	switch (disk->d_state) {
521 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
522 		g_mirror_sync_stop(disk, 1);
523 		/* FALLTHROUGH */
524 	case G_MIRROR_DISK_STATE_NEW:
525 	case G_MIRROR_DISK_STATE_STALE:
526 	case G_MIRROR_DISK_STATE_ACTIVE:
527 		g_topology_lock();
528 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
529 		g_topology_unlock();
530 		free(disk, M_MIRROR);
531 		break;
532 	default:
533 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
534 		    g_mirror_get_diskname(disk),
535 		    g_mirror_disk_state2str(disk->d_state)));
536 	}
537 }
538 
539 static void
540 g_mirror_free_device(struct g_mirror_softc *sc)
541 {
542 
543 	g_topology_assert();
544 
545 	mtx_destroy(&sc->sc_queue_mtx);
546 	mtx_destroy(&sc->sc_events_mtx);
547 	mtx_destroy(&sc->sc_done_mtx);
548 	sx_destroy(&sc->sc_lock);
549 	free(sc, M_MIRROR);
550 }
551 
552 static void
553 g_mirror_providergone(struct g_provider *pp)
554 {
555 	struct g_mirror_softc *sc = pp->private;
556 
557 	if ((--sc->sc_refcnt) == 0)
558 		g_mirror_free_device(sc);
559 }
560 
561 static void
562 g_mirror_destroy_device(struct g_mirror_softc *sc)
563 {
564 	struct g_mirror_disk *disk;
565 	struct g_mirror_event *ep;
566 	struct g_geom *gp;
567 	struct g_consumer *cp, *tmpcp;
568 
569 	g_topology_assert_not();
570 	sx_assert(&sc->sc_lock, SX_XLOCKED);
571 
572 	gp = sc->sc_geom;
573 	if (sc->sc_provider != NULL)
574 		g_mirror_destroy_provider(sc);
575 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
576 	    disk = LIST_FIRST(&sc->sc_disks)) {
577 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
578 		g_mirror_update_metadata(disk);
579 		g_mirror_destroy_disk(disk);
580 	}
581 	while ((ep = g_mirror_event_first(sc)) != NULL) {
582 		g_mirror_event_remove(sc, ep);
583 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
584 			g_mirror_event_free(ep);
585 		else {
586 			ep->e_error = ECANCELED;
587 			ep->e_flags |= G_MIRROR_EVENT_DONE;
588 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
589 			mtx_lock(&sc->sc_events_mtx);
590 			wakeup(ep);
591 			mtx_unlock(&sc->sc_events_mtx);
592 		}
593 	}
594 	g_mirror_timeout_drain(sc);
595 
596 	g_topology_lock();
597 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
598 		g_mirror_disconnect_consumer(sc, cp);
599 	}
600 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
601 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
602 	g_wither_geom(gp, ENXIO);
603 	sx_xunlock(&sc->sc_lock);
604 	if ((--sc->sc_refcnt) == 0)
605 		g_mirror_free_device(sc);
606 	g_topology_unlock();
607 }
608 
609 static void
610 g_mirror_orphan(struct g_consumer *cp)
611 {
612 	struct g_mirror_disk *disk;
613 
614 	g_topology_assert();
615 
616 	disk = cp->private;
617 	if (disk == NULL)
618 		return;
619 	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
620 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
621 	    G_MIRROR_EVENT_DONTWAIT);
622 }
623 
624 /*
625  * Return the next active disk on the list.
626  * It may be the same disk that was passed in.
627  * If there are no active disks on the list, NULL is returned.
628  */
629 static __inline struct g_mirror_disk *
630 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
631 {
632 	struct g_mirror_disk *dp;
633 
634 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
635 	    dp = LIST_NEXT(dp, d_next)) {
636 		if (dp == NULL)
637 			dp = LIST_FIRST(&sc->sc_disks);
638 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
639 			break;
640 	}
641 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
642 		return (NULL);
643 	return (dp);
644 }
645 
646 static struct g_mirror_disk *
647 g_mirror_get_disk(struct g_mirror_softc *sc)
648 {
649 	struct g_mirror_disk *disk;
650 
651 	if (sc->sc_hint == NULL) {
652 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
653 		if (sc->sc_hint == NULL)
654 			return (NULL);
655 	}
656 	disk = sc->sc_hint;
657 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
658 		disk = g_mirror_find_next(sc, disk);
659 		if (disk == NULL)
660 			return (NULL);
661 	}
662 	sc->sc_hint = g_mirror_find_next(sc, disk);
663 	return (disk);
664 }
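
/*
 * Editorial sketch: g_mirror_find_next() and g_mirror_get_disk()
 * together implement round-robin selection; sc_hint always names the
 * disk to try next.  With two ACTIVE disks da0 and da1, successive
 * calls behave like:
 *
 *	g_mirror_get_disk(sc);	returns da0, advances sc_hint to da1
 *	g_mirror_get_disk(sc);	returns da1, advances sc_hint to da0
 */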
665 
666 static int
667 g_mirror_write_metadata(struct g_mirror_disk *disk,
668     struct g_mirror_metadata *md)
669 {
670 	struct g_mirror_softc *sc;
671 	struct g_consumer *cp;
672 	off_t offset, length;
673 	u_char *sector;
674 	int error = 0;
675 
676 	g_topology_assert_not();
677 	sc = disk->d_softc;
678 	sx_assert(&sc->sc_lock, SX_LOCKED);
679 
680 	cp = disk->d_consumer;
681 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
682 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
683 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
684 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
685 	    cp->acw, cp->ace));
686 	length = cp->provider->sectorsize;
687 	offset = cp->provider->mediasize - length;
688 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
689 	if (md != NULL &&
690 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
691 		/*
692 		 * Handle the case where the size of the parent provider was
693 		 * reduced.
693 		 */
694 		if (offset < md->md_mediasize)
695 			error = ENOSPC;
696 		else
697 			mirror_metadata_encode(md, sector);
698 	}
699 	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
700 	if (error == 0)
701 		error = g_write_data(cp, offset, sector, length);
702 	free(sector, M_MIRROR);
703 	if (error != 0) {
704 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
705 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
706 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
707 			    "(device=%s, error=%d).",
708 			    g_mirror_get_diskname(disk), sc->sc_name, error);
709 		} else {
710 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
711 			    "(device=%s, error=%d).",
712 			    g_mirror_get_diskname(disk), sc->sc_name, error);
713 		}
714 		if (g_mirror_disconnect_on_failure &&
715 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
716 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
717 			g_mirror_event_send(disk,
718 			    G_MIRROR_DISK_STATE_DISCONNECTED,
719 			    G_MIRROR_EVENT_DONTWAIT);
720 		}
721 	}
722 	return (error);
723 }
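
/*
 * Editorial note: the offset computation above is why gmirror keeps its
 * metadata in the last sector of each component, and why the usable
 * size of a component is one sector less than its media size:
 *
 *	length = cp->provider->sectorsize;
 *	offset = cp->provider->mediasize - length;
 *
 * The ENOSPC branch handles a parent provider that has shrunk below the
 * media size recorded in the metadata.
 */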
724 
725 static int
726 g_mirror_clear_metadata(struct g_mirror_disk *disk)
727 {
728 	int error;
729 
730 	g_topology_assert_not();
731 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
732 
733 	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
734 		return (0);
735 	error = g_mirror_write_metadata(disk, NULL);
736 	if (error == 0) {
737 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
738 		    g_mirror_get_diskname(disk));
739 	} else {
740 		G_MIRROR_DEBUG(0,
741 		    "Cannot clear metadata on disk %s (error=%d).",
742 		    g_mirror_get_diskname(disk), error);
743 	}
744 	return (error);
745 }
746 
747 void
748 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
749     struct g_mirror_metadata *md)
750 {
751 
752 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
753 	md->md_version = G_MIRROR_VERSION;
754 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
755 	md->md_mid = sc->sc_id;
756 	md->md_all = sc->sc_ndisks;
757 	md->md_slice = sc->sc_slice;
758 	md->md_balance = sc->sc_balance;
759 	md->md_genid = sc->sc_genid;
760 	md->md_mediasize = sc->sc_mediasize;
761 	md->md_sectorsize = sc->sc_sectorsize;
762 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
763 	bzero(md->md_provider, sizeof(md->md_provider));
764 	if (disk == NULL) {
765 		md->md_did = arc4random();
766 		md->md_priority = 0;
767 		md->md_syncid = 0;
768 		md->md_dflags = 0;
769 		md->md_sync_offset = 0;
770 		md->md_provsize = 0;
771 	} else {
772 		md->md_did = disk->d_id;
773 		md->md_priority = disk->d_priority;
774 		md->md_syncid = disk->d_sync.ds_syncid;
775 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
776 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
777 			md->md_sync_offset = disk->d_sync.ds_offset_done;
778 		else
779 			md->md_sync_offset = 0;
780 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
781 			strlcpy(md->md_provider,
782 			    disk->d_consumer->provider->name,
783 			    sizeof(md->md_provider));
784 		}
785 		md->md_provsize = disk->d_consumer->provider->mediasize;
786 	}
787 }
788 
789 void
790 g_mirror_update_metadata(struct g_mirror_disk *disk)
791 {
792 	struct g_mirror_softc *sc;
793 	struct g_mirror_metadata md;
794 	int error;
795 
796 	g_topology_assert_not();
797 	sc = disk->d_softc;
798 	sx_assert(&sc->sc_lock, SX_LOCKED);
799 
800 	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
801 		return;
802 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
803 		g_mirror_fill_metadata(sc, disk, &md);
804 	error = g_mirror_write_metadata(disk, &md);
805 	if (error == 0) {
806 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
807 		    g_mirror_get_diskname(disk));
808 	} else {
809 		G_MIRROR_DEBUG(0,
810 		    "Cannot update metadata on disk %s (error=%d).",
811 		    g_mirror_get_diskname(disk), error);
812 	}
813 }
814 
815 static void
816 g_mirror_bump_syncid(struct g_mirror_softc *sc)
817 {
818 	struct g_mirror_disk *disk;
819 
820 	g_topology_assert_not();
821 	sx_assert(&sc->sc_lock, SX_XLOCKED);
822 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
823 	    ("%s called with no active disks (device=%s).", __func__,
824 	    sc->sc_name));
825 
826 	sc->sc_syncid++;
827 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
828 	    sc->sc_syncid);
829 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
830 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
831 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
832 			disk->d_sync.ds_syncid = sc->sc_syncid;
833 			g_mirror_update_metadata(disk);
834 		}
835 	}
836 }
837 
838 static void
839 g_mirror_bump_genid(struct g_mirror_softc *sc)
840 {
841 	struct g_mirror_disk *disk;
842 
843 	g_topology_assert_not();
844 	sx_assert(&sc->sc_lock, SX_XLOCKED);
845 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
846 	    ("%s called with no active disks (device=%s).", __func__,
847 	    sc->sc_name));
848 
849 	sc->sc_genid++;
850 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
851 	    sc->sc_genid);
852 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
853 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
854 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
855 			disk->d_genid = sc->sc_genid;
856 			g_mirror_update_metadata(disk);
857 		}
858 	}
859 }
860 
861 static int
862 g_mirror_idle(struct g_mirror_softc *sc, int acw)
863 {
864 	struct g_mirror_disk *disk;
865 	int timeout;
866 
867 	g_topology_assert_not();
868 	sx_assert(&sc->sc_lock, SX_XLOCKED);
869 
870 	if (sc->sc_provider == NULL)
871 		return (0);
872 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
873 		return (0);
874 	if (sc->sc_idle)
875 		return (0);
876 	if (sc->sc_writes > 0)
877 		return (0);
878 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
879 		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
880 		if (!g_mirror_shutdown && timeout > 0)
881 			return (timeout);
882 	}
883 	sc->sc_idle = 1;
884 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
885 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
886 			continue;
887 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
888 		    g_mirror_get_diskname(disk), sc->sc_name);
889 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
890 		g_mirror_update_metadata(disk);
891 	}
892 	return (0);
893 }
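
/*
 * Worked example (illustrative): with the default
 * kern.geom.mirror.idletime of 5 seconds, a call made 2 seconds after
 * the last write computes
 *
 *	timeout = 5 - (time_uptime - sc->sc_last_write) = 5 - 2 = 3
 *
 * and returns 3, telling the caller to check again later; only after
 * the mirror has been write-idle for the full period are the active
 * components marked clean.
 */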
894 
895 static void
896 g_mirror_unidle(struct g_mirror_softc *sc)
897 {
898 	struct g_mirror_disk *disk;
899 
900 	g_topology_assert_not();
901 	sx_assert(&sc->sc_lock, SX_XLOCKED);
902 
903 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
904 		return;
905 	sc->sc_idle = 0;
906 	sc->sc_last_write = time_uptime;
907 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
908 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
909 			continue;
910 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
911 		    g_mirror_get_diskname(disk), sc->sc_name);
912 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
913 		g_mirror_update_metadata(disk);
914 	}
915 }
916 
917 static void
918 g_mirror_done(struct bio *bp)
919 {
920 	struct g_mirror_softc *sc;
921 
922 	sc = bp->bio_from->geom->softc;
923 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
924 	mtx_lock(&sc->sc_queue_mtx);
925 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
926 	mtx_unlock(&sc->sc_queue_mtx);
927 	wakeup(sc);
928 }
929 
930 static void
931 g_mirror_regular_request_error(struct g_mirror_softc *sc,
932     struct g_mirror_disk *disk, struct bio *bp)
933 {
934 
935 	if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) &&
936 	    bp->bio_error == EOPNOTSUPP)
937 		return;
938 
939 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
940 		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
941 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
942 		    bp->bio_error);
943 	} else {
944 		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
945 		    bp->bio_error);
946 	}
947 	if (g_mirror_disconnect_on_failure &&
948 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
949 		if (bp->bio_error == ENXIO &&
950 		    bp->bio_cmd == BIO_READ)
951 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
952 		else if (bp->bio_error == ENXIO)
953 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
954 		else
955 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
956 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
957 		    G_MIRROR_EVENT_DONTWAIT);
958 	}
959 }
960 
961 static void
962 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
963 {
964 	struct g_mirror_disk *disk;
965 	struct bio *pbp;
966 
967 	g_topology_assert_not();
968 	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
969 	    ("regular request %p with unexpected origin", bp));
970 
971 	pbp = bp->bio_parent;
972 	bp->bio_from->index--;
973 	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
974 		sc->sc_writes--;
975 	disk = bp->bio_from->private;
976 	if (disk == NULL) {
977 		g_topology_lock();
978 		g_mirror_kill_consumer(sc, bp->bio_from);
979 		g_topology_unlock();
980 	}
981 
982 	switch (bp->bio_cmd) {
983 	case BIO_READ:
984 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
985 		    bp->bio_error);
986 		break;
987 	case BIO_WRITE:
988 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
989 		    bp->bio_error);
990 		break;
991 	case BIO_DELETE:
992 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
993 		    bp->bio_error);
994 		break;
995 	case BIO_FLUSH:
996 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
997 		    bp->bio_error);
998 		break;
999 	case BIO_SPEEDUP:
1000 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
1001 		    bp->bio_error);
1002 		break;
1003 	}
1004 
1005 	pbp->bio_inbed++;
1006 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
1007 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
1008 	    pbp->bio_children));
1009 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
1010 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
1011 		g_destroy_bio(bp);
1012 		if (pbp->bio_children == pbp->bio_inbed) {
1013 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
1014 			pbp->bio_completed = pbp->bio_length;
1015 			if (pbp->bio_cmd == BIO_WRITE ||
1016 			    pbp->bio_cmd == BIO_DELETE) {
1017 				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1018 				/* Release delayed sync requests if possible. */
1019 				g_mirror_sync_release(sc);
1020 			}
1021 			g_io_deliver(pbp, pbp->bio_error);
1022 		}
1023 		return;
1024 	} else if (bp->bio_error != 0) {
1025 		if (pbp->bio_error == 0)
1026 			pbp->bio_error = bp->bio_error;
1027 		if (disk != NULL)
1028 			g_mirror_regular_request_error(sc, disk, bp);
1029 		switch (pbp->bio_cmd) {
1030 		case BIO_DELETE:
1031 		case BIO_WRITE:
1032 		case BIO_FLUSH:
1033 		case BIO_SPEEDUP:
1034 			pbp->bio_inbed--;
1035 			pbp->bio_children--;
1036 			break;
1037 		}
1038 	}
1039 	g_destroy_bio(bp);
1040 
1041 	switch (pbp->bio_cmd) {
1042 	case BIO_READ:
1043 		if (pbp->bio_inbed < pbp->bio_children)
1044 			break;
1045 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
1046 			g_io_deliver(pbp, pbp->bio_error);
1047 		else {
1048 			pbp->bio_error = 0;
1049 			mtx_lock(&sc->sc_queue_mtx);
1050 			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
1051 			mtx_unlock(&sc->sc_queue_mtx);
1052 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1053 			wakeup(sc);
1054 		}
1055 		break;
1056 	case BIO_DELETE:
1057 	case BIO_WRITE:
1058 	case BIO_FLUSH:
1059 	case BIO_SPEEDUP:
1060 		if (pbp->bio_children == 0) {
1061 			/*
1062 			 * All requests failed.
1063 			 */
1064 		} else if (pbp->bio_inbed < pbp->bio_children) {
1065 			/* Do nothing. */
1066 			break;
1067 		} else if (pbp->bio_children == pbp->bio_inbed) {
1068 			/* Some requests succeeded. */
1069 			pbp->bio_error = 0;
1070 			pbp->bio_completed = pbp->bio_length;
1071 		}
1072 		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
1073 			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1074 			/* Release delayed sync requests if possible. */
1075 			g_mirror_sync_release(sc);
1076 		}
1077 		g_io_deliver(pbp, pbp->bio_error);
1078 		break;
1079 	default:
1080 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
1081 		break;
1082 	}
1083 }
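
/*
 * Editorial note on the completion paths above: a failed BIO_READ is
 * requeued (with the parent's bio_error cleared) so that the worker can
 * retry it from another active disk; the error is only delivered once a
 * single active disk remains.  For writes, deletes and flushes the
 * bio_inbed--/bio_children-- pair removes the failed child from the
 * accounting, so the parent still succeeds as long as at least one
 * component completed the request.
 */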
1084 
1085 static void
1086 g_mirror_sync_done(struct bio *bp)
1087 {
1088 	struct g_mirror_softc *sc;
1089 
1090 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1091 	sc = bp->bio_from->geom->softc;
1092 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1093 	mtx_lock(&sc->sc_queue_mtx);
1094 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1095 	mtx_unlock(&sc->sc_queue_mtx);
1096 	wakeup(sc);
1097 }
1098 
1099 static void
1100 g_mirror_candelete(struct bio *bp)
1101 {
1102 	struct g_mirror_softc *sc;
1103 	struct g_mirror_disk *disk;
1104 	int val;
1105 
1106 	sc = bp->bio_to->private;
1107 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1108 		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1109 			break;
1110 	}
1111 	val = disk != NULL;
1112 	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
1113 }
1114 
1115 static void
1116 g_mirror_kernel_dump(struct bio *bp)
1117 {
1118 	struct g_mirror_softc *sc;
1119 	struct g_mirror_disk *disk;
1120 	struct bio *cbp;
1121 	struct g_kerneldump *gkd;
1122 
1123 	/*
1124 	 * We configure dumping to the first component, because this is the
1125 	 * component that the 'prefer' balance algorithm reads from.
1126 	 * If the component with the highest priority is disconnected at dump
1127 	 * time, we will not be able to read the dump after a reboot once that
1128 	 * component is reconnected and synchronized. Can we do better?
1129 	 */
1130 	sc = bp->bio_to->private;
1131 	disk = LIST_FIRST(&sc->sc_disks);
1132 
1133 	gkd = (struct g_kerneldump *)bp->bio_data;
1134 	if (gkd->length > bp->bio_to->mediasize)
1135 		gkd->length = bp->bio_to->mediasize;
1136 	cbp = g_clone_bio(bp);
1137 	if (cbp == NULL) {
1138 		g_io_deliver(bp, ENOMEM);
1139 		return;
1140 	}
1141 	cbp->bio_done = g_std_done;
1142 	g_io_request(cbp, disk->d_consumer);
1143 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1144 	    g_mirror_get_diskname(disk));
1145 }
1146 
1147 static void
1148 g_mirror_start(struct bio *bp)
1149 {
1150 	struct g_mirror_softc *sc;
1151 
1152 	sc = bp->bio_to->private;
1153 	/*
1154 	 * If sc == NULL or there are no valid disks, the provider's error
1155 	 * should be set and g_mirror_start() should not be called at all.
1156 	 */
1157 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1158 	    ("Provider's error should be set (error=%d)(mirror=%s).",
1159 	    bp->bio_to->error, bp->bio_to->name));
1160 	G_MIRROR_LOGREQ(3, bp, "Request received.");
1161 
1162 	switch (bp->bio_cmd) {
1163 	case BIO_READ:
1164 	case BIO_WRITE:
1165 	case BIO_DELETE:
1166 	case BIO_SPEEDUP:
1167 	case BIO_FLUSH:
1168 		break;
1169 	case BIO_GETATTR:
1170 		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1171 			g_mirror_candelete(bp);
1172 			return;
1173 		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1174 			g_mirror_kernel_dump(bp);
1175 			return;
1176 		}
1177 		/* FALLTHROUGH */
1178 	default:
1179 		g_io_deliver(bp, EOPNOTSUPP);
1180 		return;
1181 	}
1182 	mtx_lock(&sc->sc_queue_mtx);
1183 	if (bp->bio_to->error != 0) {
1184 		mtx_unlock(&sc->sc_queue_mtx);
1185 		g_io_deliver(bp, bp->bio_to->error);
1186 		return;
1187 	}
1188 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1189 	mtx_unlock(&sc->sc_queue_mtx);
1190 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1191 	wakeup(sc);
1192 }
1193 
1194 /*
1195  * Return TRUE if the given request is colliding with an in-progress
1196  * synchronization request.
1197  */
1198 static bool
1199 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1200 {
1201 	struct g_mirror_disk *disk;
1202 	struct bio *sbp;
1203 	off_t rstart, rend, sstart, send;
1204 	u_int i;
1205 
1206 	if (sc->sc_sync.ds_ndisks == 0)
1207 		return (false);
1208 	rstart = bp->bio_offset;
1209 	rend = bp->bio_offset + bp->bio_length;
1210 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1211 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1212 			continue;
1213 		for (i = 0; i < g_mirror_syncreqs; i++) {
1214 			sbp = disk->d_sync.ds_bios[i];
1215 			if (sbp == NULL)
1216 				continue;
1217 			sstart = sbp->bio_offset;
1218 			send = sbp->bio_offset + sbp->bio_length;
1219 			if (rend > sstart && rstart < send)
1220 				return (true);
1221 		}
1222 	}
1223 	return (false);
1224 }
1225 
1226 /*
1227  * Return TRUE if the given sync request is colliding with an in-progress
1228  * regular request.
1228  * request.
1229  */
1230 static bool
1231 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1232 {
1233 	off_t rstart, rend, sstart, send;
1234 	struct bio *bp;
1235 
1236 	if (sc->sc_sync.ds_ndisks == 0)
1237 		return (false);
1238 	sstart = sbp->bio_offset;
1239 	send = sbp->bio_offset + sbp->bio_length;
1240 	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
1241 		rstart = bp->bio_offset;
1242 		rend = bp->bio_offset + bp->bio_length;
1243 		if (rend > sstart && rstart < send)
1244 			return (true);
1245 	}
1246 	return (false);
1247 }
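
/*
 * Editorial note: both collision checks above use the usual half-open
 * interval overlap test: ranges [rstart, rend) and [sstart, send)
 * intersect exactly when
 *
 *	rend > sstart && rstart < send
 *
 * For example, a write covering [0, 128K) collides with a sync request
 * at [64K, 192K) but not with one at [128K, 256K).
 */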
1248 
1249 /*
1250  * Put a regular request onto the delayed queue.
1251  */
1252 static void
1253 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1254 {
1255 
1256 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1257 	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
1258 }
1259 
1260 /*
1261  * Put a synchronization request onto the delayed queue.
1262  */
1263 static void
1264 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1265 {
1266 
1267 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1268 	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
1269 }
1270 
1271 /*
1272  * Requeue delayed regular requests.
1273  */
1274 static void
1275 g_mirror_regular_release(struct g_mirror_softc *sc)
1276 {
1277 	struct bio *bp;
1278 
1279 	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
1280 		return;
1281 	if (g_mirror_sync_collision(sc, bp))
1282 		return;
1283 
1284 	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
1285 	mtx_lock(&sc->sc_queue_mtx);
1286 	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
1287 	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
1288 	mtx_unlock(&sc->sc_queue_mtx);
1289 }
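
/*
 * Editorial note: the TAILQ_CONCAT/TAILQ_SWAP pair above appends the
 * current queue to the delayed list and then exchanges the two heads,
 * leaving sc_regular_delayed empty and placing the formerly delayed
 * requests ahead of everything that arrived after them:
 *
 *	delayed: [d1 d2]  queue: [q1]  ==>  delayed: []  queue: [d1 d2 q1]
 */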
1290 
1291 /*
1292  * Release delayed sync requests that no longer collide with regular
1293  * requests.
1294  */
1295 static void
1296 g_mirror_sync_release(struct g_mirror_softc *sc)
1297 {
1298 	struct bio *bp, *bp2;
1299 
1300 	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
1301 		if (g_mirror_regular_collision(sc, bp))
1302 			continue;
1303 		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
1304 		G_MIRROR_LOGREQ(2, bp,
1305 		    "Releasing delayed synchronization request.");
1306 		g_io_request(bp, bp->bio_from);
1307 	}
1308 }
1309 
1310 /*
1311  * Free a synchronization request and clear its slot in the array.
1312  */
1313 static void
1314 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
1315 {
1316 	int idx;
1317 
1318 	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
1319 		idx = (int)(uintptr_t)bp->bio_caller1;
1320 		KASSERT(disk->d_sync.ds_bios[idx] == bp,
1321 		    ("unexpected sync BIO at %p:%d", disk, idx));
1322 		disk->d_sync.ds_bios[idx] = NULL;
1323 	}
1324 	free(bp->bio_data, M_MIRROR);
1325 	g_destroy_bio(bp);
1326 }
1327 
1328 /*
1329  * Handle synchronization requests.
1330  * Every synchronization request is a two-step process: first, a read request is
1331  * sent to the mirror provider via the sync consumer. If that request completes
1332  * successfully, it is converted to a write and sent to the disk being
1333  * synchronized. If the write also completes successfully, the synchronization
1334  * offset is advanced and a new read request is submitted.
1335  */
1336 static void
1337 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
1338 {
1339 	struct g_mirror_disk *disk;
1340 	struct g_mirror_disk_sync *sync;
1341 
1342 	KASSERT((bp->bio_cmd == BIO_READ &&
1343 	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
1344 	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
1345 	    ("Sync BIO %p with unexpected origin", bp));
1346 
1347 	bp->bio_from->index--;
1348 	disk = bp->bio_from->private;
1349 	if (disk == NULL) {
1350 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1351 		g_topology_lock();
1352 		g_mirror_kill_consumer(sc, bp->bio_from);
1353 		g_topology_unlock();
1354 		g_mirror_sync_request_free(NULL, bp);
1355 		sx_xlock(&sc->sc_lock);
1356 		return;
1357 	}
1358 
1359 	sync = &disk->d_sync;
1360 
1361 	/*
1362 	 * Synchronization request.
1363 	 */
1364 	switch (bp->bio_cmd) {
1365 	case BIO_READ: {
1366 		struct g_consumer *cp;
1367 
1368 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1369 		    bp->bio_error);
1370 
1371 		if (bp->bio_error != 0) {
1372 			G_MIRROR_LOGREQ(0, bp,
1373 			    "Synchronization request failed (error=%d).",
1374 			    bp->bio_error);
1375 
1376 			/*
1377 			 * The read error will trigger a syncid bump, so there's
1378 			 * no need to do that here.
1379 			 *
1380 			 * The read error handling for regular requests will
1381 			 * retry the read from all active mirrors before passing
1382 			 * the error back up, so there's no need to retry here.
1383 			 */
1384 			g_mirror_sync_request_free(disk, bp);
1385 			g_mirror_event_send(disk,
1386 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1387 			    G_MIRROR_EVENT_DONTWAIT);
1388 			return;
1389 		}
1390 		G_MIRROR_LOGREQ(3, bp,
1391 		    "Synchronization request half-finished.");
1392 		bp->bio_cmd = BIO_WRITE;
1393 		bp->bio_cflags = 0;
1394 		cp = disk->d_consumer;
1395 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1396 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1397 		    cp->acr, cp->acw, cp->ace));
1398 		cp->index++;
1399 		g_io_request(bp, cp);
1400 		return;
1401 	}
1402 	case BIO_WRITE: {
1403 		off_t offset;
1404 		int i;
1405 
1406 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1407 		    bp->bio_error);
1408 
1409 		if (bp->bio_error != 0) {
1410 			G_MIRROR_LOGREQ(0, bp,
1411 			    "Synchronization request failed (error=%d).",
1412 			    bp->bio_error);
1413 			g_mirror_sync_request_free(disk, bp);
1414 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1415 			g_mirror_event_send(disk,
1416 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1417 			    G_MIRROR_EVENT_DONTWAIT);
1418 			return;
1419 		}
1420 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1421 		if (sync->ds_offset >= sc->sc_mediasize ||
1422 		    sync->ds_consumer == NULL ||
1423 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1424 			/* Don't send more synchronization requests. */
1425 			sync->ds_inflight--;
1426 			g_mirror_sync_request_free(disk, bp);
1427 			if (sync->ds_inflight > 0)
1428 				return;
1429 			if (sync->ds_consumer == NULL ||
1430 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1431 				return;
1432 			}
1433 			/* Disk up-to-date, activate it. */
1434 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1435 			    G_MIRROR_EVENT_DONTWAIT);
1436 			return;
1437 		}
1438 
1439 		/* Send next synchronization request. */
1440 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
1441 		sync->ds_offset += bp->bio_length;
1442 
1443 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1444 		sync->ds_consumer->index++;
1445 
1446 		/*
1447 		 * Delay the request if it is colliding with a regular request.
1448 		 */
1449 		if (g_mirror_regular_collision(sc, bp))
1450 			g_mirror_sync_delay(sc, bp);
1451 		else
1452 			g_io_request(bp, sync->ds_consumer);
1453 
1454 		/* Requeue delayed requests if possible. */
1455 		g_mirror_regular_release(sc);
1456 
1457 		/* Find the smallest offset. */
1458 		offset = sc->sc_mediasize;
1459 		for (i = 0; i < g_mirror_syncreqs; i++) {
1460 			bp = sync->ds_bios[i];
1461 			if (bp != NULL && bp->bio_offset < offset)
1462 				offset = bp->bio_offset;
1463 		}
1464 		if (g_mirror_sync_period > 0 &&
1465 		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
1466 			sync->ds_offset_done = offset;
1467 			g_mirror_update_metadata(disk);
1468 			sync->ds_update_ts = time_uptime;
1469 		}
1470 		return;
1471 	}
1472 	default:
1473 		panic("Invalid I/O request %p", bp);
1474 	}
1475 }
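
/*
 * Editorial sketch of one sync bio cycling through the two steps
 * described above (offsets illustrative):
 *
 *	BIO_READ  via sync->ds_consumer at offset X   (read the mirror)
 *	BIO_WRITE via disk->d_consumer  at offset X   (write the new disk)
 *	g_mirror_sync_reinit() to BIO_READ at sync->ds_offset, repeat
 *
 * ds_offset_done trails the smallest in-flight offset and is flushed to
 * the metadata every g_mirror_sync_period seconds, so a reboot resumes
 * synchronization from a safe point rather than from the beginning.
 */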
1476 
1477 static void
1478 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1479 {
1480 	struct g_mirror_disk *disk;
1481 	struct g_consumer *cp;
1482 	struct bio *cbp;
1483 
1484 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1485 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1486 			break;
1487 	}
1488 	if (disk == NULL) {
1489 		if (bp->bio_error == 0)
1490 			bp->bio_error = ENXIO;
1491 		g_io_deliver(bp, bp->bio_error);
1492 		return;
1493 	}
1494 	cbp = g_clone_bio(bp);
1495 	if (cbp == NULL) {
1496 		if (bp->bio_error == 0)
1497 			bp->bio_error = ENOMEM;
1498 		g_io_deliver(bp, bp->bio_error);
1499 		return;
1500 	}
1501 	/*
1502 	 * Fill in the component bio structure.
1503 	 */
1504 	cp = disk->d_consumer;
1505 	cbp->bio_done = g_mirror_done;
1506 	cbp->bio_to = cp->provider;
1507 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1508 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1509 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1510 	    cp->acw, cp->ace));
1511 	cp->index++;
1512 	g_io_request(cbp, cp);
1513 }
1514 
1515 static void
1516 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1517 {
1518 	struct g_mirror_disk *disk;
1519 	struct g_consumer *cp;
1520 	struct bio *cbp;
1521 
1522 	disk = g_mirror_get_disk(sc);
1523 	if (disk == NULL) {
1524 		if (bp->bio_error == 0)
1525 			bp->bio_error = ENXIO;
1526 		g_io_deliver(bp, bp->bio_error);
1527 		return;
1528 	}
1529 	cbp = g_clone_bio(bp);
1530 	if (cbp == NULL) {
1531 		if (bp->bio_error == 0)
1532 			bp->bio_error = ENOMEM;
1533 		g_io_deliver(bp, bp->bio_error);
1534 		return;
1535 	}
1536 	/*
1537 	 * Fill in the component bio structure.
1538 	 */
1539 	cp = disk->d_consumer;
1540 	cbp->bio_done = g_mirror_done;
1541 	cbp->bio_to = cp->provider;
1542 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1543 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1544 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1545 	    cp->acw, cp->ace));
1546 	cp->index++;
1547 	g_io_request(cbp, cp);
1548 }
1549 
1550 #define TRACK_SIZE  (1 * 1024 * 1024)
1551 #define LOAD_SCALE	256
1552 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1553 
1554 static void
1555 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1556 {
1557 	struct g_mirror_disk *disk, *dp;
1558 	struct g_consumer *cp;
1559 	struct bio *cbp;
1560 	int prio, best;
1561 
1562 	/* Find a disk with the smallest load. */
1563 	disk = NULL;
1564 	best = INT_MAX;
1565 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1566 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1567 			continue;
1568 		prio = dp->load;
1569 		/* If the disk head is precisely in position, highly prefer it. */
1570 		if (dp->d_last_offset == bp->bio_offset)
1571 			prio -= 2 * LOAD_SCALE;
1572 		/* If the disk head is close to the position, prefer it. */
1573 		else if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1574 			prio -= 1 * LOAD_SCALE;
1575 		/* The lowest prio wins; ties go to the later disk in the list. */
1576 		if (prio <= best) {
1577 			disk = dp;
1578 			best = prio;
1579 		}
1580 	}
1581 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1582 	cbp = g_clone_bio(bp);
1583 	if (cbp == NULL) {
1584 		if (bp->bio_error == 0)
1585 			bp->bio_error = ENOMEM;
1586 		g_io_deliver(bp, bp->bio_error);
1587 		return;
1588 	}
1589 	/*
1590 	 * Fill in the component bio structure.
1591 	 */
1592 	cp = disk->d_consumer;
1593 	cbp->bio_done = g_mirror_done;
1594 	cbp->bio_to = cp->provider;
1595 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1596 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1597 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1598 	    cp->acw, cp->ace));
1599 	cp->index++;
1600 	/* Remember last head position */
1601 	/* Remember the last head position. */
1602 	/* Update loads. */
1603 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1604 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1605 		    dp->load * 7) / 8;
1606 	}
1607 	g_io_request(cbp, cp);
1608 }
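
/*
 * Editorial note: the load update above is an exponentially weighted
 * moving average with weight 1/8, kept in integer arithmetic by the
 * LOAD_SCALE (256) factor:
 *
 *	load' = (index * 256 + 7 * load) / 8
 *
 * A disk holding a steady backlog of one outstanding request
 * (index == 1) converges on load == 256, while an idle disk decays
 * toward 0.
 */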
1609 
1610 static void
1611 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1612 {
1613 	struct bio_queue queue;
1614 	struct g_mirror_disk *disk;
1615 	struct g_consumer *cp;
1616 	struct bio *cbp;
1617 	off_t left, mod, offset, slice;
1618 	u_char *data;
1619 	u_int ndisks;
1620 
1621 	if (bp->bio_length <= sc->sc_slice) {
1622 		g_mirror_request_round_robin(sc, bp);
1623 		return;
1624 	}
1625 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1626 	slice = bp->bio_length / ndisks;
1627 	mod = slice % sc->sc_provider->sectorsize;
1628 	if (mod != 0)
1629 		slice += sc->sc_provider->sectorsize - mod;
1630 	/*
1631 	 * Allocate all bios before sending any requests, so that we can
1632 	 * return ENOMEM in a clean way if an allocation fails.
1633 	 */
1634 	left = bp->bio_length;
1635 	offset = bp->bio_offset;
1636 	data = bp->bio_data;
1637 	TAILQ_INIT(&queue);
1638 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1639 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1640 			continue;
1641 		cbp = g_clone_bio(bp);
1642 		if (cbp == NULL) {
1643 			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1644 				TAILQ_REMOVE(&queue, cbp, bio_queue);
1645 				g_destroy_bio(cbp);
1646 			}
1647 			if (bp->bio_error == 0)
1648 				bp->bio_error = ENOMEM;
1649 			g_io_deliver(bp, bp->bio_error);
1650 			return;
1651 		}
1652 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1653 		cbp->bio_done = g_mirror_done;
1654 		cbp->bio_caller1 = disk;
1655 		cbp->bio_to = disk->d_consumer->provider;
1656 		cbp->bio_offset = offset;
1657 		cbp->bio_data = data;
1658 		cbp->bio_length = MIN(left, slice);
1659 		left -= cbp->bio_length;
1660 		if (left == 0)
1661 			break;
1662 		offset += cbp->bio_length;
1663 		data += cbp->bio_length;
1664 	}
1665 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1666 		TAILQ_REMOVE(&queue, cbp, bio_queue);
1667 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1668 		disk = cbp->bio_caller1;
1669 		cbp->bio_caller1 = NULL;
1670 		cp = disk->d_consumer;
1671 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1672 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1673 		    cp->acr, cp->acw, cp->ace));
1674 		disk->d_consumer->index++;
1675 		g_io_request(cbp, disk->d_consumer);
1676 	}
1677 }
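
/*
 * Worked example (illustrative): a 1 MiB read on a mirror with two
 * ACTIVE disks and 512-byte sectors yields
 *
 *	slice = 1048576 / 2 = 524288;	mod = 524288 % 512 = 0;
 *
 * so each disk services one 512 KiB half.  Had the division left a
 * remainder, slice would be rounded up to the next sector boundary and
 * the last component bio would carry the short tail (left < slice).
 */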
1678 
1679 static void
1680 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1681 {
1682 	struct bio_queue queue;
1683 	struct bio *cbp;
1684 	struct g_consumer *cp;
1685 	struct g_mirror_disk *disk;
1686 
1687 	sx_assert(&sc->sc_lock, SA_XLOCKED);
1688 
1689 	/*
1690 	 * To avoid ordering issues, if a write is deferred because of a
1691 	 * collision with a sync request, all I/O is deferred until that
1692 	 * write is initiated.
1693 	 */
1694 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1695 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1696 		g_mirror_regular_delay(sc, bp);
1697 		return;
1698 	}
1699 
1700 	switch (bp->bio_cmd) {
1701 	case BIO_READ:
1702 		switch (sc->sc_balance) {
1703 		case G_MIRROR_BALANCE_LOAD:
1704 			g_mirror_request_load(sc, bp);
1705 			break;
1706 		case G_MIRROR_BALANCE_PREFER:
1707 			g_mirror_request_prefer(sc, bp);
1708 			break;
1709 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1710 			g_mirror_request_round_robin(sc, bp);
1711 			break;
1712 		case G_MIRROR_BALANCE_SPLIT:
1713 			g_mirror_request_split(sc, bp);
1714 			break;
1715 		}
1716 		return;
1717 	case BIO_WRITE:
1718 	case BIO_DELETE:
1719 		/*
1720 		 * Delay the request if it is colliding with a synchronization
1721 		 * request.
1722 		 */
1723 		if (g_mirror_sync_collision(sc, bp)) {
1724 			g_mirror_regular_delay(sc, bp);
1725 			return;
1726 		}
1727 
1728 		if (sc->sc_idle)
1729 			g_mirror_unidle(sc);
1730 		else
1731 			sc->sc_last_write = time_uptime;
1732 
1733 		/*
1734 		 * Bump syncid on first write.
1735 		 */
1736 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1737 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1738 			g_mirror_bump_syncid(sc);
1739 		}
1740 
1741 		/*
1742 		 * Allocate all bios before sending any requests, so that we can
1743 		 * return ENOMEM in a clean way if an allocation fails.
1744 		 */
1745 		TAILQ_INIT(&queue);
1746 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1747 			switch (disk->d_state) {
1748 			case G_MIRROR_DISK_STATE_ACTIVE:
1749 				break;
1750 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1751 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1752 					continue;
1753 				break;
1754 			default:
1755 				continue;
1756 			}
1757 			if (bp->bio_cmd == BIO_DELETE &&
1758 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1759 				continue;
1760 			cbp = g_clone_bio(bp);
1761 			if (cbp == NULL) {
1762 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1763 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1764 					g_destroy_bio(cbp);
1765 				}
1766 				if (bp->bio_error == 0)
1767 					bp->bio_error = ENOMEM;
1768 				g_io_deliver(bp, bp->bio_error);
1769 				return;
1770 			}
1771 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1772 			cbp->bio_done = g_mirror_done;
1773 			cp = disk->d_consumer;
1774 			cbp->bio_caller1 = cp;
1775 			cbp->bio_to = cp->provider;
1776 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1777 			    ("Consumer %s not opened (r%dw%de%d).",
1778 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1779 		}
1780 		if (TAILQ_EMPTY(&queue)) {
1781 			KASSERT(bp->bio_cmd == BIO_DELETE,
1782 			    ("No consumers for regular request %p", bp));
1783 			g_io_deliver(bp, EOPNOTSUPP);
1784 			return;
1785 		}
1786 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1787 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1788 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1789 			cp = cbp->bio_caller1;
1790 			cbp->bio_caller1 = NULL;
1791 			cp->index++;
1792 			sc->sc_writes++;
1793 			g_io_request(cbp, cp);
1794 		}
1795 		/*
1796 		 * Put the request onto the inflight queue, so that we can check
1797 		 * whether new synchronization requests collide with it.
1798 		 */
1799 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1800 		return;
1801 	case BIO_SPEEDUP:
1802 	case BIO_FLUSH:
1803 		TAILQ_INIT(&queue);
1804 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1805 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1806 				continue;
1807 			cbp = g_clone_bio(bp);
1808 			if (cbp == NULL) {
1809 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1810 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1811 					g_destroy_bio(cbp);
1812 				}
1813 				if (bp->bio_error == 0)
1814 					bp->bio_error = ENOMEM;
1815 				g_io_deliver(bp, bp->bio_error);
1816 				return;
1817 			}
1818 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1819 			cbp->bio_done = g_mirror_done;
1820 			cbp->bio_caller1 = disk;
1821 			cbp->bio_to = disk->d_consumer->provider;
1822 		}
1823 		KASSERT(!TAILQ_EMPTY(&queue),
1824 		    ("No consumers for regular request %p", bp));
1825 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1826 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1827 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1828 			disk = cbp->bio_caller1;
1829 			cbp->bio_caller1 = NULL;
1830 			cp = disk->d_consumer;
1831 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1832 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1833 			    cp->acr, cp->acw, cp->ace));
1834 			cp->index++;
1835 			g_io_request(cbp, cp);
1836 		}
1837 		break;
1838 	default:
1839 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1840 		    bp->bio_cmd, sc->sc_name));
1841 		break;
1842 	}
1843 }
1844 
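/*
 * Check whether the mirror device can be safely destroyed: it must not be
 * in the middle of tasting, and no consumer on the main or synchronization
 * geom may still be busy with I/O.
 */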
1845 static int
1846 g_mirror_can_destroy(struct g_mirror_softc *sc)
1847 {
1848 	struct g_geom *gp;
1849 	struct g_consumer *cp;
1850 
1851 	g_topology_assert();
1852 	gp = sc->sc_geom;
1853 	if (gp->softc == NULL)
1854 		return (1);
1855 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1856 		return (0);
1857 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1858 		if (g_mirror_is_busy(sc, cp))
1859 			return (0);
1860 	}
1861 	gp = sc->sc_sync.ds_geom;
1862 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1863 		if (g_mirror_is_busy(sc, cp))
1864 			return (0);
1865 	}
1866 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1867 	    sc->sc_name);
1868 	return (1);
1869 }
1870 
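/*
 * Try to tear the device down.  Returns 1 if the device is gone (or its
 * teardown was handed over to the worker thread), 0 if it is still busy.
 */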
1871 static int
1872 g_mirror_try_destroy(struct g_mirror_softc *sc)
1873 {
1874 
1875 	if (sc->sc_rootmount != NULL) {
1876 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1877 		    sc->sc_rootmount);
1878 		root_mount_rel(sc->sc_rootmount);
1879 		sc->sc_rootmount = NULL;
1880 	}
1881 	g_topology_lock();
1882 	if (!g_mirror_can_destroy(sc)) {
1883 		g_topology_unlock();
1884 		return (0);
1885 	}
1886 	sc->sc_geom->softc = NULL;
1887 	sc->sc_sync.ds_geom->softc = NULL;
1888 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1889 		g_topology_unlock();
1890 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1891 		    &sc->sc_worker);
1892 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1893 		sx_xunlock(&sc->sc_lock);
1894 		wakeup(&sc->sc_worker);
1895 		sc->sc_worker = NULL;
1896 	} else {
1897 		g_topology_unlock();
1898 		g_mirror_destroy_device(sc);
1899 	}
1900 	return (1);
1901 }
1902 
1903 /*
1904  * Worker thread: handles pending events first, then queued I/O requests.
1905  */
1906 static void
1907 g_mirror_worker(void *arg)
1908 {
1909 	struct g_mirror_softc *sc;
1910 	struct g_mirror_event *ep;
1911 	struct bio *bp;
1912 	int timeout;
1913 
1914 	sc = arg;
1915 	thread_lock(curthread);
1916 	sched_prio(curthread, PRIBIO);
1917 	thread_unlock(curthread);
1918 
1919 	sx_xlock(&sc->sc_lock);
1920 	for (;;) {
1921 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1922 		/*
1923 		 * First, take a look at events.
1924 		 * It is important to handle events before any I/O requests.
1925 		 */
1926 		ep = g_mirror_event_first(sc);
1927 		if (ep != NULL) {
1928 			g_mirror_event_remove(sc, ep);
1929 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1930 				/* Update only device status. */
1931 				G_MIRROR_DEBUG(3,
1932 				    "Running event for device %s.",
1933 				    sc->sc_name);
1934 				ep->e_error = 0;
1935 				g_mirror_update_device(sc, true);
1936 			} else {
1937 				/* Update disk status. */
1938 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1939 				     g_mirror_get_diskname(ep->e_disk));
1940 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1941 				    ep->e_state);
1942 				if (ep->e_error == 0)
1943 					g_mirror_update_device(sc, false);
1944 			}
1945 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1946 				KASSERT(ep->e_error == 0,
1947 				    ("Error cannot be handled."));
1948 				g_mirror_event_free(ep);
1949 			} else {
1950 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1951 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1952 				    ep);
1953 				mtx_lock(&sc->sc_events_mtx);
1954 				wakeup(ep);
1955 				mtx_unlock(&sc->sc_events_mtx);
1956 			}
1957 			if ((sc->sc_flags &
1958 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1959 				if (g_mirror_try_destroy(sc)) {
1960 					curthread->td_pflags &= ~TDP_GEOM;
1961 					G_MIRROR_DEBUG(1, "Thread exiting.");
1962 					kproc_exit(0);
1963 				}
1964 			}
1965 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1966 			continue;
1967 		}
1968 
1969 		/*
1970 		 * Check if we can mark the array as CLEAN and, if we cannot,
1971 		 * how many seconds we should wait.
1972 		 */
1973 		timeout = g_mirror_idle(sc, -1);
1974 
1975 		/*
1976 		 * Handle I/O requests.
1977 		 */
1978 		mtx_lock(&sc->sc_queue_mtx);
1979 		bp = TAILQ_FIRST(&sc->sc_queue);
1980 		if (bp != NULL)
1981 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1982 		else {
1983 			if ((sc->sc_flags &
1984 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1985 				mtx_unlock(&sc->sc_queue_mtx);
1986 				if (g_mirror_try_destroy(sc)) {
1987 					curthread->td_pflags &= ~TDP_GEOM;
1988 					G_MIRROR_DEBUG(1, "Thread exiting.");
1989 					kproc_exit(0);
1990 				}
1991 				mtx_lock(&sc->sc_queue_mtx);
1992 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1993 					mtx_unlock(&sc->sc_queue_mtx);
1994 					continue;
1995 				}
1996 			}
1997 			if (g_mirror_event_first(sc) != NULL) {
1998 				mtx_unlock(&sc->sc_queue_mtx);
1999 				continue;
2000 			}
2001 			sx_xunlock(&sc->sc_lock);
2002 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
2003 			    timeout * hz);
2004 			sx_xlock(&sc->sc_lock);
2005 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
2006 			continue;
2007 		}
2008 		mtx_unlock(&sc->sc_queue_mtx);
2009 
2010 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2011 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2012 			/*
2013 			 * Handle completion of the first half (the read) of a
2014 			 * block synchronization operation.
2015 			 */
2016 			g_mirror_sync_request(sc, bp);
2017 		} else if (bp->bio_to != sc->sc_provider) {
2018 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2019 				/*
2020 				 * Handle completion of a regular I/O request.
2021 				 */
2022 				g_mirror_regular_request(sc, bp);
2023 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2024 				/*
2025 				 * Handle completion of the second half (the
2026 				 * write) of a block synchronization operation.
2027 				 */
2028 				g_mirror_sync_request(sc, bp);
2029 			else {
2030 				KASSERT(0,
2031 				    ("Invalid request cflags=0x%hx to=%s.",
2032 				    bp->bio_cflags, bp->bio_to->name));
2033 			}
2034 		} else {
2035 			/*
2036 			 * Initiate an I/O request.
2037 			 */
2038 			g_mirror_register_request(sc, bp);
2039 		}
2040 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2041 	}
2042 }
2043 
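/*
 * Propagate the device's idle state to one component: mark the disk dirty
 * on the first write after being idle and clean again once the whole
 * device went idle.  NOFAILSYNC devices never change the dirty flag here.
 */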
2044 static void
2045 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2046 {
2047 
2048 	sx_assert(&sc->sc_lock, SX_LOCKED);
2049 
2050 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2051 		return;
2052 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2053 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2054 		    g_mirror_get_diskname(disk), sc->sc_name);
2055 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2056 	} else if (sc->sc_idle &&
2057 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2058 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2059 		    g_mirror_get_diskname(disk), sc->sc_name);
2060 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2061 	}
2062 }
2063 
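/*
 * Reinitialize a synchronization bio for the read half of the next copy
 * cycle, starting at 'offset'.  The slot index stored in bio_caller1 and
 * the data buffer survive the g_reset_bio() call.
 */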
2064 static void
2065 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2066     off_t offset)
2067 {
2068 	void *data;
2069 	int idx;
2070 
2071 	data = bp->bio_data;
2072 	idx = (int)(uintptr_t)bp->bio_caller1;
2073 	g_reset_bio(bp);
2074 
2075 	bp->bio_cmd = BIO_READ;
2076 	bp->bio_data = data;
2077 	bp->bio_done = g_mirror_sync_done;
2078 	bp->bio_from = disk->d_sync.ds_consumer;
2079 	bp->bio_to = disk->d_softc->sc_provider;
2080 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2081 	bp->bio_offset = offset;
2082 	bp->bio_length = MIN(maxphys,
2083 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2084 }
2085 
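/*
 * Start synchronizing a disk: open a dedicated consumer on the
 * synchronization geom and fire off g_mirror_syncreqs parallel read
 * requests against the mirror provider.
 */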
2086 static void
2087 g_mirror_sync_start(struct g_mirror_disk *disk)
2088 {
2089 	struct g_mirror_softc *sc;
2090 	struct g_mirror_disk_sync *sync;
2091 	struct g_consumer *cp;
2092 	struct bio *bp;
2093 	int error, i;
2094 
2095 	g_topology_assert_not();
2096 	sc = disk->d_softc;
2097 	sync = &disk->d_sync;
2098 	sx_assert(&sc->sc_lock, SX_LOCKED);
2099 
2100 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2101 	    ("Disk %s is not marked for synchronization.",
2102 	    g_mirror_get_diskname(disk)));
2103 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2104 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2105 	    sc->sc_state));
2106 
2107 	sx_xunlock(&sc->sc_lock);
2108 	g_topology_lock();
2109 	cp = g_new_consumer(sc->sc_sync.ds_geom);
2110 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2111 	error = g_attach(cp, sc->sc_provider);
2112 	KASSERT(error == 0,
2113 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2114 	error = g_access(cp, 1, 0, 0);
2115 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2116 	g_topology_unlock();
2117 	sx_xlock(&sc->sc_lock);
2118 
2119 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2120 	    g_mirror_get_diskname(disk));
2121 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2122 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2123 	KASSERT(sync->ds_consumer == NULL,
2124 	    ("Sync consumer already exists (device=%s, disk=%s).",
2125 	    sc->sc_name, g_mirror_get_diskname(disk)));
2126 
2127 	sync->ds_consumer = cp;
2128 	sync->ds_consumer->private = disk;
2129 	sync->ds_consumer->index = 0;
2130 
2131 	/*
2132 	 * Allocate memory for synchronization bios and initialize them.
2133 	 */
2134 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2135 	    M_MIRROR, M_WAITOK);
2136 	for (i = 0; i < g_mirror_syncreqs; i++) {
2137 		bp = g_alloc_bio();
2138 		sync->ds_bios[i] = bp;
2139 
2140 		bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK);
2141 		bp->bio_caller1 = (void *)(uintptr_t)i;
2142 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2143 		sync->ds_offset += bp->bio_length;
2144 	}
2145 
2146 	/* Increase the number of disks in SYNCHRONIZING state. */
2147 	sc->sc_sync.ds_ndisks++;
2148 	/* Set the number of in-flight synchronization requests. */
2149 	sync->ds_inflight = g_mirror_syncreqs;
2150 
2151 	/*
2152 	 * Fire off first synchronization requests.
2153 	 */
2154 	for (i = 0; i < g_mirror_syncreqs; i++) {
2155 		bp = sync->ds_bios[i];
2156 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2157 		sync->ds_consumer->index++;
2158 		/*
2159 		 * Delay the request if it is colliding with a regular request.
2160 		 */
2161 		if (g_mirror_regular_collision(sc, bp))
2162 			g_mirror_sync_delay(sc, bp);
2163 		else
2164 			g_io_request(bp, sync->ds_consumer);
2165 	}
2166 }
2167 
2168 /*
2169  * Stop the synchronization process.
2170  * type: 0 - synchronization finished
2171  *       1 - synchronization stopped
2172  */
2173 static void
2174 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2175 {
2176 	struct g_mirror_softc *sc;
2177 	struct g_consumer *cp;
2178 
2179 	g_topology_assert_not();
2180 	sc = disk->d_softc;
2181 	sx_assert(&sc->sc_lock, SX_LOCKED);
2182 
2183 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2184 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2185 	    g_mirror_disk_state2str(disk->d_state)));
2186 	if (disk->d_sync.ds_consumer == NULL)
2187 		return;
2188 
2189 	if (type == 0) {
2190 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2191 		    sc->sc_name, g_mirror_get_diskname(disk));
2192 	} else /* if (type == 1) */ {
2193 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2194 		    sc->sc_name, g_mirror_get_diskname(disk));
2195 	}
2196 	g_mirror_regular_release(sc);
2197 	free(disk->d_sync.ds_bios, M_MIRROR);
2198 	disk->d_sync.ds_bios = NULL;
2199 	cp = disk->d_sync.ds_consumer;
2200 	disk->d_sync.ds_consumer = NULL;
2201 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2202 	sc->sc_sync.ds_ndisks--;
2203 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2204 	g_topology_lock();
2205 	g_mirror_kill_consumer(sc, cp);
2206 	g_topology_unlock();
2207 	sx_xlock(&sc->sc_lock);
2208 }
2209 
2210 static void
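/*
 * Create and announce the mirror/<name> provider.  The provider inherits
 * the largest stripe size found among the components and accepts unmapped
 * I/O only if every component does (and requests are not being split).
 */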
2211 g_mirror_launch_provider(struct g_mirror_softc *sc)
2212 {
2213 	struct g_mirror_disk *disk;
2214 	struct g_provider *pp, *dp;
2215 
2216 	sx_assert(&sc->sc_lock, SX_LOCKED);
2217 
2218 	g_topology_lock();
2219 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2220 	pp->flags |= G_PF_DIRECT_RECEIVE;
2221 	pp->mediasize = sc->sc_mediasize;
2222 	pp->sectorsize = sc->sc_sectorsize;
2223 	pp->stripesize = 0;
2224 	pp->stripeoffset = 0;
2225 
2226 	/* Splitting of unmapped BIOs could work but isn't implemented now. */
2227 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2228 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2229 
2230 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2231 		if (disk->d_consumer && disk->d_consumer->provider) {
2232 			dp = disk->d_consumer->provider;
2233 			if (dp->stripesize > pp->stripesize) {
2234 				pp->stripesize = dp->stripesize;
2235 				pp->stripeoffset = dp->stripeoffset;
2236 			}
2237 			/* A provider underneath us doesn't support unmapped I/O. */
2238 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2239 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2240 				    "because of %s.", dp->name);
2241 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2242 			}
2243 		}
2244 	}
2245 	pp->private = sc;
2246 	sc->sc_refcnt++;
2247 	sc->sc_provider = pp;
2248 	g_error_provider(pp, 0);
2249 	g_topology_unlock();
2250 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2251 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2252 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2253 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2254 			g_mirror_sync_start(disk);
2255 	}
2256 }
2257 
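/*
 * Withdraw the provider: stop all running synchronizations, complete
 * pending foreign I/O with ENXIO and free the requests we generated
 * ourselves.
 */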
2258 static void
2259 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2260 {
2261 	struct g_mirror_disk *disk;
2262 	struct bio *bp;
2263 
2264 	g_topology_assert_not();
2265 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2266 	    sc->sc_name));
2267 
2268 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2269 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2270 			g_mirror_sync_stop(disk, 1);
2271 	}
2272 
2273 	g_topology_lock();
2274 	g_error_provider(sc->sc_provider, ENXIO);
2275 	mtx_lock(&sc->sc_queue_mtx);
2276 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2277 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2278 		/*
2279 		 * Abort any pending I/O that wasn't generated by us.
2280 		 * Synchronization requests and requests destined for individual
2281 		 * mirror components can be destroyed immediately.
2282 		 */
2283 		if (bp->bio_to == sc->sc_provider &&
2284 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2285 			g_io_deliver(bp, ENXIO);
2286 		} else {
2287 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2288 				free(bp->bio_data, M_MIRROR);
2289 			g_destroy_bio(bp);
2290 		}
2291 	}
2292 	mtx_unlock(&sc->sc_queue_mtx);
2293 	g_wither_provider(sc->sc_provider, ENXIO);
2294 	sc->sc_provider = NULL;
2295 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2296 	g_topology_unlock();
2297 }
2298 
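/*
 * Startup timeout handler: force the device to start with whatever
 * components have been tasted so far.
 */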
2299 static void
2300 g_mirror_go(void *arg)
2301 {
2302 	struct g_mirror_softc *sc;
2303 	struct g_mirror_event *ep;
2304 
2305 	sc = arg;
2306 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2307 	ep = sc->sc_timeout_event;
2308 	sc->sc_timeout_event = NULL;
2309 	g_mirror_event_dispatch(ep, sc, 0,
2310 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2311 }
2312 
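/*
 * Cancel the startup timeout and free the event preallocated for it.
 */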
2313 static void
2314 g_mirror_timeout_drain(struct g_mirror_softc *sc)
2315 {
2316 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2317 
2318 	callout_drain(&sc->sc_callout);
2319 	g_mirror_event_free(sc->sc_timeout_event);
2320 	sc->sc_timeout_event = NULL;
2321 }
2322 
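/*
 * Decide the initial state of a disk by comparing its syncid with the
 * device's: an up-to-date disk becomes ACTIVE, an outdated one is
 * scheduled for SYNCHRONIZING (or left STALE when autosynchronization is
 * disabled), and a disk fresher than the running device is refused.
 */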
2323 static u_int
2324 g_mirror_determine_state(struct g_mirror_disk *disk)
2325 {
2326 	struct g_mirror_softc *sc;
2327 	u_int state;
2328 
2329 	sc = disk->d_softc;
2330 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2331 		if ((disk->d_flags &
2332 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2333 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2334 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2335 			/* Disk does not need synchronization. */
2336 			state = G_MIRROR_DISK_STATE_ACTIVE;
2337 		} else {
2338 			if ((sc->sc_flags &
2339 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2340 			    (disk->d_flags &
2341 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2342 				/*
2343 				 * We can start synchronization from
2344 				 * the stored offset.
2345 				 */
2346 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2347 			} else {
2348 				state = G_MIRROR_DISK_STATE_STALE;
2349 			}
2350 		}
2351 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2352 		/*
2353 		 * Reset all synchronization data for this disk,
2354 		 * because even if it was synchronized, it was
2355 		 * synchronized to disks with a different syncid.
2356 		 */
2357 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2358 		disk->d_sync.ds_offset = 0;
2359 		disk->d_sync.ds_offset_done = 0;
2360 		disk->d_sync.ds_syncid = sc->sc_syncid;
2361 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2362 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2363 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2364 		} else {
2365 			state = G_MIRROR_DISK_STATE_STALE;
2366 		}
2367 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2368 		/*
2369 		 * Not good, NOT GOOD!
2370 		 * It means that mirror was started on stale disks
2371 		 * It means that the mirror was started on stale disks
2372 		 * and a fresher disk has just arrived.
2373 		 * If there were writes, the mirror is broken, sorry.
2374 		 * I think the best choice here is to not touch
2375 		 * this disk and to inform the user loudly.
2376 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2377 		    "disk (%s) arrived! It will not be connected to the "
2378 		    "running device.", sc->sc_name,
2379 		    g_mirror_get_diskname(disk));
2380 		g_mirror_destroy_disk(disk);
2381 		state = G_MIRROR_DISK_STATE_NONE;
2382 		/* Return immediately, because disk was destroyed. */
2383 		return (state);
2384 	}
2385 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2386 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2387 	return (state);
2388 }
2389 
2390 /*
2391  * Update device state.
2392  */
2393 static void
2394 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2395 {
2396 	struct g_mirror_disk *disk;
2397 	u_int state;
2398 
2399 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2400 
2401 	switch (sc->sc_state) {
2402 	case G_MIRROR_DEVICE_STATE_STARTING:
2403 	    {
2404 		struct g_mirror_disk *pdisk, *tdisk;
2405 		const char *mismatch;
2406 		uintmax_t found, newest;
2407 		u_int dirty, ndisks;
2408 
2409 		/* Pre-flight checks */
2410 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2411 			/*
2412 			 * Confirm we already detected the newest genid.
2413 			 */
2414 			KASSERT(sc->sc_genid >= disk->d_genid,
2415 			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
2416 			    disk->d_genid, sc, sc->sc_genid));
2417 
2418 			/* Kick out any previously tasted stale components. */
2419 			if (disk->d_genid < sc->sc_genid) {
2420 				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
2421 				    "(device %s) (component=%u latest=%u), skipping.",
2422 				    g_mirror_get_diskname(disk), sc->sc_name,
2423 				    disk->d_genid, sc->sc_genid);
2424 				g_mirror_destroy_disk(disk);
2425 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2426 				continue;
2427 			}
2428 
2429 			/*
2430 			 * Confirm we already detected the newest syncid.
2431 			 */
2432 			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
2433 			    ("%s: found newer syncid %u (sc:%p had %u).",
2434 			     __func__, disk->d_sync.ds_syncid, sc,
2435 			     sc->sc_syncid));
2436 
2437 #define DETECT_MISMATCH(field, name) \
2438 			if (mismatch == NULL &&					\
2439 			    disk->d_init_ ## field != sc->sc_ ## field) {	\
2440 				mismatch = name;				\
2441 				found = (uintmax_t)disk->d_init_ ## field;	\
2442 				newest = (uintmax_t)sc->sc_ ## field;		\
2443 			}
2444 			mismatch = NULL;
2445 			DETECT_MISMATCH(ndisks, "md_all");
2446 			DETECT_MISMATCH(balance, "md_balance");
2447 			DETECT_MISMATCH(slice, "md_slice");
2448 			DETECT_MISMATCH(mediasize, "md_mediasize");
2449 #undef DETECT_MISMATCH
2450 			if (mismatch != NULL) {
2451 				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
2452 				    "field on %s (device %s) (found=%ju "
2453 				    "newest=%ju).", mismatch,
2454 				    g_mirror_get_diskname(disk), sc->sc_name,
2455 				    found, newest);
2456 				g_mirror_destroy_disk(disk);
2457 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2458 				continue;
2459 			}
2460 		}
2461 
2462 		KASSERT(sc->sc_provider == NULL,
2463 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2464 		/*
2465 		 * Are we ready? If the timeout (force is true) has expired, and
2466 		 * any disks are present, then yes. If we're permitted to launch
2467 		 * before the timeout has expired and the expected number of
2468 		 * current-generation mirror disks have been tasted, then yes.
2469 		 */
2470 		ndisks = g_mirror_ndisks(sc, -1);
2471 		if ((force && ndisks > 0) ||
2472 		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
2473 			;
2474 		} else if (ndisks == 0) {
2475 			/*
2476 			 * Disks went away in the starting phase, so destroy
2477 			 * the device.
2478 			 */
2479 			g_mirror_timeout_drain(sc);
2480 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2481 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2482 			    sc->sc_rootmount);
2483 			root_mount_rel(sc->sc_rootmount);
2484 			sc->sc_rootmount = NULL;
2485 			return;
2486 		} else {
2487 			return;
2488 		}
2489 
2490 		/*
2491 		 * Activate all disks with the biggest syncid.
2492 		 */
2493 		if (force) {
2494 			/*
2495 			 * If 'force' is true, we have been called due to
2496 			 * a timeout, so don't bother canceling the timeout.
2497 			 */
2498 			ndisks = 0;
2499 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2500 				if ((disk->d_flags &
2501 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2502 					ndisks++;
2503 				}
2504 			}
2505 			if (ndisks == 0) {
2506 				/* No valid disks found, destroy device. */
2507 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2508 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2509 				    __LINE__, sc->sc_rootmount);
2510 				root_mount_rel(sc->sc_rootmount);
2511 				sc->sc_rootmount = NULL;
2512 				return;
2513 			}
2514 		} else {
2515 			/* Cancel timeout. */
2516 			g_mirror_timeout_drain(sc);
2517 		}
2518 
2519 		/*
2520 		 * Here we need to look for dirty disks and, if all disks
2521 		 * with the biggest syncid are dirty, we have to choose the
2522 		 * one with the biggest priority and rebuild the rest.
2523 		 */
2524 		/*
2525 		 * Find the number of dirty disks with the biggest syncid.
2526 		 * Find the number of disks with the biggest syncid.
2527 		 * While here, find a disk with the biggest priority.
2528 		 */
2529 		dirty = ndisks = 0;
2530 		pdisk = NULL;
2531 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2532 			if (disk->d_sync.ds_syncid != sc->sc_syncid)
2533 				continue;
2534 			if ((disk->d_flags &
2535 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2536 				continue;
2537 			}
2538 			ndisks++;
2539 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2540 				dirty++;
2541 				if (pdisk == NULL ||
2542 				    pdisk->d_priority < disk->d_priority) {
2543 					pdisk = disk;
2544 				}
2545 			}
2546 		}
2547 		if (dirty == 0) {
2548 			/* No dirty disks at all, great. */
2549 		} else if (dirty == ndisks) {
2550 			/*
2551 			 * Force synchronization for all dirty disks except one
2552 			 * with the biggest priority.
2553 			 */
2554 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2555 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2556 			    "master disk for synchronization.",
2557 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2558 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2559 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2560 					continue;
2561 				if ((disk->d_flags &
2562 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2563 					continue;
2564 				}
2565 				KASSERT((disk->d_flags &
2566 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2567 				    ("Disk %s isn't marked as dirty.",
2568 				    g_mirror_get_diskname(disk)));
2569 				/* Skip the disk with the biggest priority. */
2570 				if (disk == pdisk)
2571 					continue;
2572 				disk->d_sync.ds_syncid = 0;
2573 			}
2574 		} else if (dirty < ndisks) {
2575 			/*
2576 			 * Force synchronization for all dirty disks.
2577 			 * We have some non-dirty disks.
2578 			 */
2579 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2580 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2581 					continue;
2582 				if ((disk->d_flags &
2583 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2584 					continue;
2585 				}
2586 				if ((disk->d_flags &
2587 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2588 					continue;
2589 				}
2590 				disk->d_sync.ds_syncid = 0;
2591 			}
2592 		}
2593 
2594 		/* Reset hint. */
2595 		sc->sc_hint = NULL;
2596 		if (force) {
2597 			/* Remember to bump syncid on first write. */
2598 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2599 		}
2600 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2601 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2602 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2603 		    g_mirror_device_state2str(state));
2604 		sc->sc_state = state;
2605 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2606 			state = g_mirror_determine_state(disk);
2607 			g_mirror_event_send(disk, state,
2608 			    G_MIRROR_EVENT_DONTWAIT);
2609 			if (state == G_MIRROR_DISK_STATE_STALE)
2610 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2611 		}
2612 		break;
2613 	    }
2614 	case G_MIRROR_DEVICE_STATE_RUNNING:
2615 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2616 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2617 			/*
2618 			 * No usable disks, so destroy the device.
2619 			 */
2620 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2621 			break;
2622 		} else if (g_mirror_ndisks(sc,
2623 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2624 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2625 			/*
2626 			 * We have active disks, launch provider if it doesn't
2627 			 * exist.
2628 			 */
2629 			if (sc->sc_provider == NULL)
2630 				g_mirror_launch_provider(sc);
2631 			if (sc->sc_rootmount != NULL) {
2632 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2633 				    __LINE__, sc->sc_rootmount);
2634 				root_mount_rel(sc->sc_rootmount);
2635 				sc->sc_rootmount = NULL;
2636 			}
2637 		}
2638 		/*
2639 		 * Genid should be bumped immediately, so do it here.
2640 		 */
2641 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2642 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2643 			g_mirror_bump_genid(sc);
2644 		}
2645 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2646 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2647 			g_mirror_bump_syncid(sc);
2648 		}
2649 		break;
2650 	default:
2651 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2652 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2653 		break;
2654 	}
2655 }
2656 
2657 /*
2658  * Update disk state and device state if needed.
2659  */
2660 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2661 	"Disk %s state changed from %s to %s (device %s).",		\
2662 	g_mirror_get_diskname(disk),					\
2663 	g_mirror_disk_state2str(disk->d_state),				\
2664 	g_mirror_disk_state2str(state), sc->sc_name)
2665 static int
2666 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2667 {
2668 	struct g_mirror_softc *sc;
2669 
2670 	sc = disk->d_softc;
2671 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2672 
2673 again:
2674 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2675 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2676 	    g_mirror_disk_state2str(state));
2677 	switch (state) {
2678 	case G_MIRROR_DISK_STATE_NEW:
2679 		/*
2680 		 * Possible scenarios:
2681 		 * 1. A new disk arrives.
2682 		 */
2683 		/* Previous state should be NONE. */
2684 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2685 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2686 		    g_mirror_disk_state2str(disk->d_state)));
2687 		DISK_STATE_CHANGED();
2688 
2689 		disk->d_state = state;
2690 		g_topology_lock();
2691 		if (LIST_EMPTY(&sc->sc_disks))
2692 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2693 		else {
2694 			struct g_mirror_disk *dp;
2695 
2696 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2697 				if (disk->d_priority >= dp->d_priority) {
2698 					LIST_INSERT_BEFORE(dp, disk, d_next);
2699 					dp = NULL;
2700 					break;
2701 				}
2702 				if (LIST_NEXT(dp, d_next) == NULL)
2703 					break;
2704 			}
2705 			if (dp != NULL)
2706 				LIST_INSERT_AFTER(dp, disk, d_next);
2707 		}
2708 		g_topology_unlock();
2709 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2710 		    sc->sc_name, g_mirror_get_diskname(disk));
2711 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2712 			break;
2713 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2714 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2715 		    g_mirror_device_state2str(sc->sc_state),
2716 		    g_mirror_get_diskname(disk),
2717 		    g_mirror_disk_state2str(disk->d_state)));
2718 		state = g_mirror_determine_state(disk);
2719 		if (state != G_MIRROR_DISK_STATE_NONE)
2720 			goto again;
2721 		break;
2722 	case G_MIRROR_DISK_STATE_ACTIVE:
2723 		/*
2724 		 * Possible scenarios:
2725 		 * 1. New disk does not need synchronization.
2726 		 * 2. Synchronization process finished successfully.
2727 		 */
2728 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2729 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2730 		    g_mirror_device_state2str(sc->sc_state),
2731 		    g_mirror_get_diskname(disk),
2732 		    g_mirror_disk_state2str(disk->d_state)));
2733 		/* Previous state should be NEW or SYNCHRONIZING. */
2734 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2735 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2736 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2737 		    g_mirror_disk_state2str(disk->d_state)));
2738 		DISK_STATE_CHANGED();
2739 
2740 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2741 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2742 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2743 			g_mirror_sync_stop(disk, 0);
2744 		}
2745 		disk->d_state = state;
2746 		disk->d_sync.ds_offset = 0;
2747 		disk->d_sync.ds_offset_done = 0;
2748 		g_mirror_update_idle(sc, disk);
2749 		g_mirror_update_metadata(disk);
2750 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2751 		    sc->sc_name, g_mirror_get_diskname(disk));
2752 		break;
2753 	case G_MIRROR_DISK_STATE_STALE:
2754 		/*
2755 		 * Possible scenarios:
2756 		 * 1. Stale disk was connected.
2757 		 */
2758 		/* Previous state should be NEW. */
2759 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2760 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2761 		    g_mirror_disk_state2str(disk->d_state)));
2762 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2763 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2764 		    g_mirror_device_state2str(sc->sc_state),
2765 		    g_mirror_get_diskname(disk),
2766 		    g_mirror_disk_state2str(disk->d_state)));
2767 		/*
2768 		 * STALE state is only possible if device is marked
2769 		 * NOAUTOSYNC.
2770 		 */
2771 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2772 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2773 		    g_mirror_device_state2str(sc->sc_state),
2774 		    g_mirror_get_diskname(disk),
2775 		    g_mirror_disk_state2str(disk->d_state)));
2776 		DISK_STATE_CHANGED();
2777 
2778 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2779 		disk->d_state = state;
2780 		g_mirror_update_metadata(disk);
2781 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2782 		    sc->sc_name, g_mirror_get_diskname(disk));
2783 		break;
2784 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2785 		/*
2786 		 * Possible scenarios:
2787 		 * 1. Disk which needs synchronization was connected.
2788 		 */
2789 		/* Previous state should be NEW. */
2790 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2791 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2792 		    g_mirror_disk_state2str(disk->d_state)));
2793 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2794 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2795 		    g_mirror_device_state2str(sc->sc_state),
2796 		    g_mirror_get_diskname(disk),
2797 		    g_mirror_disk_state2str(disk->d_state)));
2798 		DISK_STATE_CHANGED();
2799 
2800 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2801 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2802 		disk->d_state = state;
2803 		if (sc->sc_provider != NULL) {
2804 			g_mirror_sync_start(disk);
2805 			g_mirror_update_metadata(disk);
2806 		}
2807 		break;
2808 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2809 		/*
2810 		 * Possible scenarios:
2811 		 * 1. Device wasn't running yet, but a disk disappeared.
2812 		 * 2. Disk was active and disappeared.
2813 		 * 3. Disk disappeared during the synchronization process.
2814 		 */
2815 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2816 			/*
2817 			 * Previous state should be ACTIVE, STALE or
2818 			 * SYNCHRONIZING.
2819 			 */
2820 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2821 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2822 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2823 			    ("Wrong disk state (%s, %s).",
2824 			    g_mirror_get_diskname(disk),
2825 			    g_mirror_disk_state2str(disk->d_state)));
2826 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2827 			/* Previous state should be NEW. */
2828 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2829 			    ("Wrong disk state (%s, %s).",
2830 			    g_mirror_get_diskname(disk),
2831 			    g_mirror_disk_state2str(disk->d_state)));
2832 			/*
2833 			 * Reset bumping syncid if disk disappeared in STARTING
2834 			 * state.
2835 			 */
2836 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2837 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2838 #ifdef	INVARIANTS
2839 		} else {
2840 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2841 			    sc->sc_name,
2842 			    g_mirror_device_state2str(sc->sc_state),
2843 			    g_mirror_get_diskname(disk),
2844 			    g_mirror_disk_state2str(disk->d_state)));
2845 #endif
2846 		}
2847 		DISK_STATE_CHANGED();
2848 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2849 		    sc->sc_name, g_mirror_get_diskname(disk));
2850 
2851 		g_mirror_destroy_disk(disk);
2852 		break;
2853 	case G_MIRROR_DISK_STATE_DESTROY:
2854 	    {
2855 		int error;
2856 
2857 		error = g_mirror_clear_metadata(disk);
2858 		if (error != 0) {
2859 			G_MIRROR_DEBUG(0,
2860 			    "Device %s: failed to clear metadata on %s: %d.",
2861 			    sc->sc_name, g_mirror_get_diskname(disk), error);
2862 			break;
2863 		}
2864 		DISK_STATE_CHANGED();
2865 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2866 		    sc->sc_name, g_mirror_get_diskname(disk));
2867 
2868 		g_mirror_destroy_disk(disk);
2869 		sc->sc_ndisks--;
2870 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2871 			g_mirror_update_metadata(disk);
2872 		}
2873 		break;
2874 	    }
2875 	default:
2876 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2877 		break;
2878 	}
2879 	return (0);
2880 }
2881 #undef	DISK_STATE_CHANGED
2882 
2883 int
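/*
 * Read the gmirror metadata from the last sector of a component and
 * decode and validate it.
 */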
2884 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2885 {
2886 	struct g_provider *pp;
2887 	u_char *buf;
2888 	int error;
2889 
2890 	g_topology_assert();
2891 
2892 	error = g_access(cp, 1, 0, 0);
2893 	if (error != 0)
2894 		return (error);
2895 	pp = cp->provider;
2896 	g_topology_unlock();
2897 	/* Metadata is stored in the last sector. */
2898 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2899 	    &error);
2900 	g_topology_lock();
2901 	g_access(cp, -1, 0, 0);
2902 	if (buf == NULL) {
2903 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2904 		    cp->provider->name, error);
2905 		return (error);
2906 	}
2907 
2908 	/* Decode metadata. */
2909 	error = mirror_metadata_decode(buf, md);
2910 	g_free(buf);
2911 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2912 		return (EINVAL);
2913 	if (md->md_version > G_MIRROR_VERSION) {
2914 		G_MIRROR_DEBUG(0,
2915 		    "Kernel module is too old to handle metadata from %s.",
2916 		    cp->provider->name);
2917 		return (EINVAL);
2918 	}
2919 	if (error != 0) {
2920 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2921 		    cp->provider->name);
2922 		return (error);
2923 	}
2924 
2925 	return (0);
2926 }
2927 
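/*
 * Sanity-check a component's metadata against the existing device before
 * the disk is allowed to join it.
 */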
2928 static int
2929 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2930     struct g_mirror_metadata *md)
2931 {
2932 
2933 	G_MIRROR_DEBUG(2, "%s: md_did 0x%x disk %s device %s md_all 0x%x "
2934 	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
2935 	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
2936 	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
2937 	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
2938 	    "sc_state 0x%x.",
2939 	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
2940 	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
2941 	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
2942 	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
2943 	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
2944 	    md->md_genid, md->md_priority, sc->sc_state);
2945 
2946 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2947 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2948 		    pp->name, md->md_did);
2949 		return (EEXIST);
2950 	}
2951 	if (sc->sc_mediasize > pp->mediasize) {
2952 		G_MIRROR_DEBUG(1,
2953 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2954 		    sc->sc_name);
2955 		return (EINVAL);
2956 	}
2957 	if (md->md_sectorsize != sc->sc_sectorsize) {
2958 		G_MIRROR_DEBUG(1,
2959 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2960 		    "md_sectorsize", pp->name, sc->sc_name);
2961 		return (EINVAL);
2962 	}
2963 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2964 		G_MIRROR_DEBUG(1,
2965 		    "Invalid sector size of disk %s (device %s), skipping.",
2966 		    pp->name, sc->sc_name);
2967 		return (EINVAL);
2968 	}
2969 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2970 		G_MIRROR_DEBUG(1,
2971 		    "Invalid device flags on disk %s (device %s), skipping.",
2972 		    pp->name, sc->sc_name);
2973 		return (EINVAL);
2974 	}
2975 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2976 		G_MIRROR_DEBUG(1,
2977 		    "Invalid disk flags on disk %s (device %s), skipping.",
2978 		    pp->name, sc->sc_name);
2979 		return (EINVAL);
2980 	}
2981 	return (0);
2982 }
2983 
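/*
 * Validate the metadata, refresh the device from it if it is newer, and
 * attach the component to the mirror as a NEW disk.
 */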
2984 int
2985 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2986     struct g_mirror_metadata *md)
2987 {
2988 	struct g_mirror_disk *disk;
2989 	int error;
2990 
2991 	g_topology_assert_not();
2992 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2993 
2994 	error = g_mirror_check_metadata(sc, pp, md);
2995 	if (error != 0)
2996 		return (error);
2997 
2998 	if (md->md_genid < sc->sc_genid) {
2999 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
3000 		    pp->name, sc->sc_name);
3001 		return (EINVAL);
3002 	}
3003 
3004 	/*
3005 	 * If the component disk we're tasting has newer metadata than the
3006 	 * STARTING gmirror device, refresh the device from the component.
3007 	 */
3008 	error = g_mirror_refresh_device(sc, pp, md);
3009 	if (error != 0)
3010 		return (error);
3011 
3012 	disk = g_mirror_init_disk(sc, pp, md, &error);
3013 	if (disk == NULL)
3014 		return (error);
3015 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
3016 	    G_MIRROR_EVENT_WAIT);
3017 	if (error != 0)
3018 		return (error);
3019 	if (md->md_version < G_MIRROR_VERSION) {
3020 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
3021 		    pp->name, md->md_version, G_MIRROR_VERSION);
3022 		g_mirror_update_metadata(disk);
3023 	}
3024 	return (0);
3025 }
3026 
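/*
 * Event handler which finishes a delayed (CLOSEWAIT) destruction once the
 * provider has been closed for the last time.
 */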
3027 static void
3028 g_mirror_destroy_delayed(void *arg, int flag)
3029 {
3030 	struct g_mirror_softc *sc;
3031 	int error;
3032 
3033 	if (flag == EV_CANCEL) {
3034 		G_MIRROR_DEBUG(1, "Destroying canceled.");
3035 		return;
3036 	}
3037 	sc = arg;
3038 	g_topology_unlock();
3039 	sx_xlock(&sc->sc_lock);
3040 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3041 	    ("DESTROY flag set on %s.", sc->sc_name));
3042 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3043 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3044 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3045 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3046 	if (error != 0) {
3047 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3048 		    sc->sc_name, error);
3049 		sx_xunlock(&sc->sc_lock);
3050 	}
3051 	g_topology_lock();
3052 }
3053 
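/*
 * GEOM access method.  Tracks how many references keep the provider open,
 * marks the device idle on the last write close and triggers the delayed
 * destruction once the provider is fully closed.
 */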
3054 static int
3055 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3056 {
3057 	struct g_mirror_softc *sc;
3058 	int error = 0;
3059 
3060 	g_topology_assert();
3061 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3062 	    acw, ace);
3063 
3064 	sc = pp->private;
3065 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3066 
3067 	g_topology_unlock();
3068 	sx_xlock(&sc->sc_lock);
3069 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3070 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3071 	    LIST_EMPTY(&sc->sc_disks)) {
3072 		if (acr > 0 || acw > 0 || ace > 0)
3073 			error = ENXIO;
3074 		goto end;
3075 	}
3076 	sc->sc_provider_open += acr + acw + ace;
3077 	if (pp->acw + acw == 0)
3078 		g_mirror_idle(sc, 0);
3079 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3080 	    sc->sc_provider_open == 0)
3081 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3082 end:
3083 	sx_xunlock(&sc->sc_lock);
3084 	g_topology_lock();
3085 	return (error);
3086 }
3087 
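/*
 * Load the mutable part of the device configuration from on-disk
 * metadata.
 */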
3088 static void
3089 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3090     const struct g_mirror_metadata *md)
3091 {
3092 
3093 	sc->sc_genid = md->md_genid;
3094 	sc->sc_syncid = md->md_syncid;
3095 
3096 	sc->sc_slice = md->md_slice;
3097 	sc->sc_balance = md->md_balance;
3098 	sc->sc_mediasize = md->md_mediasize;
3099 	sc->sc_ndisks = md->md_all;
3100 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3101 	sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3102 }
3103 
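/*
 * Create a new mirror device from metadata: set up the action and
 * synchronization geoms, start the worker thread and schedule the startup
 * timeout.  Tasting uses this as g_mirror_create(mp, &md,
 * G_MIRROR_TYPE_AUTOMATIC); see g_mirror_taste() below.
 */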
3104 struct g_geom *
3105 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3106     u_int type)
3107 {
3108 	struct g_mirror_softc *sc;
3109 	struct g_geom *gp;
3110 	int error, timeout;
3111 
3112 	g_topology_assert();
3113 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3114 	    md->md_mid);
3115 
3116 	/* One disk is the minimum. */
3117 	if (md->md_all < 1)
3118 		return (NULL);
3119 	/*
3120 	 * Action geom.
3121 	 */
3122 	gp = g_new_geomf(mp, "%s", md->md_name);
3123 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3124 	gp->start = g_mirror_start;
3125 	gp->orphan = g_mirror_orphan;
3126 	gp->access = g_mirror_access;
3127 	gp->dumpconf = g_mirror_dumpconf;
3128 
3129 	sc->sc_type = type;
3130 	sc->sc_id = md->md_mid;
3131 	g_mirror_reinit_from_metadata(sc, md);
3132 	sc->sc_sectorsize = md->md_sectorsize;
3133 	sc->sc_bump_id = 0;
3134 	sc->sc_idle = 1;
3135 	sc->sc_last_write = time_uptime;
3136 	sc->sc_writes = 0;
3137 	sc->sc_refcnt = 1;
3138 	sx_init(&sc->sc_lock, "gmirror:lock");
3139 	TAILQ_INIT(&sc->sc_queue);
3140 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3141 	TAILQ_INIT(&sc->sc_regular_delayed);
3142 	TAILQ_INIT(&sc->sc_inflight);
3143 	TAILQ_INIT(&sc->sc_sync_delayed);
3144 	LIST_INIT(&sc->sc_disks);
3145 	TAILQ_INIT(&sc->sc_events);
3146 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3147 	callout_init(&sc->sc_callout, 1);
3148 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3149 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3150 	gp->softc = sc;
3151 	sc->sc_geom = gp;
3152 	sc->sc_provider = NULL;
3153 	sc->sc_provider_open = 0;
3154 	/*
3155 	 * Synchronization geom.
3156 	 */
3157 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3158 	gp->softc = sc;
3159 	gp->orphan = g_mirror_orphan;
3160 	sc->sc_sync.ds_geom = gp;
3161 	sc->sc_sync.ds_ndisks = 0;
3162 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3163 	    "g_mirror %s", md->md_name);
3164 	if (error != 0) {
3165 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3166 		    sc->sc_name);
3167 		g_destroy_geom(sc->sc_sync.ds_geom);
3168 		g_destroy_geom(sc->sc_geom);
3169 		g_mirror_free_device(sc);
3170 		return (NULL);
3171 	}
3172 
3173 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3174 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3175 
3176 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3177 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3178 
3179 	/*
3180 	 * Schedule startup timeout.
3181 	 */
3182 	timeout = g_mirror_timeout * hz;
3183 	sc->sc_timeout_event = malloc(sizeof(struct g_mirror_event), M_MIRROR,
3184 	    M_WAITOK);
3185 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3186 	return (sc->sc_geom);
3187 }
3188 
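/*
 * Destroy the device according to 'how': G_MIRROR_DESTROY_SOFT fails with
 * EBUSY if the provider is open, G_MIRROR_DESTROY_DELAYED defers the
 * teardown until the last close, and G_MIRROR_DESTROY_HARD proceeds
 * regardless.
 */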
3189 int
3190 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3191 {
3192 	struct g_mirror_disk *disk;
3193 
3194 	g_topology_assert_not();
3195 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3196 
3197 	if (sc->sc_provider_open != 0) {
3198 		switch (how) {
3199 		case G_MIRROR_DESTROY_SOFT:
3200 			G_MIRROR_DEBUG(1,
3201 			    "Device %s is still open (%d).", sc->sc_name,
3202 			    sc->sc_provider_open);
3203 			return (EBUSY);
3204 		case G_MIRROR_DESTROY_DELAYED:
3205 			G_MIRROR_DEBUG(1,
3206 			    "Device %s will be destroyed on last close.",
3207 			    sc->sc_name);
3208 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3209 				if (disk->d_state ==
3210 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3211 					g_mirror_sync_stop(disk, 1);
3212 				}
3213 			}
3214 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3215 			return (EBUSY);
3216 		case G_MIRROR_DESTROY_HARD:
3217 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3218 			    "can't be definitely removed.", sc->sc_name);
3219 		}
3220 	}
3221 
3222 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3223 		sx_xunlock(&sc->sc_lock);
3224 		return (0);
3225 	}
3226 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3227 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3228 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3229 	sx_xunlock(&sc->sc_lock);
3230 	mtx_lock(&sc->sc_queue_mtx);
3231 	wakeup(sc);
3232 	mtx_unlock(&sc->sc_queue_mtx);
3233 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3234 	while (sc->sc_worker != NULL)
3235 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3236 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3237 	sx_xlock(&sc->sc_lock);
3238 	g_mirror_destroy_device(sc);
3239 	return (0);
3240 }
3241 
3242 static void
3243 g_mirror_taste_orphan(struct g_consumer *cp)
3244 {
3245 
3246 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3247 	    cp->provider->name));
3248 }
3249 
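/*
 * Taste a provider: read and verify the gmirror metadata from its last
 * sector, create the device if it does not exist yet and add the
 * component to it.
 */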
3250 static struct g_geom *
3251 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3252 {
3253 	struct g_mirror_metadata md;
3254 	struct g_mirror_softc *sc;
3255 	struct g_consumer *cp;
3256 	struct g_geom *gp;
3257 	int error;
3258 
3259 	g_topology_assert();
3260 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3261 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3262 
3263 	gp = g_new_geomf(mp, "mirror:taste");
3264 	/*
3265 	 * This orphan function should never be called.
3266 	 */
3267 	gp->orphan = g_mirror_taste_orphan;
3268 	cp = g_new_consumer(gp);
3269 	error = g_attach(cp, pp);
3270 	if (error == 0) {
3271 		error = g_mirror_read_metadata(cp, &md);
3272 		g_detach(cp);
3273 	}
3274 	g_destroy_consumer(cp);
3275 	g_destroy_geom(gp);
3276 	if (error != 0)
3277 		return (NULL);
3278 	gp = NULL;
3279 
3280 	if (md.md_provider[0] != '\0' &&
3281 	    !g_compare_names(md.md_provider, pp->name))
3282 		return (NULL);
3283 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3284 		return (NULL);
3285 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3286 		G_MIRROR_DEBUG(0,
3287 		    "Device %s: provider %s marked as inactive, skipping.",
3288 		    md.md_name, pp->name);
3289 		return (NULL);
3290 	}
3291 	if (g_mirror_debug >= 2)
3292 		mirror_metadata_dump(&md);
3293 
3294 	/*
3295 	 * Let's check if the device already exists.
3296 	 */
3297 	sc = NULL;
3298 	LIST_FOREACH(gp, &mp->geom, geom) {
3299 		sc = gp->softc;
3300 		if (sc == NULL)
3301 			continue;
3302 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3303 			continue;
3304 		if (sc->sc_sync.ds_geom == gp)
3305 			continue;
3306 		if (strcmp(md.md_name, sc->sc_name) != 0)
3307 			continue;
3308 		if (md.md_mid != sc->sc_id) {
3309 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3310 			    sc->sc_name);
3311 			return (NULL);
3312 		}
3313 		break;
3314 	}
3315 	if (gp == NULL) {
3316 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3317 		if (gp == NULL) {
3318 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3319 			    md.md_name);
3320 			return (NULL);
3321 		}
3322 		sc = gp->softc;
3323 	}
3324 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3325 	g_topology_unlock();
3326 	sx_xlock(&sc->sc_lock);
3327 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3328 	error = g_mirror_add_disk(sc, pp, &md);
3329 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3330 	if (error != 0) {
3331 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3332 		    pp->name, gp->name, error);
3333 		if (LIST_EMPTY(&sc->sc_disks)) {
3334 			g_cancel_event(sc);
3335 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3336 			g_topology_lock();
3337 			return (NULL);
3338 		}
3339 		gp = NULL;
3340 	}
3341 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3342 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3343 		g_topology_lock();
3344 		return (NULL);
3345 	}
3346 	sx_xunlock(&sc->sc_lock);
3347 	g_topology_lock();
3348 	return (gp);
3349 }
3350 
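/*
 * The underlying provider changed size; rewrite the component's metadata,
 * which lives in the provider's last sector.
 */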
3351 static void
3352 g_mirror_resize(struct g_consumer *cp)
3353 {
3354 	struct g_mirror_disk *disk;
3355 
3356 	g_topology_assert();
3357 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3358 
3359 	disk = cp->private;
3360 	if (disk == NULL)
3361 		return;
3362 	g_topology_unlock();
3363 	g_mirror_update_metadata(disk);
3364 	g_topology_lock();
3365 }
3366 
3367 static int
3368 g_mirror_destroy_geom(struct gctl_req *req __unused,
3369     struct g_class *mp __unused, struct g_geom *gp)
3370 {
3371 	struct g_mirror_softc *sc;
3372 	int error;
3373 
3374 	g_topology_unlock();
3375 	sc = gp->softc;
3376 	sx_xlock(&sc->sc_lock);
3377 	g_cancel_event(sc);
3378 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3379 	if (error != 0)
3380 		sx_xunlock(&sc->sc_lock);
3381 	g_topology_lock();
3382 	return (error);
3383 }
3384 
3385 static void
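/*
 * Dump device and component state as XML fragments for the GEOM
 * configuration tree, for example (sample output, values will vary):
 *
 *   <Type>AUTOMATIC</Type>
 *   <ID>1234567890</ID>
 *   <SyncID>1</SyncID>
 *   <GenID>0</GenID>
 *   <Flags>NONE</Flags>
 *   <State>COMPLETE</State>
 */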
3386 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3387     struct g_consumer *cp, struct g_provider *pp)
3388 {
3389 	struct g_mirror_softc *sc;
3390 
3391 	g_topology_assert();
3392 
3393 	sc = gp->softc;
3394 	if (sc == NULL)
3395 		return;
3396 	/* Skip synchronization geom. */
3397 	if (gp == sc->sc_sync.ds_geom)
3398 		return;
3399 	if (pp != NULL) {
3400 		/* Nothing here. */
3401 	} else if (cp != NULL) {
3402 		struct g_mirror_disk *disk;
3403 
3404 		disk = cp->private;
3405 		if (disk == NULL)
3406 			return;
3407 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3408 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3409 			sbuf_printf(sb, "%s<Synchronized>", indent);
3410 			if (disk->d_sync.ds_offset == 0)
3411 				sbuf_cat(sb, "0%");
3412 			else
3413 				sbuf_printf(sb, "%u%%",
3414 				    (u_int)((disk->d_sync.ds_offset * 100) /
3415 				    sc->sc_mediasize));
3416 			sbuf_cat(sb, "</Synchronized>\n");
3417 			if (disk->d_sync.ds_offset > 0)
3418 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3419 				    "</BytesSynced>\n", indent,
3420 				    (intmax_t)disk->d_sync.ds_offset);
3421 		}
3422 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3423 		    disk->d_sync.ds_syncid);
3424 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3425 		    disk->d_genid);
3426 		sbuf_printf(sb, "%s<Flags>", indent);
3427 		if (disk->d_flags == 0)
3428 			sbuf_cat(sb, "NONE");
3429 		else {
3430 			int first = 1;
3431 
3432 #define	ADD_FLAG(flag, name)	do {					\
3433 	if ((disk->d_flags & (flag)) != 0) {				\
3434 		if (!first)						\
3435 			sbuf_cat(sb, ", ");				\
3436 		else							\
3437 			first = 0;					\
3438 		sbuf_cat(sb, name);					\
3439 	}								\
3440 } while (0)
3441 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3442 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3443 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3444 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3445 			    "SYNCHRONIZING");
3446 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3447 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3448 #undef	ADD_FLAG
3449 		}
3450 		sbuf_cat(sb, "</Flags>\n");
3451 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3452 		    disk->d_priority);
3453 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3454 		    g_mirror_disk_state2str(disk->d_state));
3455 	} else {
3456 		sbuf_printf(sb, "%s<Type>", indent);
3457 		switch (sc->sc_type) {
3458 		case G_MIRROR_TYPE_AUTOMATIC:
3459 			sbuf_cat(sb, "AUTOMATIC");
3460 			break;
3461 		case G_MIRROR_TYPE_MANUAL:
3462 			sbuf_cat(sb, "MANUAL");
3463 			break;
3464 		default:
3465 			sbuf_cat(sb, "UNKNOWN");
3466 			break;
3467 		}
3468 		sbuf_cat(sb, "</Type>\n");
3469 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3470 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3471 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3472 		sbuf_printf(sb, "%s<Flags>", indent);
3473 		if (sc->sc_flags == 0)
3474 			sbuf_cat(sb, "NONE");
3475 		else {
3476 			int first = 1;
3477 
3478 #define	ADD_FLAG(flag, name)	do {					\
3479 	if ((sc->sc_flags & (flag)) != 0) {				\
3480 		if (!first)						\
3481 			sbuf_cat(sb, ", ");				\
3482 		else							\
3483 			first = 0;					\
3484 		sbuf_cat(sb, name);					\
3485 	}								\
3486 } while (0)
3487 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3488 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3489 #undef	ADD_FLAG
3490 		}
3491 		sbuf_cat(sb, "</Flags>\n");
3492 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3493 		    (u_int)sc->sc_slice);
3494 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3495 		    balance_name(sc->sc_balance));
3496 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3497 		    sc->sc_ndisks);
3498 		sbuf_printf(sb, "%s<State>", indent);
3499 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3500 			sbuf_cat(sb, "STARTING");
3501 		else if (sc->sc_ndisks ==
3502 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3503 			sbuf_cat(sb, "COMPLETE");
3504 		else
3505 			sbuf_cat(sb, "DEGRADED");
3506 		sbuf_cat(sb, "</State>\n");
3507 	}
3508 }
3509 
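/*
 * shutdown_post_sync event handler: mark all mirrors clean and schedule
 * their delayed destruction so that components are closed gracefully.
 */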
3510 static void
3511 g_mirror_shutdown_post_sync(void *arg, int howto)
3512 {
3513 	struct g_class *mp;
3514 	struct g_geom *gp, *gp2;
3515 	struct g_mirror_softc *sc;
3516 	int error;
3517 
3518 	if (KERNEL_PANICKED())
3519 		return;
3520 
3521 	mp = arg;
3522 	g_topology_lock();
3523 	g_mirror_shutdown = 1;
3524 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3525 		if ((sc = gp->softc) == NULL)
3526 			continue;
3527 		/* Skip synchronization geom. */
3528 		if (gp == sc->sc_sync.ds_geom)
3529 			continue;
3530 		g_topology_unlock();
3531 		sx_xlock(&sc->sc_lock);
3532 		g_mirror_idle(sc, -1);
3533 		g_cancel_event(sc);
3534 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3535 		if (error != 0)
3536 			sx_xunlock(&sc->sc_lock);
3537 		g_topology_lock();
3538 	}
3539 	g_topology_unlock();
3540 }
3541 
3542 static void
3543 g_mirror_init(struct g_class *mp)
3544 {
3545 
3546 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3547 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3548 	if (g_mirror_post_sync == NULL)
3549 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3550 }
3551 
3552 static void
3553 g_mirror_fini(struct g_class *mp)
3554 {
3555 
3556 	if (g_mirror_post_sync != NULL)
3557 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3558 }
3559 
3560 /*
3561  * Refresh the mirror device's metadata when gmirror encounters a newer
3562  * generation as the individual components are being added to the mirror set.
3563  */
3564 static int
3565 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3566     const struct g_mirror_metadata *md)
3567 {
3568 
3569 	g_topology_assert_not();
3570 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3571 
3572 	KASSERT(sc->sc_genid <= md->md_genid,
3573 	    ("%s: attempted to refresh from stale component %s (device %s) "
3574 	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3575 	    sc->sc_genid));
3576 
3577 	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3578 	    sc->sc_syncid >= md->md_syncid))
3579 		return (0);
3580 
3581 	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3582 	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3583 	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3584 	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3585 
3586 	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3587 		/* Probable data corruption detected */
3588 		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3589 		    "(device=%s genid=%u). A stale mirror device was launched.",
3590 		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3591 		    sc->sc_genid);
3592 		return (EINVAL);
3593 	}
3594 
3595 	/* Update softc */
3596 	g_mirror_reinit_from_metadata(sc, md);
3597 
3598 	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3599 	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3600 	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3601 	    md->md_syncid, (unsigned)md->md_all);
3602 
3603 	return (0);
3604 }
3605 
3606 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3607 MODULE_VERSION(geom_mirror, 0);
3608