xref: /freebsd/sys/geom/mirror/g_mirror.c (revision 60eddb209b5ad13a549ca74a41b7cb38a31da5ef)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/eventhandler.h>
36 #include <sys/fail.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sbuf.h>
45 #include <sys/sched.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48 
49 #include <geom/geom.h>
50 #include <geom/mirror/g_mirror.h>
51 
52 FEATURE(geom_mirror, "GEOM mirroring support");
53 
54 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
55 
56 SYSCTL_DECL(_kern_geom);
57 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
58     "GEOM_MIRROR stuff");
59 int g_mirror_debug = 0;
60 SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
61     "Debug level");
62 static u_int g_mirror_timeout = 4;
63 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
64     0, "Time to wait on all mirror components");
65 static u_int g_mirror_idletime = 5;
66 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
67     &g_mirror_idletime, 0, "Mark components as clean when idling");
68 static u_int g_mirror_disconnect_on_failure = 1;
69 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
70     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
71 static u_int g_mirror_syncreqs = 2;
72 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
73     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
74 static u_int g_mirror_sync_period = 5;
75 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
76     &g_mirror_sync_period, 0,
77     "Metadata update period during synchronization, in seconds");
78 
79 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
80 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
81 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
82 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
83 } while (0)
84 
85 static eventhandler_tag g_mirror_post_sync = NULL;
86 static int g_mirror_shutdown = 0;
87 
88 static g_ctl_destroy_geom_t g_mirror_destroy_geom;
89 static g_taste_t g_mirror_taste;
90 static g_init_t g_mirror_init;
91 static g_fini_t g_mirror_fini;
92 static g_provgone_t g_mirror_providergone;
93 static g_resize_t g_mirror_resize;
94 
95 struct g_class g_mirror_class = {
96 	.name = G_MIRROR_CLASS_NAME,
97 	.version = G_VERSION,
98 	.ctlreq = g_mirror_config,
99 	.taste = g_mirror_taste,
100 	.destroy_geom = g_mirror_destroy_geom,
101 	.init = g_mirror_init,
102 	.fini = g_mirror_fini,
103 	.providergone = g_mirror_providergone,
104 	.resize = g_mirror_resize
105 };
106 
107 
108 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
109 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
110 static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
111 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
112     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
113 static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
114     struct bio *bp, off_t offset);
115 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
116 static void g_mirror_register_request(struct g_mirror_softc *sc,
117     struct bio *bp);
118 static void g_mirror_sync_release(struct g_mirror_softc *sc);
119 
120 
121 static const char *
122 g_mirror_disk_state2str(int state)
123 {
124 
125 	switch (state) {
126 	case G_MIRROR_DISK_STATE_NONE:
127 		return ("NONE");
128 	case G_MIRROR_DISK_STATE_NEW:
129 		return ("NEW");
130 	case G_MIRROR_DISK_STATE_ACTIVE:
131 		return ("ACTIVE");
132 	case G_MIRROR_DISK_STATE_STALE:
133 		return ("STALE");
134 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
135 		return ("SYNCHRONIZING");
136 	case G_MIRROR_DISK_STATE_DISCONNECTED:
137 		return ("DISCONNECTED");
138 	case G_MIRROR_DISK_STATE_DESTROY:
139 		return ("DESTROY");
140 	default:
141 		return ("INVALID");
142 	}
143 }
144 
145 static const char *
146 g_mirror_device_state2str(int state)
147 {
148 
149 	switch (state) {
150 	case G_MIRROR_DEVICE_STATE_STARTING:
151 		return ("STARTING");
152 	case G_MIRROR_DEVICE_STATE_RUNNING:
153 		return ("RUNNING");
154 	default:
155 		return ("INVALID");
156 	}
157 }
158 
159 static const char *
160 g_mirror_get_diskname(struct g_mirror_disk *disk)
161 {
162 
163 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
164 		return ("[unknown]");
165 	return (disk->d_name);
166 }
167 
168 /*
169  * --- Events handling functions ---
170  * Events in geom_mirror are used to maintain disks and device status
171  * from one thread to simplify locking.
172  */
173 static void
174 g_mirror_event_free(struct g_mirror_event *ep)
175 {
176 
177 	free(ep, M_MIRROR);
178 }
179 
180 int
181 g_mirror_event_send(void *arg, int state, int flags)
182 {
183 	struct g_mirror_softc *sc;
184 	struct g_mirror_disk *disk;
185 	struct g_mirror_event *ep;
186 	int error;
187 
188 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
189 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
190 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
191 		disk = NULL;
192 		sc = arg;
193 	} else {
194 		disk = arg;
195 		sc = disk->d_softc;
196 	}
197 	ep->e_disk = disk;
198 	ep->e_state = state;
199 	ep->e_flags = flags;
200 	ep->e_error = 0;
201 	mtx_lock(&sc->sc_events_mtx);
202 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
203 	mtx_unlock(&sc->sc_events_mtx);
204 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
205 	mtx_lock(&sc->sc_queue_mtx);
206 	wakeup(sc);
207 	mtx_unlock(&sc->sc_queue_mtx);
208 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
209 		return (0);
210 	sx_assert(&sc->sc_lock, SX_XLOCKED);
211 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
212 	sx_xunlock(&sc->sc_lock);
213 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
214 		mtx_lock(&sc->sc_events_mtx);
215 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
216 		    hz * 5);
217 	}
218 	error = ep->e_error;
219 	g_mirror_event_free(ep);
220 	sx_xlock(&sc->sc_lock);
221 	return (error);
222 }
223 
224 static struct g_mirror_event *
225 g_mirror_event_first(struct g_mirror_softc *sc)
226 {
227 	struct g_mirror_event *ep;
228 
229 	mtx_lock(&sc->sc_events_mtx);
230 	ep = TAILQ_FIRST(&sc->sc_events);
231 	mtx_unlock(&sc->sc_events_mtx);
232 	return (ep);
233 }
234 
235 static void
236 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
237 {
238 
239 	mtx_lock(&sc->sc_events_mtx);
240 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
241 	mtx_unlock(&sc->sc_events_mtx);
242 }
243 
244 static void
245 g_mirror_event_cancel(struct g_mirror_disk *disk)
246 {
247 	struct g_mirror_softc *sc;
248 	struct g_mirror_event *ep, *tmpep;
249 
250 	sc = disk->d_softc;
251 	sx_assert(&sc->sc_lock, SX_XLOCKED);
252 
253 	mtx_lock(&sc->sc_events_mtx);
254 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
255 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
256 			continue;
257 		if (ep->e_disk != disk)
258 			continue;
259 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
260 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
261 			g_mirror_event_free(ep);
262 		else {
263 			ep->e_error = ECANCELED;
264 			wakeup(ep);
265 		}
266 	}
267 	mtx_unlock(&sc->sc_events_mtx);
268 }
269 
270 /*
271  * Return the number of disks in given state.
272  * If state is equal to -1, count all connected disks.
273  */
274 u_int
275 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
276 {
277 	struct g_mirror_disk *disk;
278 	u_int n = 0;
279 
280 	sx_assert(&sc->sc_lock, SX_LOCKED);
281 
282 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
283 		if (state == -1 || disk->d_state == state)
284 			n++;
285 	}
286 	return (n);
287 }
288 
289 /*
290  * Find a disk in mirror by its disk ID.
291  */
292 static struct g_mirror_disk *
293 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
294 {
295 	struct g_mirror_disk *disk;
296 
297 	sx_assert(&sc->sc_lock, SX_XLOCKED);
298 
299 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
300 		if (disk->d_id == id)
301 			return (disk);
302 	}
303 	return (NULL);
304 }
305 
306 static u_int
307 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
308 {
309 	struct bio *bp;
310 	u_int nreqs = 0;
311 
312 	mtx_lock(&sc->sc_queue_mtx);
313 	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
314 		if (bp->bio_from == cp)
315 			nreqs++;
316 	}
317 	mtx_unlock(&sc->sc_queue_mtx);
318 	return (nreqs);
319 }
320 
321 static int
322 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
323 {
324 
325 	if (cp->index > 0) {
326 		G_MIRROR_DEBUG(2,
327 		    "I/O requests for %s exist, can't destroy it now.",
328 		    cp->provider->name);
329 		return (1);
330 	}
331 	if (g_mirror_nrequests(sc, cp) > 0) {
332 		G_MIRROR_DEBUG(2,
333 		    "I/O requests for %s in queue, can't destroy it now.",
334 		    cp->provider->name);
335 		return (1);
336 	}
337 	return (0);
338 }
339 
340 static void
341 g_mirror_destroy_consumer(void *arg, int flags __unused)
342 {
343 	struct g_consumer *cp;
344 
345 	g_topology_assert();
346 
347 	cp = arg;
348 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
349 	g_detach(cp);
350 	g_destroy_consumer(cp);
351 }
352 
353 static void
354 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
355 {
356 	struct g_provider *pp;
357 	int retaste_wait;
358 
359 	g_topology_assert();
360 
361 	cp->private = NULL;
362 	if (g_mirror_is_busy(sc, cp))
363 		return;
364 	pp = cp->provider;
365 	retaste_wait = 0;
366 	if (cp->acw == 1) {
367 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
368 			retaste_wait = 1;
369 	}
370 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
371 	    -cp->acw, -cp->ace, 0);
372 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
373 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
374 	if (retaste_wait) {
375 		/*
376 		 * After retaste event was send (inside g_access()), we can send
377 		 * event to detach and destroy consumer.
378 		 * A class, which has consumer to the given provider connected
379 		 * will not receive retaste event for the provider.
380 		 * This is the way how I ignore retaste events when I close
381 		 * consumers opened for write: I detach and destroy consumer
382 		 * after retaste event is sent.
383 		 */
384 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
385 		return;
386 	}
387 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
388 	g_detach(cp);
389 	g_destroy_consumer(cp);
390 }
391 
392 static int
393 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
394 {
395 	struct g_consumer *cp;
396 	int error;
397 
398 	g_topology_assert_not();
399 	KASSERT(disk->d_consumer == NULL,
400 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
401 
402 	g_topology_lock();
403 	cp = g_new_consumer(disk->d_softc->sc_geom);
404 	cp->flags |= G_CF_DIRECT_RECEIVE;
405 	error = g_attach(cp, pp);
406 	if (error != 0) {
407 		g_destroy_consumer(cp);
408 		g_topology_unlock();
409 		return (error);
410 	}
411 	error = g_access(cp, 1, 1, 1);
412 	if (error != 0) {
413 		g_detach(cp);
414 		g_destroy_consumer(cp);
415 		g_topology_unlock();
416 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
417 		    pp->name, error);
418 		return (error);
419 	}
420 	g_topology_unlock();
421 	disk->d_consumer = cp;
422 	disk->d_consumer->private = disk;
423 	disk->d_consumer->index = 0;
424 
425 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
426 	return (0);
427 }
428 
429 static void
430 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
431 {
432 
433 	g_topology_assert();
434 
435 	if (cp == NULL)
436 		return;
437 	if (cp->provider != NULL)
438 		g_mirror_kill_consumer(sc, cp);
439 	else
440 		g_destroy_consumer(cp);
441 }
442 
443 /*
444  * Initialize disk. This means allocate memory, create consumer, attach it
445  * to the provider and open access (r1w1e1) to it.
446  */
447 static struct g_mirror_disk *
448 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
449     struct g_mirror_metadata *md, int *errorp)
450 {
451 	struct g_mirror_disk *disk;
452 	int i, error;
453 
454 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
455 	if (disk == NULL) {
456 		error = ENOMEM;
457 		goto fail;
458 	}
459 	disk->d_softc = sc;
460 	error = g_mirror_connect_disk(disk, pp);
461 	if (error != 0)
462 		goto fail;
463 	disk->d_id = md->md_did;
464 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
465 	disk->d_priority = md->md_priority;
466 	disk->d_flags = md->md_dflags;
467 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
468 	if (error == 0 && i != 0)
469 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
470 	if (md->md_provider[0] != '\0')
471 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
472 	disk->d_sync.ds_consumer = NULL;
473 	disk->d_sync.ds_offset = md->md_sync_offset;
474 	disk->d_sync.ds_offset_done = md->md_sync_offset;
475 	disk->d_sync.ds_update_ts = time_uptime;
476 	disk->d_genid = md->md_genid;
477 	disk->d_sync.ds_syncid = md->md_syncid;
478 	if (errorp != NULL)
479 		*errorp = 0;
480 	return (disk);
481 fail:
482 	if (errorp != NULL)
483 		*errorp = error;
484 	if (disk != NULL)
485 		free(disk, M_MIRROR);
486 	return (NULL);
487 }
488 
489 static void
490 g_mirror_destroy_disk(struct g_mirror_disk *disk)
491 {
492 	struct g_mirror_softc *sc;
493 
494 	g_topology_assert_not();
495 	sc = disk->d_softc;
496 	sx_assert(&sc->sc_lock, SX_XLOCKED);
497 
498 	LIST_REMOVE(disk, d_next);
499 	g_mirror_event_cancel(disk);
500 	if (sc->sc_hint == disk)
501 		sc->sc_hint = NULL;
502 	switch (disk->d_state) {
503 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
504 		g_mirror_sync_stop(disk, 1);
505 		/* FALLTHROUGH */
506 	case G_MIRROR_DISK_STATE_NEW:
507 	case G_MIRROR_DISK_STATE_STALE:
508 	case G_MIRROR_DISK_STATE_ACTIVE:
509 		g_topology_lock();
510 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
511 		g_topology_unlock();
512 		free(disk, M_MIRROR);
513 		break;
514 	default:
515 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
516 		    g_mirror_get_diskname(disk),
517 		    g_mirror_disk_state2str(disk->d_state)));
518 	}
519 }
520 
521 static void
522 g_mirror_free_device(struct g_mirror_softc *sc)
523 {
524 
525 	mtx_destroy(&sc->sc_queue_mtx);
526 	mtx_destroy(&sc->sc_events_mtx);
527 	mtx_destroy(&sc->sc_done_mtx);
528 	sx_destroy(&sc->sc_lock);
529 	free(sc, M_MIRROR);
530 }
531 
532 static void
533 g_mirror_providergone(struct g_provider *pp)
534 {
535 	struct g_mirror_softc *sc = pp->private;
536 
537 	if ((--sc->sc_refcnt) == 0)
538 		g_mirror_free_device(sc);
539 }
540 
541 static void
542 g_mirror_destroy_device(struct g_mirror_softc *sc)
543 {
544 	struct g_mirror_disk *disk;
545 	struct g_mirror_event *ep;
546 	struct g_geom *gp;
547 	struct g_consumer *cp, *tmpcp;
548 
549 	g_topology_assert_not();
550 	sx_assert(&sc->sc_lock, SX_XLOCKED);
551 
552 	gp = sc->sc_geom;
553 	if (sc->sc_provider != NULL)
554 		g_mirror_destroy_provider(sc);
555 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
556 	    disk = LIST_FIRST(&sc->sc_disks)) {
557 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
558 		g_mirror_update_metadata(disk);
559 		g_mirror_destroy_disk(disk);
560 	}
561 	while ((ep = g_mirror_event_first(sc)) != NULL) {
562 		g_mirror_event_remove(sc, ep);
563 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
564 			g_mirror_event_free(ep);
565 		else {
566 			ep->e_error = ECANCELED;
567 			ep->e_flags |= G_MIRROR_EVENT_DONE;
568 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
569 			mtx_lock(&sc->sc_events_mtx);
570 			wakeup(ep);
571 			mtx_unlock(&sc->sc_events_mtx);
572 		}
573 	}
574 	callout_drain(&sc->sc_callout);
575 
576 	g_topology_lock();
577 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
578 		g_mirror_disconnect_consumer(sc, cp);
579 	}
580 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
581 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
582 	g_wither_geom(gp, ENXIO);
583 	sx_xunlock(&sc->sc_lock);
584 	if ((--sc->sc_refcnt) == 0)
585 		g_mirror_free_device(sc);
586 	g_topology_unlock();
587 }
588 
589 static void
590 g_mirror_orphan(struct g_consumer *cp)
591 {
592 	struct g_mirror_disk *disk;
593 
594 	g_topology_assert();
595 
596 	disk = cp->private;
597 	if (disk == NULL)
598 		return;
599 	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
600 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
601 	    G_MIRROR_EVENT_DONTWAIT);
602 }
603 
604 /*
605  * Function should return the next active disk on the list.
606  * It is possible that it will be the same disk as given.
607  * If there are no active disks on list, NULL is returned.
608  */
609 static __inline struct g_mirror_disk *
610 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
611 {
612 	struct g_mirror_disk *dp;
613 
614 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
615 	    dp = LIST_NEXT(dp, d_next)) {
616 		if (dp == NULL)
617 			dp = LIST_FIRST(&sc->sc_disks);
618 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
619 			break;
620 	}
621 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
622 		return (NULL);
623 	return (dp);
624 }
625 
626 static struct g_mirror_disk *
627 g_mirror_get_disk(struct g_mirror_softc *sc)
628 {
629 	struct g_mirror_disk *disk;
630 
631 	if (sc->sc_hint == NULL) {
632 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
633 		if (sc->sc_hint == NULL)
634 			return (NULL);
635 	}
636 	disk = sc->sc_hint;
637 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
638 		disk = g_mirror_find_next(sc, disk);
639 		if (disk == NULL)
640 			return (NULL);
641 	}
642 	sc->sc_hint = g_mirror_find_next(sc, disk);
643 	return (disk);
644 }
645 
646 static int
647 g_mirror_write_metadata(struct g_mirror_disk *disk,
648     struct g_mirror_metadata *md)
649 {
650 	struct g_mirror_softc *sc;
651 	struct g_consumer *cp;
652 	off_t offset, length;
653 	u_char *sector;
654 	int error = 0;
655 
656 	g_topology_assert_not();
657 	sc = disk->d_softc;
658 	sx_assert(&sc->sc_lock, SX_LOCKED);
659 
660 	cp = disk->d_consumer;
661 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
662 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
663 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
664 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
665 	    cp->acw, cp->ace));
666 	length = cp->provider->sectorsize;
667 	offset = cp->provider->mediasize - length;
668 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
669 	if (md != NULL &&
670 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
671 		/*
672 		 * Handle the case, when the size of parent provider reduced.
673 		 */
674 		if (offset < md->md_mediasize)
675 			error = ENOSPC;
676 		else
677 			mirror_metadata_encode(md, sector);
678 	}
679 	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
680 	if (error == 0)
681 		error = g_write_data(cp, offset, sector, length);
682 	free(sector, M_MIRROR);
683 	if (error != 0) {
684 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
685 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
686 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
687 			    "(device=%s, error=%d).",
688 			    g_mirror_get_diskname(disk), sc->sc_name, error);
689 		} else {
690 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
691 			    "(device=%s, error=%d).",
692 			    g_mirror_get_diskname(disk), sc->sc_name, error);
693 		}
694 		if (g_mirror_disconnect_on_failure &&
695 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
696 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
697 			g_mirror_event_send(disk,
698 			    G_MIRROR_DISK_STATE_DISCONNECTED,
699 			    G_MIRROR_EVENT_DONTWAIT);
700 		}
701 	}
702 	return (error);
703 }
704 
705 static int
706 g_mirror_clear_metadata(struct g_mirror_disk *disk)
707 {
708 	int error;
709 
710 	g_topology_assert_not();
711 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
712 
713 	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
714 		return (0);
715 	error = g_mirror_write_metadata(disk, NULL);
716 	if (error == 0) {
717 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
718 		    g_mirror_get_diskname(disk));
719 	} else {
720 		G_MIRROR_DEBUG(0,
721 		    "Cannot clear metadata on disk %s (error=%d).",
722 		    g_mirror_get_diskname(disk), error);
723 	}
724 	return (error);
725 }
726 
727 void
728 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
729     struct g_mirror_metadata *md)
730 {
731 
732 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
733 	md->md_version = G_MIRROR_VERSION;
734 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
735 	md->md_mid = sc->sc_id;
736 	md->md_all = sc->sc_ndisks;
737 	md->md_slice = sc->sc_slice;
738 	md->md_balance = sc->sc_balance;
739 	md->md_genid = sc->sc_genid;
740 	md->md_mediasize = sc->sc_mediasize;
741 	md->md_sectorsize = sc->sc_sectorsize;
742 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
743 	bzero(md->md_provider, sizeof(md->md_provider));
744 	if (disk == NULL) {
745 		md->md_did = arc4random();
746 		md->md_priority = 0;
747 		md->md_syncid = 0;
748 		md->md_dflags = 0;
749 		md->md_sync_offset = 0;
750 		md->md_provsize = 0;
751 	} else {
752 		md->md_did = disk->d_id;
753 		md->md_priority = disk->d_priority;
754 		md->md_syncid = disk->d_sync.ds_syncid;
755 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
756 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
757 			md->md_sync_offset = disk->d_sync.ds_offset_done;
758 		else
759 			md->md_sync_offset = 0;
760 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
761 			strlcpy(md->md_provider,
762 			    disk->d_consumer->provider->name,
763 			    sizeof(md->md_provider));
764 		}
765 		md->md_provsize = disk->d_consumer->provider->mediasize;
766 	}
767 }
768 
769 void
770 g_mirror_update_metadata(struct g_mirror_disk *disk)
771 {
772 	struct g_mirror_softc *sc;
773 	struct g_mirror_metadata md;
774 	int error;
775 
776 	g_topology_assert_not();
777 	sc = disk->d_softc;
778 	sx_assert(&sc->sc_lock, SX_LOCKED);
779 
780 	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
781 		return;
782 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
783 		g_mirror_fill_metadata(sc, disk, &md);
784 	error = g_mirror_write_metadata(disk, &md);
785 	if (error == 0) {
786 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
787 		    g_mirror_get_diskname(disk));
788 	} else {
789 		G_MIRROR_DEBUG(0,
790 		    "Cannot update metadata on disk %s (error=%d).",
791 		    g_mirror_get_diskname(disk), error);
792 	}
793 }
794 
795 static void
796 g_mirror_bump_syncid(struct g_mirror_softc *sc)
797 {
798 	struct g_mirror_disk *disk;
799 
800 	g_topology_assert_not();
801 	sx_assert(&sc->sc_lock, SX_XLOCKED);
802 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
803 	    ("%s called with no active disks (device=%s).", __func__,
804 	    sc->sc_name));
805 
806 	sc->sc_syncid++;
807 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
808 	    sc->sc_syncid);
809 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
810 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
811 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
812 			disk->d_sync.ds_syncid = sc->sc_syncid;
813 			g_mirror_update_metadata(disk);
814 		}
815 	}
816 }
817 
818 static void
819 g_mirror_bump_genid(struct g_mirror_softc *sc)
820 {
821 	struct g_mirror_disk *disk;
822 
823 	g_topology_assert_not();
824 	sx_assert(&sc->sc_lock, SX_XLOCKED);
825 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
826 	    ("%s called with no active disks (device=%s).", __func__,
827 	    sc->sc_name));
828 
829 	sc->sc_genid++;
830 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
831 	    sc->sc_genid);
832 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
833 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
834 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
835 			disk->d_genid = sc->sc_genid;
836 			g_mirror_update_metadata(disk);
837 		}
838 	}
839 }
840 
841 static int
842 g_mirror_idle(struct g_mirror_softc *sc, int acw)
843 {
844 	struct g_mirror_disk *disk;
845 	int timeout;
846 
847 	g_topology_assert_not();
848 	sx_assert(&sc->sc_lock, SX_XLOCKED);
849 
850 	if (sc->sc_provider == NULL)
851 		return (0);
852 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
853 		return (0);
854 	if (sc->sc_idle)
855 		return (0);
856 	if (sc->sc_writes > 0)
857 		return (0);
858 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
859 		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
860 		if (!g_mirror_shutdown && timeout > 0)
861 			return (timeout);
862 	}
863 	sc->sc_idle = 1;
864 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
865 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
866 			continue;
867 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
868 		    g_mirror_get_diskname(disk), sc->sc_name);
869 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
870 		g_mirror_update_metadata(disk);
871 	}
872 	return (0);
873 }
874 
875 static void
876 g_mirror_unidle(struct g_mirror_softc *sc)
877 {
878 	struct g_mirror_disk *disk;
879 
880 	g_topology_assert_not();
881 	sx_assert(&sc->sc_lock, SX_XLOCKED);
882 
883 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
884 		return;
885 	sc->sc_idle = 0;
886 	sc->sc_last_write = time_uptime;
887 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
888 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
889 			continue;
890 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
891 		    g_mirror_get_diskname(disk), sc->sc_name);
892 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
893 		g_mirror_update_metadata(disk);
894 	}
895 }
896 
897 static void
898 g_mirror_done(struct bio *bp)
899 {
900 	struct g_mirror_softc *sc;
901 
902 	sc = bp->bio_from->geom->softc;
903 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
904 	mtx_lock(&sc->sc_queue_mtx);
905 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
906 	mtx_unlock(&sc->sc_queue_mtx);
907 	wakeup(sc);
908 }
909 
910 static void
911 g_mirror_regular_request_error(struct g_mirror_softc *sc,
912     struct g_mirror_disk *disk, struct bio *bp)
913 {
914 
915 	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP)
916 		return;
917 
918 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
919 		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
920 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
921 		    bp->bio_error);
922 	} else {
923 		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
924 		    bp->bio_error);
925 	}
926 	if (g_mirror_disconnect_on_failure &&
927 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
928 		if (bp->bio_error == ENXIO &&
929 		    bp->bio_cmd == BIO_READ)
930 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
931 		else if (bp->bio_error == ENXIO)
932 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
933 		else
934 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
935 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
936 		    G_MIRROR_EVENT_DONTWAIT);
937 	}
938 }
939 
940 static void
941 g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
942 {
943 	struct g_mirror_disk *disk;
944 	struct bio *pbp;
945 
946 	g_topology_assert_not();
947 	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
948 	    ("regular request %p with unexpected origin", bp));
949 
950 	pbp = bp->bio_parent;
951 	bp->bio_from->index--;
952 	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
953 		sc->sc_writes--;
954 	disk = bp->bio_from->private;
955 	if (disk == NULL) {
956 		g_topology_lock();
957 		g_mirror_kill_consumer(sc, bp->bio_from);
958 		g_topology_unlock();
959 	}
960 
961 	switch (bp->bio_cmd) {
962 	case BIO_READ:
963 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
964 		    bp->bio_error);
965 		break;
966 	case BIO_WRITE:
967 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
968 		    bp->bio_error);
969 		break;
970 	case BIO_DELETE:
971 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
972 		    bp->bio_error);
973 		break;
974 	case BIO_FLUSH:
975 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
976 		    bp->bio_error);
977 		break;
978 	}
979 
980 	pbp->bio_inbed++;
981 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
982 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
983 	    pbp->bio_children));
984 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
985 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
986 		g_destroy_bio(bp);
987 		if (pbp->bio_children == pbp->bio_inbed) {
988 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
989 			pbp->bio_completed = pbp->bio_length;
990 			if (pbp->bio_cmd == BIO_WRITE ||
991 			    pbp->bio_cmd == BIO_DELETE) {
992 				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
993 				/* Release delayed sync requests if possible. */
994 				g_mirror_sync_release(sc);
995 			}
996 			g_io_deliver(pbp, pbp->bio_error);
997 		}
998 		return;
999 	} else if (bp->bio_error != 0) {
1000 		if (pbp->bio_error == 0)
1001 			pbp->bio_error = bp->bio_error;
1002 		if (disk != NULL)
1003 			g_mirror_regular_request_error(sc, disk, bp);
1004 		switch (pbp->bio_cmd) {
1005 		case BIO_DELETE:
1006 		case BIO_WRITE:
1007 		case BIO_FLUSH:
1008 			pbp->bio_inbed--;
1009 			pbp->bio_children--;
1010 			break;
1011 		}
1012 	}
1013 	g_destroy_bio(bp);
1014 
1015 	switch (pbp->bio_cmd) {
1016 	case BIO_READ:
1017 		if (pbp->bio_inbed < pbp->bio_children)
1018 			break;
1019 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
1020 			g_io_deliver(pbp, pbp->bio_error);
1021 		else {
1022 			pbp->bio_error = 0;
1023 			mtx_lock(&sc->sc_queue_mtx);
1024 			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
1025 			mtx_unlock(&sc->sc_queue_mtx);
1026 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1027 			wakeup(sc);
1028 		}
1029 		break;
1030 	case BIO_DELETE:
1031 	case BIO_WRITE:
1032 	case BIO_FLUSH:
1033 		if (pbp->bio_children == 0) {
1034 			/*
1035 			 * All requests failed.
1036 			 */
1037 		} else if (pbp->bio_inbed < pbp->bio_children) {
1038 			/* Do nothing. */
1039 			break;
1040 		} else if (pbp->bio_children == pbp->bio_inbed) {
1041 			/* Some requests succeeded. */
1042 			pbp->bio_error = 0;
1043 			pbp->bio_completed = pbp->bio_length;
1044 		}
1045 		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
1046 			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
1047 			/* Release delayed sync requests if possible. */
1048 			g_mirror_sync_release(sc);
1049 		}
1050 		g_io_deliver(pbp, pbp->bio_error);
1051 		break;
1052 	default:
1053 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
1054 		break;
1055 	}
1056 }
1057 
1058 static void
1059 g_mirror_sync_done(struct bio *bp)
1060 {
1061 	struct g_mirror_softc *sc;
1062 
1063 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1064 	sc = bp->bio_from->geom->softc;
1065 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1066 	mtx_lock(&sc->sc_queue_mtx);
1067 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1068 	mtx_unlock(&sc->sc_queue_mtx);
1069 	wakeup(sc);
1070 }
1071 
1072 static void
1073 g_mirror_candelete(struct bio *bp)
1074 {
1075 	struct g_mirror_softc *sc;
1076 	struct g_mirror_disk *disk;
1077 	int *val;
1078 
1079 	sc = bp->bio_to->private;
1080 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1081 		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1082 			break;
1083 	}
1084 	val = (int *)bp->bio_data;
1085 	*val = (disk != NULL);
1086 	g_io_deliver(bp, 0);
1087 }
1088 
1089 static void
1090 g_mirror_kernel_dump(struct bio *bp)
1091 {
1092 	struct g_mirror_softc *sc;
1093 	struct g_mirror_disk *disk;
1094 	struct bio *cbp;
1095 	struct g_kerneldump *gkd;
1096 
1097 	/*
1098 	 * We configure dumping to the first component, because this component
1099 	 * will be used for reading with 'prefer' balance algorithm.
1100 	 * If the component with the highest priority is currently disconnected
1101 	 * we will not be able to read the dump after the reboot if it will be
1102 	 * connected and synchronized later. Can we do something better?
1103 	 */
1104 	sc = bp->bio_to->private;
1105 	disk = LIST_FIRST(&sc->sc_disks);
1106 
1107 	gkd = (struct g_kerneldump *)bp->bio_data;
1108 	if (gkd->length > bp->bio_to->mediasize)
1109 		gkd->length = bp->bio_to->mediasize;
1110 	cbp = g_clone_bio(bp);
1111 	if (cbp == NULL) {
1112 		g_io_deliver(bp, ENOMEM);
1113 		return;
1114 	}
1115 	cbp->bio_done = g_std_done;
1116 	g_io_request(cbp, disk->d_consumer);
1117 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1118 	    g_mirror_get_diskname(disk));
1119 }
1120 
1121 static void
1122 g_mirror_start(struct bio *bp)
1123 {
1124 	struct g_mirror_softc *sc;
1125 
1126 	sc = bp->bio_to->private;
1127 	/*
1128 	 * If sc == NULL or there are no valid disks, provider's error
1129 	 * should be set and g_mirror_start() should not be called at all.
1130 	 */
1131 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1132 	    ("Provider's error should be set (error=%d)(mirror=%s).",
1133 	    bp->bio_to->error, bp->bio_to->name));
1134 	G_MIRROR_LOGREQ(3, bp, "Request received.");
1135 
1136 	switch (bp->bio_cmd) {
1137 	case BIO_READ:
1138 	case BIO_WRITE:
1139 	case BIO_DELETE:
1140 	case BIO_FLUSH:
1141 		break;
1142 	case BIO_GETATTR:
1143 		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1144 			g_mirror_candelete(bp);
1145 			return;
1146 		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1147 			g_mirror_kernel_dump(bp);
1148 			return;
1149 		}
1150 		/* FALLTHROUGH */
1151 	default:
1152 		g_io_deliver(bp, EOPNOTSUPP);
1153 		return;
1154 	}
1155 	mtx_lock(&sc->sc_queue_mtx);
1156 	if (bp->bio_to->error != 0) {
1157 		mtx_unlock(&sc->sc_queue_mtx);
1158 		g_io_deliver(bp, bp->bio_to->error);
1159 		return;
1160 	}
1161 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1162 	mtx_unlock(&sc->sc_queue_mtx);
1163 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1164 	wakeup(sc);
1165 }
1166 
1167 /*
1168  * Return TRUE if the given request is colliding with a in-progress
1169  * synchronization request.
1170  */
1171 static bool
1172 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1173 {
1174 	struct g_mirror_disk *disk;
1175 	struct bio *sbp;
1176 	off_t rstart, rend, sstart, send;
1177 	u_int i;
1178 
1179 	if (sc->sc_sync.ds_ndisks == 0)
1180 		return (false);
1181 	rstart = bp->bio_offset;
1182 	rend = bp->bio_offset + bp->bio_length;
1183 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1184 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1185 			continue;
1186 		for (i = 0; i < g_mirror_syncreqs; i++) {
1187 			sbp = disk->d_sync.ds_bios[i];
1188 			if (sbp == NULL)
1189 				continue;
1190 			sstart = sbp->bio_offset;
1191 			send = sbp->bio_offset + sbp->bio_length;
1192 			if (rend > sstart && rstart < send)
1193 				return (true);
1194 		}
1195 	}
1196 	return (false);
1197 }
1198 
1199 /*
1200  * Return TRUE if the given sync request is colliding with a in-progress regular
1201  * request.
1202  */
1203 static bool
1204 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1205 {
1206 	off_t rstart, rend, sstart, send;
1207 	struct bio *bp;
1208 
1209 	if (sc->sc_sync.ds_ndisks == 0)
1210 		return (false);
1211 	sstart = sbp->bio_offset;
1212 	send = sbp->bio_offset + sbp->bio_length;
1213 	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
1214 		rstart = bp->bio_offset;
1215 		rend = bp->bio_offset + bp->bio_length;
1216 		if (rend > sstart && rstart < send)
1217 			return (true);
1218 	}
1219 	return (false);
1220 }
1221 
1222 /*
1223  * Puts regular request onto delayed queue.
1224  */
1225 static void
1226 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1227 {
1228 
1229 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1230 	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
1231 }
1232 
1233 /*
1234  * Puts synchronization request onto delayed queue.
1235  */
1236 static void
1237 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1238 {
1239 
1240 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1241 	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
1242 }
1243 
1244 /*
1245  * Requeue delayed regular requests.
1246  */
1247 static void
1248 g_mirror_regular_release(struct g_mirror_softc *sc)
1249 {
1250 	struct bio *bp;
1251 
1252 	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
1253 		return;
1254 	if (g_mirror_sync_collision(sc, bp))
1255 		return;
1256 
1257 	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
1258 	mtx_lock(&sc->sc_queue_mtx);
1259 	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
1260 	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
1261 	mtx_unlock(&sc->sc_queue_mtx);
1262 }
1263 
1264 /*
1265  * Releases delayed sync requests which don't collide anymore with regular
1266  * requests.
1267  */
1268 static void
1269 g_mirror_sync_release(struct g_mirror_softc *sc)
1270 {
1271 	struct bio *bp, *bp2;
1272 
1273 	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
1274 		if (g_mirror_regular_collision(sc, bp))
1275 			continue;
1276 		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
1277 		G_MIRROR_LOGREQ(2, bp,
1278 		    "Releasing delayed synchronization request.");
1279 		g_io_request(bp, bp->bio_from);
1280 	}
1281 }
1282 
1283 /*
1284  * Free a synchronization request and clear its slot in the array.
1285  */
1286 static void
1287 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
1288 {
1289 	int idx;
1290 
1291 	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
1292 		idx = (int)(uintptr_t)bp->bio_caller1;
1293 		KASSERT(disk->d_sync.ds_bios[idx] == bp,
1294 		    ("unexpected sync BIO at %p:%d", disk, idx));
1295 		disk->d_sync.ds_bios[idx] = NULL;
1296 	}
1297 	free(bp->bio_data, M_MIRROR);
1298 	g_destroy_bio(bp);
1299 }
1300 
1301 /*
1302  * Handle synchronization requests.
1303  * Every synchronization request is a two-step process: first, a read request is
1304  * sent to the mirror provider via the sync consumer. If that request completes
1305  * successfully, it is converted to a write and sent to the disk being
1306  * synchronized. If the write also completes successfully, the synchronization
1307  * offset is advanced and a new read request is submitted.
1308  */
1309 static void
1310 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
1311 {
1312 	struct g_mirror_disk *disk;
1313 	struct g_mirror_disk_sync *sync;
1314 
1315 	KASSERT((bp->bio_cmd == BIO_READ &&
1316 	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
1317 	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
1318 	    ("Sync BIO %p with unexpected origin", bp));
1319 
1320 	bp->bio_from->index--;
1321 	disk = bp->bio_from->private;
1322 	if (disk == NULL) {
1323 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1324 		g_topology_lock();
1325 		g_mirror_kill_consumer(sc, bp->bio_from);
1326 		g_topology_unlock();
1327 		g_mirror_sync_request_free(NULL, bp);
1328 		sx_xlock(&sc->sc_lock);
1329 		return;
1330 	}
1331 
1332 	sync = &disk->d_sync;
1333 
1334 	/*
1335 	 * Synchronization request.
1336 	 */
1337 	switch (bp->bio_cmd) {
1338 	case BIO_READ: {
1339 		struct g_mirror_disk *d;
1340 		struct g_consumer *cp;
1341 		int readable;
1342 
1343 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1344 		    bp->bio_error);
1345 
1346 		if (bp->bio_error != 0) {
1347 			G_MIRROR_LOGREQ(0, bp,
1348 			    "Synchronization request failed (error=%d).",
1349 			    bp->bio_error);
1350 
1351 			/*
1352 			 * If there's at least one other disk from which we can
1353 			 * read the block, retry the request.
1354 			 */
1355 			readable = 0;
1356 			LIST_FOREACH(d, &sc->sc_disks, d_next)
1357 				if (d->d_state == G_MIRROR_DISK_STATE_ACTIVE &&
1358 				    !(d->d_flags & G_MIRROR_DISK_FLAG_BROKEN))
1359 					readable++;
1360 
1361 			/*
1362 			 * The read error will trigger a syncid bump, so there's
1363 			 * no need to do that here.
1364 			 *
1365 			 * If we can retry the read from another disk, do so.
1366 			 * Otherwise, all we can do is kick out the new disk.
1367 			 */
1368 			if (readable == 0) {
1369 				g_mirror_sync_request_free(disk, bp);
1370 				g_mirror_event_send(disk,
1371 				    G_MIRROR_DISK_STATE_DISCONNECTED,
1372 				    G_MIRROR_EVENT_DONTWAIT);
1373 			} else {
1374 				g_mirror_sync_reinit(disk, bp, bp->bio_offset);
1375 				goto retry_read;
1376 			}
1377 			return;
1378 		}
1379 		G_MIRROR_LOGREQ(3, bp,
1380 		    "Synchronization request half-finished.");
1381 		bp->bio_cmd = BIO_WRITE;
1382 		bp->bio_cflags = 0;
1383 		cp = disk->d_consumer;
1384 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1385 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1386 		    cp->acr, cp->acw, cp->ace));
1387 		cp->index++;
1388 		g_io_request(bp, cp);
1389 		return;
1390 	}
1391 	case BIO_WRITE: {
1392 		off_t offset;
1393 		int i;
1394 
1395 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1396 		    bp->bio_error);
1397 
1398 		if (bp->bio_error != 0) {
1399 			G_MIRROR_LOGREQ(0, bp,
1400 			    "Synchronization request failed (error=%d).",
1401 			    bp->bio_error);
1402 			g_mirror_sync_request_free(disk, bp);
1403 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1404 			g_mirror_event_send(disk,
1405 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1406 			    G_MIRROR_EVENT_DONTWAIT);
1407 			return;
1408 		}
1409 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1410 		if (sync->ds_offset >= sc->sc_mediasize ||
1411 		    sync->ds_consumer == NULL ||
1412 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1413 			/* Don't send more synchronization requests. */
1414 			sync->ds_inflight--;
1415 			g_mirror_sync_request_free(disk, bp);
1416 			if (sync->ds_inflight > 0)
1417 				return;
1418 			if (sync->ds_consumer == NULL ||
1419 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1420 				return;
1421 			}
1422 			/* Disk up-to-date, activate it. */
1423 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1424 			    G_MIRROR_EVENT_DONTWAIT);
1425 			return;
1426 		}
1427 
1428 		/* Send next synchronization request. */
1429 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
1430 		sync->ds_offset += bp->bio_length;
1431 
1432 retry_read:
1433 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1434 		sync->ds_consumer->index++;
1435 
1436 		/*
1437 		 * Delay the request if it is colliding with a regular request.
1438 		 */
1439 		if (g_mirror_regular_collision(sc, bp))
1440 			g_mirror_sync_delay(sc, bp);
1441 		else
1442 			g_io_request(bp, sync->ds_consumer);
1443 
1444 		/* Requeue delayed requests if possible. */
1445 		g_mirror_regular_release(sc);
1446 
1447 		/* Find the smallest offset */
1448 		offset = sc->sc_mediasize;
1449 		for (i = 0; i < g_mirror_syncreqs; i++) {
1450 			bp = sync->ds_bios[i];
1451 			if (bp != NULL && bp->bio_offset < offset)
1452 				offset = bp->bio_offset;
1453 		}
1454 		if (g_mirror_sync_period > 0 &&
1455 		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
1456 			sync->ds_offset_done = offset;
1457 			g_mirror_update_metadata(disk);
1458 			sync->ds_update_ts = time_uptime;
1459 		}
1460 		return;
1461 	}
1462 	default:
1463 		panic("Invalid I/O request %p", bp);
1464 	}
1465 }
1466 
1467 static void
1468 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1469 {
1470 	struct g_mirror_disk *disk;
1471 	struct g_consumer *cp;
1472 	struct bio *cbp;
1473 
1474 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1475 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1476 			break;
1477 	}
1478 	if (disk == NULL) {
1479 		if (bp->bio_error == 0)
1480 			bp->bio_error = ENXIO;
1481 		g_io_deliver(bp, bp->bio_error);
1482 		return;
1483 	}
1484 	cbp = g_clone_bio(bp);
1485 	if (cbp == NULL) {
1486 		if (bp->bio_error == 0)
1487 			bp->bio_error = ENOMEM;
1488 		g_io_deliver(bp, bp->bio_error);
1489 		return;
1490 	}
1491 	/*
1492 	 * Fill in the component buf structure.
1493 	 */
1494 	cp = disk->d_consumer;
1495 	cbp->bio_done = g_mirror_done;
1496 	cbp->bio_to = cp->provider;
1497 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1498 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1499 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1500 	    cp->acw, cp->ace));
1501 	cp->index++;
1502 	g_io_request(cbp, cp);
1503 }
1504 
1505 static void
1506 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1507 {
1508 	struct g_mirror_disk *disk;
1509 	struct g_consumer *cp;
1510 	struct bio *cbp;
1511 
1512 	disk = g_mirror_get_disk(sc);
1513 	if (disk == NULL) {
1514 		if (bp->bio_error == 0)
1515 			bp->bio_error = ENXIO;
1516 		g_io_deliver(bp, bp->bio_error);
1517 		return;
1518 	}
1519 	cbp = g_clone_bio(bp);
1520 	if (cbp == NULL) {
1521 		if (bp->bio_error == 0)
1522 			bp->bio_error = ENOMEM;
1523 		g_io_deliver(bp, bp->bio_error);
1524 		return;
1525 	}
1526 	/*
1527 	 * Fill in the component buf structure.
1528 	 */
1529 	cp = disk->d_consumer;
1530 	cbp->bio_done = g_mirror_done;
1531 	cbp->bio_to = cp->provider;
1532 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1533 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1534 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1535 	    cp->acw, cp->ace));
1536 	cp->index++;
1537 	g_io_request(cbp, cp);
1538 }
1539 
1540 #define TRACK_SIZE  (1 * 1024 * 1024)
1541 #define LOAD_SCALE	256
1542 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1543 
1544 static void
1545 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1546 {
1547 	struct g_mirror_disk *disk, *dp;
1548 	struct g_consumer *cp;
1549 	struct bio *cbp;
1550 	int prio, best;
1551 
1552 	/* Find a disk with the smallest load. */
1553 	disk = NULL;
1554 	best = INT_MAX;
1555 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1556 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1557 			continue;
1558 		prio = dp->load;
1559 		/* If disk head is precisely in position - highly prefer it. */
1560 		if (dp->d_last_offset == bp->bio_offset)
1561 			prio -= 2 * LOAD_SCALE;
1562 		else
1563 		/* If disk head is close to position - prefer it. */
1564 		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1565 			prio -= 1 * LOAD_SCALE;
1566 		if (prio <= best) {
1567 			disk = dp;
1568 			best = prio;
1569 		}
1570 	}
1571 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1572 	cbp = g_clone_bio(bp);
1573 	if (cbp == NULL) {
1574 		if (bp->bio_error == 0)
1575 			bp->bio_error = ENOMEM;
1576 		g_io_deliver(bp, bp->bio_error);
1577 		return;
1578 	}
1579 	/*
1580 	 * Fill in the component buf structure.
1581 	 */
1582 	cp = disk->d_consumer;
1583 	cbp->bio_done = g_mirror_done;
1584 	cbp->bio_to = cp->provider;
1585 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1586 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1587 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1588 	    cp->acw, cp->ace));
1589 	cp->index++;
1590 	/* Remember last head position */
1591 	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1592 	/* Update loads. */
1593 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1594 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1595 		    dp->load * 7) / 8;
1596 	}
1597 	g_io_request(cbp, cp);
1598 }
1599 
1600 static void
1601 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1602 {
1603 	struct bio_queue queue;
1604 	struct g_mirror_disk *disk;
1605 	struct g_consumer *cp;
1606 	struct bio *cbp;
1607 	off_t left, mod, offset, slice;
1608 	u_char *data;
1609 	u_int ndisks;
1610 
1611 	if (bp->bio_length <= sc->sc_slice) {
1612 		g_mirror_request_round_robin(sc, bp);
1613 		return;
1614 	}
1615 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1616 	slice = bp->bio_length / ndisks;
1617 	mod = slice % sc->sc_provider->sectorsize;
1618 	if (mod != 0)
1619 		slice += sc->sc_provider->sectorsize - mod;
1620 	/*
1621 	 * Allocate all bios before sending any request, so we can
1622 	 * return ENOMEM in nice and clean way.
1623 	 */
1624 	left = bp->bio_length;
1625 	offset = bp->bio_offset;
1626 	data = bp->bio_data;
1627 	TAILQ_INIT(&queue);
1628 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1629 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1630 			continue;
1631 		cbp = g_clone_bio(bp);
1632 		if (cbp == NULL) {
1633 			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1634 				TAILQ_REMOVE(&queue, cbp, bio_queue);
1635 				g_destroy_bio(cbp);
1636 			}
1637 			if (bp->bio_error == 0)
1638 				bp->bio_error = ENOMEM;
1639 			g_io_deliver(bp, bp->bio_error);
1640 			return;
1641 		}
1642 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1643 		cbp->bio_done = g_mirror_done;
1644 		cbp->bio_caller1 = disk;
1645 		cbp->bio_to = disk->d_consumer->provider;
1646 		cbp->bio_offset = offset;
1647 		cbp->bio_data = data;
1648 		cbp->bio_length = MIN(left, slice);
1649 		left -= cbp->bio_length;
1650 		if (left == 0)
1651 			break;
1652 		offset += cbp->bio_length;
1653 		data += cbp->bio_length;
1654 	}
1655 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1656 		TAILQ_REMOVE(&queue, cbp, bio_queue);
1657 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1658 		disk = cbp->bio_caller1;
1659 		cbp->bio_caller1 = NULL;
1660 		cp = disk->d_consumer;
1661 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1662 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1663 		    cp->acr, cp->acw, cp->ace));
1664 		disk->d_consumer->index++;
1665 		g_io_request(cbp, disk->d_consumer);
1666 	}
1667 }
1668 
1669 static void
1670 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1671 {
1672 	struct bio_queue queue;
1673 	struct bio *cbp;
1674 	struct g_consumer *cp;
1675 	struct g_mirror_disk *disk;
1676 
1677 	sx_assert(&sc->sc_lock, SA_XLOCKED);
1678 
1679 	/*
1680 	 * To avoid ordering issues, if a write is deferred because of a
1681 	 * collision with a sync request, all I/O is deferred until that
1682 	 * write is initiated.
1683 	 */
1684 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1685 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1686 		g_mirror_regular_delay(sc, bp);
1687 		return;
1688 	}
1689 
1690 	switch (bp->bio_cmd) {
1691 	case BIO_READ:
1692 		switch (sc->sc_balance) {
1693 		case G_MIRROR_BALANCE_LOAD:
1694 			g_mirror_request_load(sc, bp);
1695 			break;
1696 		case G_MIRROR_BALANCE_PREFER:
1697 			g_mirror_request_prefer(sc, bp);
1698 			break;
1699 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1700 			g_mirror_request_round_robin(sc, bp);
1701 			break;
1702 		case G_MIRROR_BALANCE_SPLIT:
1703 			g_mirror_request_split(sc, bp);
1704 			break;
1705 		}
1706 		return;
1707 	case BIO_WRITE:
1708 	case BIO_DELETE:
1709 		/*
1710 		 * Delay the request if it is colliding with a synchronization
1711 		 * request.
1712 		 */
1713 		if (g_mirror_sync_collision(sc, bp)) {
1714 			g_mirror_regular_delay(sc, bp);
1715 			return;
1716 		}
1717 
1718 		if (sc->sc_idle)
1719 			g_mirror_unidle(sc);
1720 		else
1721 			sc->sc_last_write = time_uptime;
1722 
1723 		/*
1724 		 * Bump syncid on first write.
1725 		 */
1726 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1727 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1728 			g_mirror_bump_syncid(sc);
1729 		}
1730 
1731 		/*
1732 		 * Allocate all bios before sending any request, so we can
1733 		 * return ENOMEM in nice and clean way.
1734 		 */
1735 		TAILQ_INIT(&queue);
1736 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1737 			switch (disk->d_state) {
1738 			case G_MIRROR_DISK_STATE_ACTIVE:
1739 				break;
1740 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1741 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1742 					continue;
1743 				break;
1744 			default:
1745 				continue;
1746 			}
1747 			if (bp->bio_cmd == BIO_DELETE &&
1748 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1749 				continue;
1750 			cbp = g_clone_bio(bp);
1751 			if (cbp == NULL) {
1752 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1753 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1754 					g_destroy_bio(cbp);
1755 				}
1756 				if (bp->bio_error == 0)
1757 					bp->bio_error = ENOMEM;
1758 				g_io_deliver(bp, bp->bio_error);
1759 				return;
1760 			}
1761 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1762 			cbp->bio_done = g_mirror_done;
1763 			cp = disk->d_consumer;
1764 			cbp->bio_caller1 = cp;
1765 			cbp->bio_to = cp->provider;
1766 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1767 			    ("Consumer %s not opened (r%dw%de%d).",
1768 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1769 		}
1770 		if (TAILQ_EMPTY(&queue)) {
1771 			KASSERT(bp->bio_cmd == BIO_DELETE,
1772 			    ("No consumers for regular request %p", bp));
1773 			g_io_deliver(bp, EOPNOTSUPP);
1774 			return;
1775 		}
1776 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1777 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1778 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1779 			cp = cbp->bio_caller1;
1780 			cbp->bio_caller1 = NULL;
1781 			cp->index++;
1782 			sc->sc_writes++;
1783 			g_io_request(cbp, cp);
1784 		}
1785 		/*
1786 		 * Put request onto inflight queue, so we can check if new
1787 		 * synchronization requests don't collide with it.
1788 		 */
1789 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1790 		return;
1791 	case BIO_FLUSH:
1792 		TAILQ_INIT(&queue);
1793 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1794 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1795 				continue;
1796 			cbp = g_clone_bio(bp);
1797 			if (cbp == NULL) {
1798 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1799 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1800 					g_destroy_bio(cbp);
1801 				}
1802 				if (bp->bio_error == 0)
1803 					bp->bio_error = ENOMEM;
1804 				g_io_deliver(bp, bp->bio_error);
1805 				return;
1806 			}
1807 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1808 			cbp->bio_done = g_mirror_done;
1809 			cbp->bio_caller1 = disk;
1810 			cbp->bio_to = disk->d_consumer->provider;
1811 		}
1812 		KASSERT(!TAILQ_EMPTY(&queue),
1813 		    ("No consumers for regular request %p", bp));
1814 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1815 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1816 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1817 			disk = cbp->bio_caller1;
1818 			cbp->bio_caller1 = NULL;
1819 			cp = disk->d_consumer;
1820 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1821 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1822 			    cp->acr, cp->acw, cp->ace));
1823 			cp->index++;
1824 			g_io_request(cbp, cp);
1825 		}
1826 		break;
1827 	default:
1828 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1829 		    bp->bio_cmd, sc->sc_name));
1830 		break;
1831 	}
1832 }
1833 
1834 static int
1835 g_mirror_can_destroy(struct g_mirror_softc *sc)
1836 {
1837 	struct g_geom *gp;
1838 	struct g_consumer *cp;
1839 
1840 	g_topology_assert();
1841 	gp = sc->sc_geom;
1842 	if (gp->softc == NULL)
1843 		return (1);
1844 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1845 		return (0);
1846 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1847 		if (g_mirror_is_busy(sc, cp))
1848 			return (0);
1849 	}
1850 	gp = sc->sc_sync.ds_geom;
1851 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1852 		if (g_mirror_is_busy(sc, cp))
1853 			return (0);
1854 	}
1855 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1856 	    sc->sc_name);
1857 	return (1);
1858 }
1859 
1860 static int
1861 g_mirror_try_destroy(struct g_mirror_softc *sc)
1862 {
1863 
1864 	if (sc->sc_rootmount != NULL) {
1865 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1866 		    sc->sc_rootmount);
1867 		root_mount_rel(sc->sc_rootmount);
1868 		sc->sc_rootmount = NULL;
1869 	}
1870 	g_topology_lock();
1871 	if (!g_mirror_can_destroy(sc)) {
1872 		g_topology_unlock();
1873 		return (0);
1874 	}
1875 	sc->sc_geom->softc = NULL;
1876 	sc->sc_sync.ds_geom->softc = NULL;
1877 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1878 		g_topology_unlock();
1879 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1880 		    &sc->sc_worker);
1881 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1882 		sx_xunlock(&sc->sc_lock);
1883 		wakeup(&sc->sc_worker);
1884 		sc->sc_worker = NULL;
1885 	} else {
1886 		g_topology_unlock();
1887 		g_mirror_destroy_device(sc);
1888 	}
1889 	return (1);
1890 }
1891 
1892 /*
1893  * Worker thread.
1894  */
1895 static void
1896 g_mirror_worker(void *arg)
1897 {
1898 	struct g_mirror_softc *sc;
1899 	struct g_mirror_event *ep;
1900 	struct bio *bp;
1901 	int timeout;
1902 
1903 	sc = arg;
1904 	thread_lock(curthread);
1905 	sched_prio(curthread, PRIBIO);
1906 	thread_unlock(curthread);
1907 
1908 	sx_xlock(&sc->sc_lock);
1909 	for (;;) {
1910 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1911 		/*
1912 		 * First take a look at events.
1913 		 * This is important to handle events before any I/O requests.
1914 		 */
1915 		ep = g_mirror_event_first(sc);
1916 		if (ep != NULL) {
1917 			g_mirror_event_remove(sc, ep);
1918 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1919 				/* Update only device status. */
1920 				G_MIRROR_DEBUG(3,
1921 				    "Running event for device %s.",
1922 				    sc->sc_name);
1923 				ep->e_error = 0;
1924 				g_mirror_update_device(sc, true);
1925 			} else {
1926 				/* Update disk status. */
1927 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1928 				     g_mirror_get_diskname(ep->e_disk));
1929 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1930 				    ep->e_state);
1931 				if (ep->e_error == 0)
1932 					g_mirror_update_device(sc, false);
1933 			}
1934 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1935 				KASSERT(ep->e_error == 0,
1936 				    ("Error cannot be handled."));
1937 				g_mirror_event_free(ep);
1938 			} else {
1939 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1940 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1941 				    ep);
1942 				mtx_lock(&sc->sc_events_mtx);
1943 				wakeup(ep);
1944 				mtx_unlock(&sc->sc_events_mtx);
1945 			}
1946 			if ((sc->sc_flags &
1947 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1948 				if (g_mirror_try_destroy(sc)) {
1949 					curthread->td_pflags &= ~TDP_GEOM;
1950 					G_MIRROR_DEBUG(1, "Thread exiting.");
1951 					kproc_exit(0);
1952 				}
1953 			}
1954 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1955 			continue;
1956 		}
1957 
1958 		/*
1959 		 * Check if we can mark array as CLEAN and if we can't take
1960 		 * how much seconds should we wait.
1961 		 */
1962 		timeout = g_mirror_idle(sc, -1);
1963 
1964 		/*
1965 		 * Handle I/O requests.
1966 		 */
1967 		mtx_lock(&sc->sc_queue_mtx);
1968 		bp = TAILQ_FIRST(&sc->sc_queue);
1969 		if (bp != NULL)
1970 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1971 		else {
1972 			if ((sc->sc_flags &
1973 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1974 				mtx_unlock(&sc->sc_queue_mtx);
1975 				if (g_mirror_try_destroy(sc)) {
1976 					curthread->td_pflags &= ~TDP_GEOM;
1977 					G_MIRROR_DEBUG(1, "Thread exiting.");
1978 					kproc_exit(0);
1979 				}
1980 				mtx_lock(&sc->sc_queue_mtx);
1981 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1982 					mtx_unlock(&sc->sc_queue_mtx);
1983 					continue;
1984 				}
1985 			}
1986 			if (g_mirror_event_first(sc) != NULL) {
1987 				mtx_unlock(&sc->sc_queue_mtx);
1988 				continue;
1989 			}
1990 			sx_xunlock(&sc->sc_lock);
1991 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1992 			    timeout * hz);
1993 			sx_xlock(&sc->sc_lock);
1994 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1995 			continue;
1996 		}
1997 		mtx_unlock(&sc->sc_queue_mtx);
1998 
1999 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2000 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2001 			/*
2002 			 * Handle completion of the first half (the read) of a
2003 			 * block synchronization operation.
2004 			 */
2005 			g_mirror_sync_request(sc, bp);
2006 		} else if (bp->bio_to != sc->sc_provider) {
2007 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2008 				/*
2009 				 * Handle completion of a regular I/O request.
2010 				 */
2011 				g_mirror_regular_request(sc, bp);
2012 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2013 				/*
2014 				 * Handle completion of the second half (the
2015 				 * write) of a block synchronization operation.
2016 				 */
2017 				g_mirror_sync_request(sc, bp);
2018 			else {
2019 				KASSERT(0,
2020 				    ("Invalid request cflags=0x%hx to=%s.",
2021 				    bp->bio_cflags, bp->bio_to->name));
2022 			}
2023 		} else {
2024 			/*
2025 			 * Initiate an I/O request.
2026 			 */
2027 			g_mirror_register_request(sc, bp);
2028 		}
2029 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2030 	}
2031 }
2032 
2033 static void
2034 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2035 {
2036 
2037 	sx_assert(&sc->sc_lock, SX_LOCKED);
2038 
2039 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2040 		return;
2041 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2042 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2043 		    g_mirror_get_diskname(disk), sc->sc_name);
2044 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2045 	} else if (sc->sc_idle &&
2046 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2047 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2048 		    g_mirror_get_diskname(disk), sc->sc_name);
2049 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2050 	}
2051 }
2052 
2053 static void
2054 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2055     off_t offset)
2056 {
2057 	void *data;
2058 	int idx;
2059 
2060 	data = bp->bio_data;
2061 	idx = (int)(uintptr_t)bp->bio_caller1;
2062 	g_reset_bio(bp);
2063 
2064 	bp->bio_cmd = BIO_READ;
2065 	bp->bio_data = data;
2066 	bp->bio_done = g_mirror_sync_done;
2067 	bp->bio_from = disk->d_sync.ds_consumer;
2068 	bp->bio_to = disk->d_softc->sc_provider;
2069 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2070 	bp->bio_offset = offset;
2071 	bp->bio_length = MIN(MAXPHYS,
2072 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2073 }
2074 
2075 static void
2076 g_mirror_sync_start(struct g_mirror_disk *disk)
2077 {
2078 	struct g_mirror_softc *sc;
2079 	struct g_mirror_disk_sync *sync;
2080 	struct g_consumer *cp;
2081 	struct bio *bp;
2082 	int error, i;
2083 
2084 	g_topology_assert_not();
2085 	sc = disk->d_softc;
2086 	sync = &disk->d_sync;
2087 	sx_assert(&sc->sc_lock, SX_LOCKED);
2088 
2089 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2090 	    ("Disk %s is not marked for synchronization.",
2091 	    g_mirror_get_diskname(disk)));
2092 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2093 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2094 	    sc->sc_state));
2095 
2096 	sx_xunlock(&sc->sc_lock);
2097 	g_topology_lock();
2098 	cp = g_new_consumer(sc->sc_sync.ds_geom);
2099 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2100 	error = g_attach(cp, sc->sc_provider);
2101 	KASSERT(error == 0,
2102 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2103 	error = g_access(cp, 1, 0, 0);
2104 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2105 	g_topology_unlock();
2106 	sx_xlock(&sc->sc_lock);
2107 
2108 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2109 	    g_mirror_get_diskname(disk));
2110 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2111 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2112 	KASSERT(sync->ds_consumer == NULL,
2113 	    ("Sync consumer already exists (device=%s, disk=%s).",
2114 	    sc->sc_name, g_mirror_get_diskname(disk)));
2115 
2116 	sync->ds_consumer = cp;
2117 	sync->ds_consumer->private = disk;
2118 	sync->ds_consumer->index = 0;
2119 
2120 	/*
2121 	 * Allocate memory for synchronization bios and initialize them.
2122 	 */
2123 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2124 	    M_MIRROR, M_WAITOK);
2125 	for (i = 0; i < g_mirror_syncreqs; i++) {
2126 		bp = g_alloc_bio();
2127 		sync->ds_bios[i] = bp;
2128 
2129 		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2130 		bp->bio_caller1 = (void *)(uintptr_t)i;
2131 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2132 		sync->ds_offset += bp->bio_length;
2133 	}
2134 
2135 	/* Increase the number of disks in SYNCHRONIZING state. */
2136 	sc->sc_sync.ds_ndisks++;
2137 	/* Set the number of in-flight synchronization requests. */
2138 	sync->ds_inflight = g_mirror_syncreqs;
2139 
2140 	/*
2141 	 * Fire off first synchronization requests.
2142 	 */
2143 	for (i = 0; i < g_mirror_syncreqs; i++) {
2144 		bp = sync->ds_bios[i];
2145 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2146 		sync->ds_consumer->index++;
2147 		/*
2148 		 * Delay the request if it is colliding with a regular request.
2149 		 */
2150 		if (g_mirror_regular_collision(sc, bp))
2151 			g_mirror_sync_delay(sc, bp);
2152 		else
2153 			g_io_request(bp, sync->ds_consumer);
2154 	}
2155 }
2156 
2157 /*
2158  * Stop synchronization process.
2159  * type: 0 - synchronization finished
2160  *       1 - synchronization stopped
2161  */
2162 static void
2163 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2164 {
2165 	struct g_mirror_softc *sc;
2166 	struct g_consumer *cp;
2167 
2168 	g_topology_assert_not();
2169 	sc = disk->d_softc;
2170 	sx_assert(&sc->sc_lock, SX_LOCKED);
2171 
2172 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2173 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2174 	    g_mirror_disk_state2str(disk->d_state)));
2175 	if (disk->d_sync.ds_consumer == NULL)
2176 		return;
2177 
2178 	if (type == 0) {
2179 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2180 		    sc->sc_name, g_mirror_get_diskname(disk));
2181 	} else /* if (type == 1) */ {
2182 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2183 		    sc->sc_name, g_mirror_get_diskname(disk));
2184 	}
2185 	g_mirror_regular_release(sc);
2186 	free(disk->d_sync.ds_bios, M_MIRROR);
2187 	disk->d_sync.ds_bios = NULL;
2188 	cp = disk->d_sync.ds_consumer;
2189 	disk->d_sync.ds_consumer = NULL;
2190 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2191 	sc->sc_sync.ds_ndisks--;
2192 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2193 	g_topology_lock();
2194 	g_mirror_kill_consumer(sc, cp);
2195 	g_topology_unlock();
2196 	sx_xlock(&sc->sc_lock);
2197 }
2198 
2199 static void
2200 g_mirror_launch_provider(struct g_mirror_softc *sc)
2201 {
2202 	struct g_mirror_disk *disk;
2203 	struct g_provider *pp, *dp;
2204 
2205 	sx_assert(&sc->sc_lock, SX_LOCKED);
2206 
2207 	g_topology_lock();
2208 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2209 	pp->flags |= G_PF_DIRECT_RECEIVE;
2210 	pp->mediasize = sc->sc_mediasize;
2211 	pp->sectorsize = sc->sc_sectorsize;
2212 	pp->stripesize = 0;
2213 	pp->stripeoffset = 0;
2214 
2215 	/* Splitting of unmapped BIO's could work but isn't implemented now */
2216 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2217 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2218 
2219 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2220 		if (disk->d_consumer && disk->d_consumer->provider) {
2221 			dp = disk->d_consumer->provider;
2222 			if (dp->stripesize > pp->stripesize) {
2223 				pp->stripesize = dp->stripesize;
2224 				pp->stripeoffset = dp->stripeoffset;
2225 			}
2226 			/* A provider underneath us doesn't support unmapped */
2227 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2228 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2229 				    "because of %s.", dp->name);
2230 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2231 			}
2232 		}
2233 	}
2234 	pp->private = sc;
2235 	sc->sc_refcnt++;
2236 	sc->sc_provider = pp;
2237 	g_error_provider(pp, 0);
2238 	g_topology_unlock();
2239 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2240 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2241 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2242 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2243 			g_mirror_sync_start(disk);
2244 	}
2245 }
2246 
2247 static void
2248 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2249 {
2250 	struct g_mirror_disk *disk;
2251 	struct bio *bp;
2252 
2253 	g_topology_assert_not();
2254 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2255 	    sc->sc_name));
2256 
2257 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2258 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2259 			g_mirror_sync_stop(disk, 1);
2260 	}
2261 
2262 	g_topology_lock();
2263 	g_error_provider(sc->sc_provider, ENXIO);
2264 	mtx_lock(&sc->sc_queue_mtx);
2265 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2266 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2267 		/*
2268 		 * Abort any pending I/O that wasn't generated by us.
2269 		 * Synchronization requests and requests destined for individual
2270 		 * mirror components can be destroyed immediately.
2271 		 */
2272 		if (bp->bio_to == sc->sc_provider &&
2273 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2274 			g_io_deliver(bp, ENXIO);
2275 		} else {
2276 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2277 				free(bp->bio_data, M_MIRROR);
2278 			g_destroy_bio(bp);
2279 		}
2280 	}
2281 	mtx_unlock(&sc->sc_queue_mtx);
2282 	g_wither_provider(sc->sc_provider, ENXIO);
2283 	sc->sc_provider = NULL;
2284 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2285 	g_topology_unlock();
2286 }
2287 
2288 static void
2289 g_mirror_go(void *arg)
2290 {
2291 	struct g_mirror_softc *sc;
2292 
2293 	sc = arg;
2294 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2295 	g_mirror_event_send(sc, 0,
2296 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2297 }
2298 
2299 static u_int
2300 g_mirror_determine_state(struct g_mirror_disk *disk)
2301 {
2302 	struct g_mirror_softc *sc;
2303 	u_int state;
2304 
2305 	sc = disk->d_softc;
2306 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2307 		if ((disk->d_flags &
2308 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2309 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2310 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2311 			/* Disk does not need synchronization. */
2312 			state = G_MIRROR_DISK_STATE_ACTIVE;
2313 		} else {
2314 			if ((sc->sc_flags &
2315 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2316 			    (disk->d_flags &
2317 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2318 				/*
2319 				 * We can start synchronization from
2320 				 * the stored offset.
2321 				 */
2322 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2323 			} else {
2324 				state = G_MIRROR_DISK_STATE_STALE;
2325 			}
2326 		}
2327 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2328 		/*
2329 		 * Reset all synchronization data for this disk,
2330 		 * because if it even was synchronized, it was
2331 		 * synchronized to disks with different syncid.
2332 		 */
2333 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2334 		disk->d_sync.ds_offset = 0;
2335 		disk->d_sync.ds_offset_done = 0;
2336 		disk->d_sync.ds_syncid = sc->sc_syncid;
2337 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2338 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2339 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2340 		} else {
2341 			state = G_MIRROR_DISK_STATE_STALE;
2342 		}
2343 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2344 		/*
2345 		 * Not good, NOT GOOD!
2346 		 * It means that mirror was started on stale disks
2347 		 * and more fresh disk just arrive.
2348 		 * If there were writes, mirror is broken, sorry.
2349 		 * I think the best choice here is don't touch
2350 		 * this disk and inform the user loudly.
2351 		 */
2352 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2353 		    "disk (%s) arrives!! It will not be connected to the "
2354 		    "running device.", sc->sc_name,
2355 		    g_mirror_get_diskname(disk));
2356 		g_mirror_destroy_disk(disk);
2357 		state = G_MIRROR_DISK_STATE_NONE;
2358 		/* Return immediately, because disk was destroyed. */
2359 		return (state);
2360 	}
2361 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2362 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2363 	return (state);
2364 }
2365 
2366 /*
2367  * Update device state.
2368  */
2369 static void
2370 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2371 {
2372 	struct g_mirror_disk *disk;
2373 	u_int state;
2374 
2375 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2376 
2377 	switch (sc->sc_state) {
2378 	case G_MIRROR_DEVICE_STATE_STARTING:
2379 	    {
2380 		struct g_mirror_disk *pdisk, *tdisk;
2381 		u_int dirty, ndisks, genid, syncid;
2382 		bool broken;
2383 
2384 		KASSERT(sc->sc_provider == NULL,
2385 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2386 		/*
2387 		 * Are we ready? We are, if all disks are connected or
2388 		 * if we have any disks and 'force' is true.
2389 		 */
2390 		ndisks = g_mirror_ndisks(sc, -1);
2391 		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2392 			;
2393 		} else if (ndisks == 0) {
2394 			/*
2395 			 * Disks went down in starting phase, so destroy
2396 			 * device.
2397 			 */
2398 			callout_drain(&sc->sc_callout);
2399 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2400 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2401 			    sc->sc_rootmount);
2402 			root_mount_rel(sc->sc_rootmount);
2403 			sc->sc_rootmount = NULL;
2404 			return;
2405 		} else {
2406 			return;
2407 		}
2408 
2409 		/*
2410 		 * Activate all disks with the biggest syncid.
2411 		 */
2412 		if (force) {
2413 			/*
2414 			 * If 'force' is true, we have been called due to
2415 			 * timeout, so don't bother canceling timeout.
2416 			 */
2417 			ndisks = 0;
2418 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2419 				if ((disk->d_flags &
2420 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2421 					ndisks++;
2422 				}
2423 			}
2424 			if (ndisks == 0) {
2425 				/* No valid disks found, destroy device. */
2426 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2427 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2428 				    __LINE__, sc->sc_rootmount);
2429 				root_mount_rel(sc->sc_rootmount);
2430 				sc->sc_rootmount = NULL;
2431 				return;
2432 			}
2433 		} else {
2434 			/* Cancel timeout. */
2435 			callout_drain(&sc->sc_callout);
2436 		}
2437 
2438 		/*
2439 		 * Find the biggest genid.
2440 		 */
2441 		genid = 0;
2442 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2443 			if (disk->d_genid > genid)
2444 				genid = disk->d_genid;
2445 		}
2446 		sc->sc_genid = genid;
2447 		/*
2448 		 * Remove all disks without the biggest genid.
2449 		 */
2450 		broken = false;
2451 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2452 			if (disk->d_genid < genid) {
2453 				G_MIRROR_DEBUG(0,
2454 				    "Component %s (device %s) broken, skipping.",
2455 				    g_mirror_get_diskname(disk), sc->sc_name);
2456 				g_mirror_destroy_disk(disk);
2457 				/*
2458 				 * Bump the syncid in case we discover a healthy
2459 				 * replacement disk after starting the mirror.
2460 				 */
2461 				broken = true;
2462 			}
2463 		}
2464 
2465 		/*
2466 		 * Find the biggest syncid.
2467 		 */
2468 		syncid = 0;
2469 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2470 			if (disk->d_sync.ds_syncid > syncid)
2471 				syncid = disk->d_sync.ds_syncid;
2472 		}
2473 
2474 		/*
2475 		 * Here we need to look for dirty disks and if all disks
2476 		 * with the biggest syncid are dirty, we have to choose
2477 		 * one with the biggest priority and rebuild the rest.
2478 		 */
2479 		/*
2480 		 * Find the number of dirty disks with the biggest syncid.
2481 		 * Find the number of disks with the biggest syncid.
2482 		 * While here, find a disk with the biggest priority.
2483 		 */
2484 		dirty = ndisks = 0;
2485 		pdisk = NULL;
2486 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2487 			if (disk->d_sync.ds_syncid != syncid)
2488 				continue;
2489 			if ((disk->d_flags &
2490 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2491 				continue;
2492 			}
2493 			ndisks++;
2494 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2495 				dirty++;
2496 				if (pdisk == NULL ||
2497 				    pdisk->d_priority < disk->d_priority) {
2498 					pdisk = disk;
2499 				}
2500 			}
2501 		}
2502 		if (dirty == 0) {
2503 			/* No dirty disks at all, great. */
2504 		} else if (dirty == ndisks) {
2505 			/*
2506 			 * Force synchronization for all dirty disks except one
2507 			 * with the biggest priority.
2508 			 */
2509 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2510 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2511 			    "master disk for synchronization.",
2512 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2513 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2514 				if (disk->d_sync.ds_syncid != syncid)
2515 					continue;
2516 				if ((disk->d_flags &
2517 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2518 					continue;
2519 				}
2520 				KASSERT((disk->d_flags &
2521 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2522 				    ("Disk %s isn't marked as dirty.",
2523 				    g_mirror_get_diskname(disk)));
2524 				/* Skip the disk with the biggest priority. */
2525 				if (disk == pdisk)
2526 					continue;
2527 				disk->d_sync.ds_syncid = 0;
2528 			}
2529 		} else if (dirty < ndisks) {
2530 			/*
2531 			 * Force synchronization for all dirty disks.
2532 			 * We have some non-dirty disks.
2533 			 */
2534 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2535 				if (disk->d_sync.ds_syncid != syncid)
2536 					continue;
2537 				if ((disk->d_flags &
2538 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2539 					continue;
2540 				}
2541 				if ((disk->d_flags &
2542 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2543 					continue;
2544 				}
2545 				disk->d_sync.ds_syncid = 0;
2546 			}
2547 		}
2548 
2549 		/* Reset hint. */
2550 		sc->sc_hint = NULL;
2551 		sc->sc_syncid = syncid;
2552 		if (force || broken) {
2553 			/* Remember to bump syncid on first write. */
2554 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2555 		}
2556 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2557 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2558 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2559 		    g_mirror_device_state2str(state));
2560 		sc->sc_state = state;
2561 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2562 			state = g_mirror_determine_state(disk);
2563 			g_mirror_event_send(disk, state,
2564 			    G_MIRROR_EVENT_DONTWAIT);
2565 			if (state == G_MIRROR_DISK_STATE_STALE)
2566 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2567 		}
2568 		break;
2569 	    }
2570 	case G_MIRROR_DEVICE_STATE_RUNNING:
2571 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2572 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2573 			/*
2574 			 * No usable disks, so destroy the device.
2575 			 */
2576 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2577 			break;
2578 		} else if (g_mirror_ndisks(sc,
2579 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2580 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2581 			/*
2582 			 * We have active disks, launch provider if it doesn't
2583 			 * exist.
2584 			 */
2585 			if (sc->sc_provider == NULL)
2586 				g_mirror_launch_provider(sc);
2587 			if (sc->sc_rootmount != NULL) {
2588 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2589 				    __LINE__, sc->sc_rootmount);
2590 				root_mount_rel(sc->sc_rootmount);
2591 				sc->sc_rootmount = NULL;
2592 			}
2593 		}
2594 		/*
2595 		 * Genid should be bumped immediately, so do it here.
2596 		 */
2597 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2598 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2599 			g_mirror_bump_genid(sc);
2600 		}
2601 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2602 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2603 			g_mirror_bump_syncid(sc);
2604 		}
2605 		break;
2606 	default:
2607 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2608 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2609 		break;
2610 	}
2611 }
2612 
2613 /*
2614  * Update disk state and device state if needed.
2615  */
2616 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2617 	"Disk %s state changed from %s to %s (device %s).",		\
2618 	g_mirror_get_diskname(disk),					\
2619 	g_mirror_disk_state2str(disk->d_state),				\
2620 	g_mirror_disk_state2str(state), sc->sc_name)
2621 static int
2622 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2623 {
2624 	struct g_mirror_softc *sc;
2625 
2626 	sc = disk->d_softc;
2627 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2628 
2629 again:
2630 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2631 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2632 	    g_mirror_disk_state2str(state));
2633 	switch (state) {
2634 	case G_MIRROR_DISK_STATE_NEW:
2635 		/*
2636 		 * Possible scenarios:
2637 		 * 1. New disk arrive.
2638 		 */
2639 		/* Previous state should be NONE. */
2640 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2641 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2642 		    g_mirror_disk_state2str(disk->d_state)));
2643 		DISK_STATE_CHANGED();
2644 
2645 		disk->d_state = state;
2646 		if (LIST_EMPTY(&sc->sc_disks))
2647 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2648 		else {
2649 			struct g_mirror_disk *dp;
2650 
2651 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2652 				if (disk->d_priority >= dp->d_priority) {
2653 					LIST_INSERT_BEFORE(dp, disk, d_next);
2654 					dp = NULL;
2655 					break;
2656 				}
2657 				if (LIST_NEXT(dp, d_next) == NULL)
2658 					break;
2659 			}
2660 			if (dp != NULL)
2661 				LIST_INSERT_AFTER(dp, disk, d_next);
2662 		}
2663 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2664 		    sc->sc_name, g_mirror_get_diskname(disk));
2665 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2666 			break;
2667 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2668 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2669 		    g_mirror_device_state2str(sc->sc_state),
2670 		    g_mirror_get_diskname(disk),
2671 		    g_mirror_disk_state2str(disk->d_state)));
2672 		state = g_mirror_determine_state(disk);
2673 		if (state != G_MIRROR_DISK_STATE_NONE)
2674 			goto again;
2675 		break;
2676 	case G_MIRROR_DISK_STATE_ACTIVE:
2677 		/*
2678 		 * Possible scenarios:
2679 		 * 1. New disk does not need synchronization.
2680 		 * 2. Synchronization process finished successfully.
2681 		 */
2682 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2683 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2684 		    g_mirror_device_state2str(sc->sc_state),
2685 		    g_mirror_get_diskname(disk),
2686 		    g_mirror_disk_state2str(disk->d_state)));
2687 		/* Previous state should be NEW or SYNCHRONIZING. */
2688 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2689 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2690 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2691 		    g_mirror_disk_state2str(disk->d_state)));
2692 		DISK_STATE_CHANGED();
2693 
2694 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2695 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2696 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2697 			g_mirror_sync_stop(disk, 0);
2698 		}
2699 		disk->d_state = state;
2700 		disk->d_sync.ds_offset = 0;
2701 		disk->d_sync.ds_offset_done = 0;
2702 		g_mirror_update_idle(sc, disk);
2703 		g_mirror_update_metadata(disk);
2704 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2705 		    sc->sc_name, g_mirror_get_diskname(disk));
2706 		break;
2707 	case G_MIRROR_DISK_STATE_STALE:
2708 		/*
2709 		 * Possible scenarios:
2710 		 * 1. Stale disk was connected.
2711 		 */
2712 		/* Previous state should be NEW. */
2713 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2714 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2715 		    g_mirror_disk_state2str(disk->d_state)));
2716 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2717 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2718 		    g_mirror_device_state2str(sc->sc_state),
2719 		    g_mirror_get_diskname(disk),
2720 		    g_mirror_disk_state2str(disk->d_state)));
2721 		/*
2722 		 * STALE state is only possible if device is marked
2723 		 * NOAUTOSYNC.
2724 		 */
2725 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2726 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2727 		    g_mirror_device_state2str(sc->sc_state),
2728 		    g_mirror_get_diskname(disk),
2729 		    g_mirror_disk_state2str(disk->d_state)));
2730 		DISK_STATE_CHANGED();
2731 
2732 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2733 		disk->d_state = state;
2734 		g_mirror_update_metadata(disk);
2735 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2736 		    sc->sc_name, g_mirror_get_diskname(disk));
2737 		break;
2738 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2739 		/*
2740 		 * Possible scenarios:
2741 		 * 1. Disk which needs synchronization was connected.
2742 		 */
2743 		/* Previous state should be NEW. */
2744 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2745 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2746 		    g_mirror_disk_state2str(disk->d_state)));
2747 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2748 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2749 		    g_mirror_device_state2str(sc->sc_state),
2750 		    g_mirror_get_diskname(disk),
2751 		    g_mirror_disk_state2str(disk->d_state)));
2752 		DISK_STATE_CHANGED();
2753 
2754 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2755 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2756 		disk->d_state = state;
2757 		if (sc->sc_provider != NULL) {
2758 			g_mirror_sync_start(disk);
2759 			g_mirror_update_metadata(disk);
2760 		}
2761 		break;
2762 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2763 		/*
2764 		 * Possible scenarios:
2765 		 * 1. Device wasn't running yet, but disk disappear.
2766 		 * 2. Disk was active and disapppear.
2767 		 * 3. Disk disappear during synchronization process.
2768 		 */
2769 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2770 			/*
2771 			 * Previous state should be ACTIVE, STALE or
2772 			 * SYNCHRONIZING.
2773 			 */
2774 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2775 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2776 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2777 			    ("Wrong disk state (%s, %s).",
2778 			    g_mirror_get_diskname(disk),
2779 			    g_mirror_disk_state2str(disk->d_state)));
2780 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2781 			/* Previous state should be NEW. */
2782 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2783 			    ("Wrong disk state (%s, %s).",
2784 			    g_mirror_get_diskname(disk),
2785 			    g_mirror_disk_state2str(disk->d_state)));
2786 			/*
2787 			 * Reset bumping syncid if disk disappeared in STARTING
2788 			 * state.
2789 			 */
2790 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2791 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2792 #ifdef	INVARIANTS
2793 		} else {
2794 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2795 			    sc->sc_name,
2796 			    g_mirror_device_state2str(sc->sc_state),
2797 			    g_mirror_get_diskname(disk),
2798 			    g_mirror_disk_state2str(disk->d_state)));
2799 #endif
2800 		}
2801 		DISK_STATE_CHANGED();
2802 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2803 		    sc->sc_name, g_mirror_get_diskname(disk));
2804 
2805 		g_mirror_destroy_disk(disk);
2806 		break;
2807 	case G_MIRROR_DISK_STATE_DESTROY:
2808 	    {
2809 		int error;
2810 
2811 		error = g_mirror_clear_metadata(disk);
2812 		if (error != 0) {
2813 			G_MIRROR_DEBUG(0,
2814 			    "Device %s: failed to clear metadata on %s: %d.",
2815 			    sc->sc_name, g_mirror_get_diskname(disk), error);
2816 			break;
2817 		}
2818 		DISK_STATE_CHANGED();
2819 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2820 		    sc->sc_name, g_mirror_get_diskname(disk));
2821 
2822 		g_mirror_destroy_disk(disk);
2823 		sc->sc_ndisks--;
2824 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2825 			g_mirror_update_metadata(disk);
2826 		}
2827 		break;
2828 	    }
2829 	default:
2830 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2831 		break;
2832 	}
2833 	return (0);
2834 }
2835 #undef	DISK_STATE_CHANGED
2836 
2837 int
2838 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2839 {
2840 	struct g_provider *pp;
2841 	u_char *buf;
2842 	int error;
2843 
2844 	g_topology_assert();
2845 
2846 	error = g_access(cp, 1, 0, 0);
2847 	if (error != 0)
2848 		return (error);
2849 	pp = cp->provider;
2850 	g_topology_unlock();
2851 	/* Metadata are stored on last sector. */
2852 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2853 	    &error);
2854 	g_topology_lock();
2855 	g_access(cp, -1, 0, 0);
2856 	if (buf == NULL) {
2857 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2858 		    cp->provider->name, error);
2859 		return (error);
2860 	}
2861 
2862 	/* Decode metadata. */
2863 	error = mirror_metadata_decode(buf, md);
2864 	g_free(buf);
2865 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2866 		return (EINVAL);
2867 	if (md->md_version > G_MIRROR_VERSION) {
2868 		G_MIRROR_DEBUG(0,
2869 		    "Kernel module is too old to handle metadata from %s.",
2870 		    cp->provider->name);
2871 		return (EINVAL);
2872 	}
2873 	if (error != 0) {
2874 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2875 		    cp->provider->name);
2876 		return (error);
2877 	}
2878 
2879 	return (0);
2880 }
2881 
2882 static int
2883 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2884     struct g_mirror_metadata *md)
2885 {
2886 
2887 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2888 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2889 		    pp->name, md->md_did);
2890 		return (EEXIST);
2891 	}
2892 	if (md->md_all != sc->sc_ndisks) {
2893 		G_MIRROR_DEBUG(1,
2894 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2895 		    "md_all", pp->name, sc->sc_name);
2896 		return (EINVAL);
2897 	}
2898 	if (md->md_slice != sc->sc_slice) {
2899 		G_MIRROR_DEBUG(1,
2900 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2901 		    "md_slice", pp->name, sc->sc_name);
2902 		return (EINVAL);
2903 	}
2904 	if (md->md_balance != sc->sc_balance) {
2905 		G_MIRROR_DEBUG(1,
2906 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2907 		    "md_balance", pp->name, sc->sc_name);
2908 		return (EINVAL);
2909 	}
2910 #if 0
2911 	if (md->md_mediasize != sc->sc_mediasize) {
2912 		G_MIRROR_DEBUG(1,
2913 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2914 		    "md_mediasize", pp->name, sc->sc_name);
2915 		return (EINVAL);
2916 	}
2917 #endif
2918 	if (sc->sc_mediasize > pp->mediasize) {
2919 		G_MIRROR_DEBUG(1,
2920 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2921 		    sc->sc_name);
2922 		return (EINVAL);
2923 	}
2924 	if (md->md_sectorsize != sc->sc_sectorsize) {
2925 		G_MIRROR_DEBUG(1,
2926 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2927 		    "md_sectorsize", pp->name, sc->sc_name);
2928 		return (EINVAL);
2929 	}
2930 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2931 		G_MIRROR_DEBUG(1,
2932 		    "Invalid sector size of disk %s (device %s), skipping.",
2933 		    pp->name, sc->sc_name);
2934 		return (EINVAL);
2935 	}
2936 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2937 		G_MIRROR_DEBUG(1,
2938 		    "Invalid device flags on disk %s (device %s), skipping.",
2939 		    pp->name, sc->sc_name);
2940 		return (EINVAL);
2941 	}
2942 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2943 		G_MIRROR_DEBUG(1,
2944 		    "Invalid disk flags on disk %s (device %s), skipping.",
2945 		    pp->name, sc->sc_name);
2946 		return (EINVAL);
2947 	}
2948 	return (0);
2949 }
2950 
2951 int
2952 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2953     struct g_mirror_metadata *md)
2954 {
2955 	struct g_mirror_disk *disk;
2956 	int error;
2957 
2958 	g_topology_assert_not();
2959 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2960 
2961 	error = g_mirror_check_metadata(sc, pp, md);
2962 	if (error != 0)
2963 		return (error);
2964 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2965 	    md->md_genid < sc->sc_genid) {
2966 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2967 		    pp->name, sc->sc_name);
2968 		return (EINVAL);
2969 	}
2970 	disk = g_mirror_init_disk(sc, pp, md, &error);
2971 	if (disk == NULL)
2972 		return (error);
2973 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2974 	    G_MIRROR_EVENT_WAIT);
2975 	if (error != 0)
2976 		return (error);
2977 	if (md->md_version < G_MIRROR_VERSION) {
2978 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2979 		    pp->name, md->md_version, G_MIRROR_VERSION);
2980 		g_mirror_update_metadata(disk);
2981 	}
2982 	return (0);
2983 }
2984 
2985 static void
2986 g_mirror_destroy_delayed(void *arg, int flag)
2987 {
2988 	struct g_mirror_softc *sc;
2989 	int error;
2990 
2991 	if (flag == EV_CANCEL) {
2992 		G_MIRROR_DEBUG(1, "Destroying canceled.");
2993 		return;
2994 	}
2995 	sc = arg;
2996 	g_topology_unlock();
2997 	sx_xlock(&sc->sc_lock);
2998 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2999 	    ("DESTROY flag set on %s.", sc->sc_name));
3000 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3001 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3002 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3003 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3004 	if (error != 0) {
3005 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3006 		    sc->sc_name, error);
3007 		sx_xunlock(&sc->sc_lock);
3008 	}
3009 	g_topology_lock();
3010 }
3011 
3012 static int
3013 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3014 {
3015 	struct g_mirror_softc *sc;
3016 	int error = 0;
3017 
3018 	g_topology_assert();
3019 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3020 	    acw, ace);
3021 
3022 	sc = pp->private;
3023 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3024 
3025 	g_topology_unlock();
3026 	sx_xlock(&sc->sc_lock);
3027 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3028 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3029 	    LIST_EMPTY(&sc->sc_disks)) {
3030 		if (acr > 0 || acw > 0 || ace > 0)
3031 			error = ENXIO;
3032 		goto end;
3033 	}
3034 	sc->sc_provider_open += acr + acw + ace;
3035 	if (pp->acw + acw == 0)
3036 		g_mirror_idle(sc, 0);
3037 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3038 	    sc->sc_provider_open == 0)
3039 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3040 end:
3041 	sx_xunlock(&sc->sc_lock);
3042 	g_topology_lock();
3043 	return (error);
3044 }
3045 
3046 struct g_geom *
3047 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3048     u_int type)
3049 {
3050 	struct g_mirror_softc *sc;
3051 	struct g_geom *gp;
3052 	int error, timeout;
3053 
3054 	g_topology_assert();
3055 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3056 	    md->md_mid);
3057 
3058 	/* One disk is minimum. */
3059 	if (md->md_all < 1)
3060 		return (NULL);
3061 	/*
3062 	 * Action geom.
3063 	 */
3064 	gp = g_new_geomf(mp, "%s", md->md_name);
3065 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3066 	gp->start = g_mirror_start;
3067 	gp->orphan = g_mirror_orphan;
3068 	gp->access = g_mirror_access;
3069 	gp->dumpconf = g_mirror_dumpconf;
3070 
3071 	sc->sc_type = type;
3072 	sc->sc_id = md->md_mid;
3073 	sc->sc_slice = md->md_slice;
3074 	sc->sc_balance = md->md_balance;
3075 	sc->sc_mediasize = md->md_mediasize;
3076 	sc->sc_sectorsize = md->md_sectorsize;
3077 	sc->sc_ndisks = md->md_all;
3078 	sc->sc_flags = md->md_mflags;
3079 	sc->sc_bump_id = 0;
3080 	sc->sc_idle = 1;
3081 	sc->sc_last_write = time_uptime;
3082 	sc->sc_writes = 0;
3083 	sc->sc_refcnt = 1;
3084 	sx_init(&sc->sc_lock, "gmirror:lock");
3085 	TAILQ_INIT(&sc->sc_queue);
3086 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3087 	TAILQ_INIT(&sc->sc_regular_delayed);
3088 	TAILQ_INIT(&sc->sc_inflight);
3089 	TAILQ_INIT(&sc->sc_sync_delayed);
3090 	LIST_INIT(&sc->sc_disks);
3091 	TAILQ_INIT(&sc->sc_events);
3092 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3093 	callout_init(&sc->sc_callout, 1);
3094 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3095 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3096 	gp->softc = sc;
3097 	sc->sc_geom = gp;
3098 	sc->sc_provider = NULL;
3099 	sc->sc_provider_open = 0;
3100 	/*
3101 	 * Synchronization geom.
3102 	 */
3103 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3104 	gp->softc = sc;
3105 	gp->orphan = g_mirror_orphan;
3106 	sc->sc_sync.ds_geom = gp;
3107 	sc->sc_sync.ds_ndisks = 0;
3108 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3109 	    "g_mirror %s", md->md_name);
3110 	if (error != 0) {
3111 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3112 		    sc->sc_name);
3113 		g_destroy_geom(sc->sc_sync.ds_geom);
3114 		g_destroy_geom(sc->sc_geom);
3115 		g_mirror_free_device(sc);
3116 		return (NULL);
3117 	}
3118 
3119 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3120 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3121 
3122 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3123 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3124 	/*
3125 	 * Run timeout.
3126 	 */
3127 	timeout = g_mirror_timeout * hz;
3128 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3129 	return (sc->sc_geom);
3130 }
3131 
3132 int
3133 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3134 {
3135 	struct g_mirror_disk *disk;
3136 
3137 	g_topology_assert_not();
3138 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3139 
3140 	if (sc->sc_provider_open != 0) {
3141 		switch (how) {
3142 		case G_MIRROR_DESTROY_SOFT:
3143 			G_MIRROR_DEBUG(1,
3144 			    "Device %s is still open (%d).", sc->sc_name,
3145 			    sc->sc_provider_open);
3146 			return (EBUSY);
3147 		case G_MIRROR_DESTROY_DELAYED:
3148 			G_MIRROR_DEBUG(1,
3149 			    "Device %s will be destroyed on last close.",
3150 			    sc->sc_name);
3151 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3152 				if (disk->d_state ==
3153 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3154 					g_mirror_sync_stop(disk, 1);
3155 				}
3156 			}
3157 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3158 			return (EBUSY);
3159 		case G_MIRROR_DESTROY_HARD:
3160 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3161 			    "can't be definitely removed.", sc->sc_name);
3162 		}
3163 	}
3164 
3165 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3166 		sx_xunlock(&sc->sc_lock);
3167 		return (0);
3168 	}
3169 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3170 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3171 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3172 	sx_xunlock(&sc->sc_lock);
3173 	mtx_lock(&sc->sc_queue_mtx);
3174 	wakeup(sc);
3175 	mtx_unlock(&sc->sc_queue_mtx);
3176 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3177 	while (sc->sc_worker != NULL)
3178 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3179 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3180 	sx_xlock(&sc->sc_lock);
3181 	g_mirror_destroy_device(sc);
3182 	return (0);
3183 }
3184 
3185 static void
3186 g_mirror_taste_orphan(struct g_consumer *cp)
3187 {
3188 
3189 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3190 	    cp->provider->name));
3191 }
3192 
3193 static struct g_geom *
3194 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3195 {
3196 	struct g_mirror_metadata md;
3197 	struct g_mirror_softc *sc;
3198 	struct g_consumer *cp;
3199 	struct g_geom *gp;
3200 	int error;
3201 
3202 	g_topology_assert();
3203 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3204 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3205 
3206 	gp = g_new_geomf(mp, "mirror:taste");
3207 	/*
3208 	 * This orphan function should be never called.
3209 	 */
3210 	gp->orphan = g_mirror_taste_orphan;
3211 	cp = g_new_consumer(gp);
3212 	g_attach(cp, pp);
3213 	error = g_mirror_read_metadata(cp, &md);
3214 	g_detach(cp);
3215 	g_destroy_consumer(cp);
3216 	g_destroy_geom(gp);
3217 	if (error != 0)
3218 		return (NULL);
3219 	gp = NULL;
3220 
3221 	if (md.md_provider[0] != '\0' &&
3222 	    !g_compare_names(md.md_provider, pp->name))
3223 		return (NULL);
3224 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3225 		return (NULL);
3226 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3227 		G_MIRROR_DEBUG(0,
3228 		    "Device %s: provider %s marked as inactive, skipping.",
3229 		    md.md_name, pp->name);
3230 		return (NULL);
3231 	}
3232 	if (g_mirror_debug >= 2)
3233 		mirror_metadata_dump(&md);
3234 
3235 	/*
3236 	 * Let's check if device already exists.
3237 	 */
3238 	sc = NULL;
3239 	LIST_FOREACH(gp, &mp->geom, geom) {
3240 		sc = gp->softc;
3241 		if (sc == NULL)
3242 			continue;
3243 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3244 			continue;
3245 		if (sc->sc_sync.ds_geom == gp)
3246 			continue;
3247 		if (strcmp(md.md_name, sc->sc_name) != 0)
3248 			continue;
3249 		if (md.md_mid != sc->sc_id) {
3250 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3251 			    sc->sc_name);
3252 			return (NULL);
3253 		}
3254 		break;
3255 	}
3256 	if (gp == NULL) {
3257 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3258 		if (gp == NULL) {
3259 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3260 			    md.md_name);
3261 			return (NULL);
3262 		}
3263 		sc = gp->softc;
3264 	}
3265 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3266 	g_topology_unlock();
3267 	sx_xlock(&sc->sc_lock);
3268 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3269 	error = g_mirror_add_disk(sc, pp, &md);
3270 	if (error != 0) {
3271 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3272 		    pp->name, gp->name, error);
3273 		if (LIST_EMPTY(&sc->sc_disks)) {
3274 			g_cancel_event(sc);
3275 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3276 			g_topology_lock();
3277 			return (NULL);
3278 		}
3279 		gp = NULL;
3280 	}
3281 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3282 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3283 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3284 		g_topology_lock();
3285 		return (NULL);
3286 	}
3287 	sx_xunlock(&sc->sc_lock);
3288 	g_topology_lock();
3289 	return (gp);
3290 }
3291 
3292 static void
3293 g_mirror_resize(struct g_consumer *cp)
3294 {
3295 	struct g_mirror_disk *disk;
3296 
3297 	g_topology_assert();
3298 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3299 
3300 	disk = cp->private;
3301 	if (disk == NULL)
3302 		return;
3303 	g_topology_unlock();
3304 	g_mirror_update_metadata(disk);
3305 	g_topology_lock();
3306 }
3307 
3308 static int
3309 g_mirror_destroy_geom(struct gctl_req *req __unused,
3310     struct g_class *mp __unused, struct g_geom *gp)
3311 {
3312 	struct g_mirror_softc *sc;
3313 	int error;
3314 
3315 	g_topology_unlock();
3316 	sc = gp->softc;
3317 	sx_xlock(&sc->sc_lock);
3318 	g_cancel_event(sc);
3319 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3320 	if (error != 0)
3321 		sx_xunlock(&sc->sc_lock);
3322 	g_topology_lock();
3323 	return (error);
3324 }
3325 
3326 static void
3327 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3328     struct g_consumer *cp, struct g_provider *pp)
3329 {
3330 	struct g_mirror_softc *sc;
3331 
3332 	g_topology_assert();
3333 
3334 	sc = gp->softc;
3335 	if (sc == NULL)
3336 		return;
3337 	/* Skip synchronization geom. */
3338 	if (gp == sc->sc_sync.ds_geom)
3339 		return;
3340 	if (pp != NULL) {
3341 		/* Nothing here. */
3342 	} else if (cp != NULL) {
3343 		struct g_mirror_disk *disk;
3344 
3345 		disk = cp->private;
3346 		if (disk == NULL)
3347 			return;
3348 		g_topology_unlock();
3349 		sx_xlock(&sc->sc_lock);
3350 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3351 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3352 			sbuf_printf(sb, "%s<Synchronized>", indent);
3353 			if (disk->d_sync.ds_offset == 0)
3354 				sbuf_printf(sb, "0%%");
3355 			else {
3356 				sbuf_printf(sb, "%u%%",
3357 				    (u_int)((disk->d_sync.ds_offset * 100) /
3358 				    sc->sc_provider->mediasize));
3359 			}
3360 			sbuf_printf(sb, "</Synchronized>\n");
3361 			if (disk->d_sync.ds_offset > 0) {
3362 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3363 				    "</BytesSynced>\n", indent,
3364 				    (intmax_t)disk->d_sync.ds_offset);
3365 			}
3366 		}
3367 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3368 		    disk->d_sync.ds_syncid);
3369 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3370 		    disk->d_genid);
3371 		sbuf_printf(sb, "%s<Flags>", indent);
3372 		if (disk->d_flags == 0)
3373 			sbuf_printf(sb, "NONE");
3374 		else {
3375 			int first = 1;
3376 
3377 #define	ADD_FLAG(flag, name)	do {					\
3378 	if ((disk->d_flags & (flag)) != 0) {				\
3379 		if (!first)						\
3380 			sbuf_printf(sb, ", ");				\
3381 		else							\
3382 			first = 0;					\
3383 		sbuf_printf(sb, name);					\
3384 	}								\
3385 } while (0)
3386 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3387 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3388 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3389 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3390 			    "SYNCHRONIZING");
3391 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3392 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3393 #undef	ADD_FLAG
3394 		}
3395 		sbuf_printf(sb, "</Flags>\n");
3396 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3397 		    disk->d_priority);
3398 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3399 		    g_mirror_disk_state2str(disk->d_state));
3400 		sx_xunlock(&sc->sc_lock);
3401 		g_topology_lock();
3402 	} else {
3403 		g_topology_unlock();
3404 		sx_xlock(&sc->sc_lock);
3405 		sbuf_printf(sb, "%s<Type>", indent);
3406 		switch (sc->sc_type) {
3407 		case G_MIRROR_TYPE_AUTOMATIC:
3408 			sbuf_printf(sb, "AUTOMATIC");
3409 			break;
3410 		case G_MIRROR_TYPE_MANUAL:
3411 			sbuf_printf(sb, "MANUAL");
3412 			break;
3413 		default:
3414 			sbuf_printf(sb, "UNKNOWN");
3415 			break;
3416 		}
3417 		sbuf_printf(sb, "</Type>\n");
3418 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3419 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3420 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3421 		sbuf_printf(sb, "%s<Flags>", indent);
3422 		if (sc->sc_flags == 0)
3423 			sbuf_printf(sb, "NONE");
3424 		else {
3425 			int first = 1;
3426 
3427 #define	ADD_FLAG(flag, name)	do {					\
3428 	if ((sc->sc_flags & (flag)) != 0) {				\
3429 		if (!first)						\
3430 			sbuf_printf(sb, ", ");				\
3431 		else							\
3432 			first = 0;					\
3433 		sbuf_printf(sb, name);					\
3434 	}								\
3435 } while (0)
3436 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3437 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3438 #undef	ADD_FLAG
3439 		}
3440 		sbuf_printf(sb, "</Flags>\n");
3441 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3442 		    (u_int)sc->sc_slice);
3443 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3444 		    balance_name(sc->sc_balance));
3445 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3446 		    sc->sc_ndisks);
3447 		sbuf_printf(sb, "%s<State>", indent);
3448 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3449 			sbuf_printf(sb, "%s", "STARTING");
3450 		else if (sc->sc_ndisks ==
3451 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3452 			sbuf_printf(sb, "%s", "COMPLETE");
3453 		else
3454 			sbuf_printf(sb, "%s", "DEGRADED");
3455 		sbuf_printf(sb, "</State>\n");
3456 		sx_xunlock(&sc->sc_lock);
3457 		g_topology_lock();
3458 	}
3459 }
3460 
3461 static void
3462 g_mirror_shutdown_post_sync(void *arg, int howto)
3463 {
3464 	struct g_class *mp;
3465 	struct g_geom *gp, *gp2;
3466 	struct g_mirror_softc *sc;
3467 	int error;
3468 
3469 	if (panicstr != NULL)
3470 		return;
3471 
3472 	mp = arg;
3473 	g_topology_lock();
3474 	g_mirror_shutdown = 1;
3475 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3476 		if ((sc = gp->softc) == NULL)
3477 			continue;
3478 		/* Skip synchronization geom. */
3479 		if (gp == sc->sc_sync.ds_geom)
3480 			continue;
3481 		g_topology_unlock();
3482 		sx_xlock(&sc->sc_lock);
3483 		g_mirror_idle(sc, -1);
3484 		g_cancel_event(sc);
3485 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3486 		if (error != 0)
3487 			sx_xunlock(&sc->sc_lock);
3488 		g_topology_lock();
3489 	}
3490 	g_topology_unlock();
3491 }
3492 
3493 static void
3494 g_mirror_init(struct g_class *mp)
3495 {
3496 
3497 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3498 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3499 	if (g_mirror_post_sync == NULL)
3500 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3501 }
3502 
3503 static void
3504 g_mirror_fini(struct g_class *mp)
3505 {
3506 
3507 	if (g_mirror_post_sync != NULL)
3508 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3509 }
3510 
3511 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3512