/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/eventhandler.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <geom/geom.h>
#include <geom/geom_dbg.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
bool g_launch_mirror_before_timeout = true;
SYSCTL_BOOL(_kern_geom_mirror, OID_AUTO, launch_mirror_before_timeout,
    CTLFLAG_RWTUN, &g_launch_mirror_before_timeout, 0,
    "If false, force gmirror to wait out the full kern.geom.mirror.timeout "
    "before launching mirrors");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");

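/*
 * msleep(9) wrapper which logs the sleep and the wakeup at debug level 4,
 * so that worker-thread scheduling can be traced via kern.geom.mirror.debug.
 */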
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};

static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static int g_mirror_refresh_device(struct g_mirror_softc *sc,
    const struct g_provider *pp, const struct g_mirror_metadata *md);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
    struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct g_mirror_softc *sc,
    struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);

static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to apply disk and device state changes
 * from a single thread, which simplifies locking.
 */
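/*
 * For example, a disk state change is requested by queueing an event:
 *
 *	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
 *	    G_MIRROR_EVENT_DONTWAIT);
 *
 * The worker thread then dequeues the event and performs the actual state
 * transition while holding the exclusive sc_lock.
 */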
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

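/*
 * Cancel and drain all pending events associated with the given disk.
 */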
static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

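/*
 * Count the requests still queued for the worker thread which originated
 * from consumer cp.
 */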
static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

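/*
 * Close the consumer and destroy it, deferring the destruction via an event
 * when dropping our write access will trigger a retaste of the provider.
 */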
static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can post an event to detach and destroy the consumer.
		 * A class which still has a consumer attached to the given
		 * provider will not receive a retaste event for that
		 * provider.  This is how retaste events are ignored when
		 * closing consumers opened for writing: the consumer is
		 * detached and destroyed only after the retaste event has
		 * been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk.  This means: allocate memory, create a consumer,
 * attach it to the provider and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	disk->d_init_ndisks = md->md_all;
	disk->d_init_slice = md->md_slice;
	disk->d_init_balance = md->md_balance;
	disk->d_init_mediasize = md->md_mediasize;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	g_topology_lock();
	LIST_REMOVE(disk, d_next);
	g_topology_unlock();
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

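/*
 * Release the softc's resources; called once the last reference to the
 * device has been dropped.
 */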
static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	g_topology_assert();

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It is possible that it will be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

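/*
 * Return an active disk to service a read, advancing the round-robin hint
 * so that consecutive calls rotate among the active disks.
 */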
static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

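/*
 * Write the metadata sector located at the end of the consumer's provider,
 * or wipe it (write a zeroed sector) when md is NULL.
 */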
static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * has been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

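/*
 * Bump the synchronization ID and store it in the metadata of all active
 * and synchronizing disks; components left behind with an older syncid are
 * later recognized as stale.
 */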
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

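/*
 * Mark the active components clean once enough idle time has passed.
 * Returns 0 when the device is now (or already) idle, or the number of
 * seconds left until the components may be marked clean.
 */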
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

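/*
 * Completion callback for regular requests: hand the bio back to the worker
 * thread, which finishes processing it under sc_lock.
 */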
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request_error(struct g_mirror_softc *sc,
    struct g_mirror_disk *disk, struct bio *bp)
{

	if ((bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) &&
	    bp->bio_error == EOPNOTSUPP)
		return;

	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
	} else {
		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
		    bp->bio_error);
	}
	if (g_mirror_disconnect_on_failure &&
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
		if (bp->bio_error == ENXIO &&
		    bp->bio_cmd == BIO_READ)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		else if (bp->bio_error == ENXIO)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
		else
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
}

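/*
 * Process the completion of a cloned regular request: account for it on the
 * parent bio, retry failed reads on another active disk, and deliver the
 * parent once all of its clones have come back.
 */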
static void
g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
	    ("regular request %p with unexpected origin", bp));

	pbp = bp->bio_parent;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
		break;
	case BIO_WRITE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);
		break;
	case BIO_DELETE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
		    bp->bio_error);
		break;
	case BIO_FLUSH:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
		    bp->bio_error);
		break;
	case BIO_SPEEDUP:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_speedup,
		    bp->bio_error);
		break;
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL)
			g_mirror_regular_request_error(sc, disk, bp);
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
		case BIO_FLUSH:
		case BIO_SPEEDUP:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
	case BIO_FLUSH:
	case BIO_SPEEDUP:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
			/* Release delayed sync requests if possible. */
			g_mirror_sync_release(sc);
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

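/*
 * Answer a GEOM::candelete query: the mirror supports BIO_DELETE if at
 * least one component does.
 */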
static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = disk != NULL;
	g_handleattr(bp, "GEOM::candelete", &val, sizeof(val));
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this
	 * component will be used for reading with the 'prefer' balance
	 * algorithm.  If the component with the highest priority is
	 * currently disconnected, we will not be able to read the dump
	 * after the reboot if that component is connected and
	 * synchronized later.  Can we do something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

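/*
 * Entry point for requests arriving at the mirror provider: GETATTRs are
 * answered directly, everything else is queued for the worker thread.
 */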
static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_SPEEDUP:
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request is colliding with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request is colliding with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put a regular request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Requeue delayed regular requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp;

	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
		return;
	if (g_mirror_sync_collision(sc, bp))
		return;

	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a read request is
 * sent to the mirror provider via the sync consumer. If that request completes
 * successfully, it is converted to a write and sent to the disk being
 * synchronized. If the write also completes successfully, the synchronization
 * offset is advanced and a new read request is submitted.
 */
static void
g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	KASSERT((bp->bio_cmd == BIO_READ &&
	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
	    ("Sync BIO %p with unexpected origin", bp));

	bp->bio_from->index--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	sync = &disk->d_sync;

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ: {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);

			/*
			 * The read error will trigger a syncid bump, so there's
			 * no need to do that here.
			 *
			 * The read error handling for regular requests will
			 * retry the read from all active mirrors before passing
			 * the error back up, so there's no need to retry here.
			 */
			g_mirror_sync_request_free(disk, bp);
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	}
	case BIO_WRITE: {
		off_t offset;
		int i;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
		sync->ds_offset += bp->bio_length;

		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;

		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Requeue delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	}
	default:
		panic("Invalid I/O request %p", bp);
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

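/*
 * The per-disk load is kept as a fixed-point (LOAD_SCALE = 256) exponential
 * moving average of the number of outstanding requests on its consumer:
 *
 *	load = (index * LOAD_SCALE + load * 7) / 8
 *
 * so each new sample is blended in with a weight of 1/8.  Reads are also
 * biased towards the disk whose head is at, or within TRACK_SIZE of, the
 * request offset.
 */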
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If the disk head is precisely in position, highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If the disk head is close to the position, prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember the last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

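/*
 * Split a large read into sector-aligned slices and spread them across the
 * active disks; small reads fall back to round-robin.
 */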
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any request, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

1670 static void
1671 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1672 {
1673 	struct bio_queue queue;
1674 	struct bio *cbp;
1675 	struct g_consumer *cp;
1676 	struct g_mirror_disk *disk;
1677 
1678 	sx_assert(&sc->sc_lock, SA_XLOCKED);
1679 
1680 	/*
1681 	 * To avoid ordering issues, if a write is deferred because of a
1682 	 * collision with a sync request, all I/O is deferred until that
1683 	 * write is initiated.
1684 	 */
1685 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1686 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1687 		g_mirror_regular_delay(sc, bp);
1688 		return;
1689 	}
1690 
1691 	switch (bp->bio_cmd) {
1692 	case BIO_READ:
1693 		switch (sc->sc_balance) {
1694 		case G_MIRROR_BALANCE_LOAD:
1695 			g_mirror_request_load(sc, bp);
1696 			break;
1697 		case G_MIRROR_BALANCE_PREFER:
1698 			g_mirror_request_prefer(sc, bp);
1699 			break;
1700 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1701 			g_mirror_request_round_robin(sc, bp);
1702 			break;
1703 		case G_MIRROR_BALANCE_SPLIT:
1704 			g_mirror_request_split(sc, bp);
1705 			break;
1706 		}
1707 		return;
1708 	case BIO_WRITE:
1709 	case BIO_DELETE:
1710 		/*
1711 		 * Delay the request if it is colliding with a synchronization
1712 		 * request.
1713 		 */
1714 		if (g_mirror_sync_collision(sc, bp)) {
1715 			g_mirror_regular_delay(sc, bp);
1716 			return;
1717 		}
1718 
1719 		if (sc->sc_idle)
1720 			g_mirror_unidle(sc);
1721 		else
1722 			sc->sc_last_write = time_uptime;
1723 
1724 		/*
1725 		 * Bump syncid on first write.
1726 		 */
1727 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1728 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1729 			g_mirror_bump_syncid(sc);
1730 		}
1731 
1732 		/*
1733 		 * Allocate all bios before sending any request, so we can
1734 		 * return ENOMEM in nice and clean way.
1735 		 */
1736 		TAILQ_INIT(&queue);
1737 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1738 			switch (disk->d_state) {
1739 			case G_MIRROR_DISK_STATE_ACTIVE:
1740 				break;
1741 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1742 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1743 					continue;
1744 				break;
1745 			default:
1746 				continue;
1747 			}
1748 			if (bp->bio_cmd == BIO_DELETE &&
1749 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1750 				continue;
1751 			cbp = g_clone_bio(bp);
1752 			if (cbp == NULL) {
1753 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1754 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1755 					g_destroy_bio(cbp);
1756 				}
1757 				if (bp->bio_error == 0)
1758 					bp->bio_error = ENOMEM;
1759 				g_io_deliver(bp, bp->bio_error);
1760 				return;
1761 			}
1762 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1763 			cbp->bio_done = g_mirror_done;
1764 			cp = disk->d_consumer;
1765 			cbp->bio_caller1 = cp;
1766 			cbp->bio_to = cp->provider;
1767 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1768 			    ("Consumer %s not opened (r%dw%de%d).",
1769 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1770 		}
1771 		if (TAILQ_EMPTY(&queue)) {
1772 			KASSERT(bp->bio_cmd == BIO_DELETE,
1773 			    ("No consumers for regular request %p", bp));
1774 			g_io_deliver(bp, EOPNOTSUPP);
1775 			return;
1776 		}
1777 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1778 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1779 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1780 			cp = cbp->bio_caller1;
1781 			cbp->bio_caller1 = NULL;
1782 			cp->index++;
1783 			sc->sc_writes++;
1784 			g_io_request(cbp, cp);
1785 		}
1786 		/*
1787 		 * Put request onto inflight queue, so we can check if new
1788 		 * synchronization requests don't collide with it.
1789 		 */
1790 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1791 		return;
1792 	case BIO_SPEEDUP:
1793 	case BIO_FLUSH:
1794 		TAILQ_INIT(&queue);
1795 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1796 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1797 				continue;
1798 			cbp = g_clone_bio(bp);
1799 			if (cbp == NULL) {
1800 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1801 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1802 					g_destroy_bio(cbp);
1803 				}
1804 				if (bp->bio_error == 0)
1805 					bp->bio_error = ENOMEM;
1806 				g_io_deliver(bp, bp->bio_error);
1807 				return;
1808 			}
1809 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1810 			cbp->bio_done = g_mirror_done;
1811 			cbp->bio_caller1 = disk;
1812 			cbp->bio_to = disk->d_consumer->provider;
1813 		}
1814 		KASSERT(!TAILQ_EMPTY(&queue),
1815 		    ("No consumers for regular request %p", bp));
1816 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1817 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1818 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1819 			disk = cbp->bio_caller1;
1820 			cbp->bio_caller1 = NULL;
1821 			cp = disk->d_consumer;
1822 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1823 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1824 			    cp->acr, cp->acw, cp->ace));
1825 			cp->index++;
1826 			g_io_request(cbp, cp);
1827 		}
1828 		break;
1829 	default:
1830 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1831 		    bp->bio_cmd, sc->sc_name));
1832 		break;
1833 	}
1834 }
1835 
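/*
 * A note on the regular-write path above: the parent bio stays on
 * sc_inflight until g_mirror_regular_request() has collected all of its
 * clones, and new synchronization reads are checked against that queue
 * (g_mirror_regular_collision()) just as new writes are checked against
 * the outstanding synchronization bios (g_mirror_sync_collision()).
 * Roughly (a sketch, not the exact helper), both checks are an
 * interval-overlap test of the form
 *
 *	rend > bp->bio_offset && rstart < bp->bio_offset + bp->bio_length
 *
 * where [rstart, rend) is the competing request's byte range; a hit
 * parks the new request on a delayed queue until the collision clears.
 */
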
1836 static int
1837 g_mirror_can_destroy(struct g_mirror_softc *sc)
1838 {
1839 	struct g_geom *gp;
1840 	struct g_consumer *cp;
1841 
1842 	g_topology_assert();
1843 	gp = sc->sc_geom;
1844 	if (gp->softc == NULL)
1845 		return (1);
1846 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1847 		return (0);
1848 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1849 		if (g_mirror_is_busy(sc, cp))
1850 			return (0);
1851 	}
1852 	gp = sc->sc_sync.ds_geom;
1853 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1854 		if (g_mirror_is_busy(sc, cp))
1855 			return (0);
1856 	}
1857 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1858 	    sc->sc_name);
1859 	return (1);
1860 }
1861 
1862 static int
1863 g_mirror_try_destroy(struct g_mirror_softc *sc)
1864 {
1865 
1866 	if (sc->sc_rootmount != NULL) {
1867 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1868 		    sc->sc_rootmount);
1869 		root_mount_rel(sc->sc_rootmount);
1870 		sc->sc_rootmount = NULL;
1871 	}
1872 	g_topology_lock();
1873 	if (!g_mirror_can_destroy(sc)) {
1874 		g_topology_unlock();
1875 		return (0);
1876 	}
1877 	sc->sc_geom->softc = NULL;
1878 	sc->sc_sync.ds_geom->softc = NULL;
1879 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1880 		g_topology_unlock();
1881 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1882 		    &sc->sc_worker);
1883 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1884 		sx_xunlock(&sc->sc_lock);
1885 		wakeup(&sc->sc_worker);
1886 		sc->sc_worker = NULL;
1887 	} else {
1888 		g_topology_unlock();
1889 		g_mirror_destroy_device(sc);
1890 	}
1891 	return (1);
1892 }
1893 
1894 /*
1895  * Worker thread.
1896  */
1897 static void
1898 g_mirror_worker(void *arg)
1899 {
1900 	struct g_mirror_softc *sc;
1901 	struct g_mirror_event *ep;
1902 	struct bio *bp;
1903 	int timeout;
1904 
1905 	sc = arg;
1906 	thread_lock(curthread);
1907 	sched_prio(curthread, PRIBIO);
1908 	thread_unlock(curthread);
1909 
1910 	sx_xlock(&sc->sc_lock);
1911 	for (;;) {
1912 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1913 		/*
1914 		 * First take a look at events.
1915 		 * This is important to handle events before any I/O requests.
1916 		 */
1917 		ep = g_mirror_event_first(sc);
1918 		if (ep != NULL) {
1919 			g_mirror_event_remove(sc, ep);
1920 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1921 				/* Update only device status. */
1922 				G_MIRROR_DEBUG(3,
1923 				    "Running event for device %s.",
1924 				    sc->sc_name);
1925 				ep->e_error = 0;
1926 				g_mirror_update_device(sc, true);
1927 			} else {
1928 				/* Update disk status. */
1929 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1930 				     g_mirror_get_diskname(ep->e_disk));
1931 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1932 				    ep->e_state);
1933 				if (ep->e_error == 0)
1934 					g_mirror_update_device(sc, false);
1935 			}
1936 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1937 				KASSERT(ep->e_error == 0,
1938 				    ("Error cannot be handled."));
1939 				g_mirror_event_free(ep);
1940 			} else {
1941 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1942 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1943 				    ep);
1944 				mtx_lock(&sc->sc_events_mtx);
1945 				wakeup(ep);
1946 				mtx_unlock(&sc->sc_events_mtx);
1947 			}
1948 			if ((sc->sc_flags &
1949 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1950 				if (g_mirror_try_destroy(sc)) {
1951 					curthread->td_pflags &= ~TDP_GEOM;
1952 					G_MIRROR_DEBUG(1, "Thread exiting.");
1953 					kproc_exit(0);
1954 				}
1955 			}
1956 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1957 			continue;
1958 		}
1959 
1960 		/*
1961 		 * Check whether we can mark the array as CLEAN and, if we
1962 		 * can't, how many seconds we should wait.
1963 		 */
1964 		timeout = g_mirror_idle(sc, -1);
1965 
1966 		/*
1967 		 * Handle I/O requests.
1968 		 */
1969 		mtx_lock(&sc->sc_queue_mtx);
1970 		bp = TAILQ_FIRST(&sc->sc_queue);
1971 		if (bp != NULL)
1972 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1973 		else {
1974 			if ((sc->sc_flags &
1975 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1976 				mtx_unlock(&sc->sc_queue_mtx);
1977 				if (g_mirror_try_destroy(sc)) {
1978 					curthread->td_pflags &= ~TDP_GEOM;
1979 					G_MIRROR_DEBUG(1, "Thread exiting.");
1980 					kproc_exit(0);
1981 				}
1982 				mtx_lock(&sc->sc_queue_mtx);
1983 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1984 					mtx_unlock(&sc->sc_queue_mtx);
1985 					continue;
1986 				}
1987 			}
1988 			if (g_mirror_event_first(sc) != NULL) {
1989 				mtx_unlock(&sc->sc_queue_mtx);
1990 				continue;
1991 			}
1992 			sx_xunlock(&sc->sc_lock);
1993 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1994 			    timeout * hz);
1995 			sx_xlock(&sc->sc_lock);
1996 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1997 			continue;
1998 		}
1999 		mtx_unlock(&sc->sc_queue_mtx);
2000 
2001 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
2002 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
2003 			/*
2004 			 * Handle completion of the first half (the read) of a
2005 			 * block synchronization operation.
2006 			 */
2007 			g_mirror_sync_request(sc, bp);
2008 		} else if (bp->bio_to != sc->sc_provider) {
2009 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
2010 				/*
2011 				 * Handle completion of a regular I/O request.
2012 				 */
2013 				g_mirror_regular_request(sc, bp);
2014 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2015 				/*
2016 				 * Handle completion of the second half (the
2017 				 * write) of a block synchronization operation.
2018 				 */
2019 				g_mirror_sync_request(sc, bp);
2020 			else {
2021 				KASSERT(0,
2022 				    ("Invalid request cflags=0x%hx to=%s.",
2023 				    bp->bio_cflags, bp->bio_to->name));
2024 			}
2025 		} else {
2026 			/*
2027 			 * Initiate an I/O request.
2028 			 */
2029 			g_mirror_register_request(sc, bp);
2030 		}
2031 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2032 	}
2033 }
2034 
2035 static void
2036 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2037 {
2038 
2039 	sx_assert(&sc->sc_lock, SX_LOCKED);
2040 
2041 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2042 		return;
2043 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2044 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2045 		    g_mirror_get_diskname(disk), sc->sc_name);
2046 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2047 	} else if (sc->sc_idle &&
2048 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2049 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2050 		    g_mirror_get_diskname(disk), sc->sc_name);
2051 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2052 	}
2053 }
2054 
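/*
 * A note on the DIRTY flag managed above: it is what bounds
 * resynchronization after a crash.  Components are marked dirty before
 * writes reach them and marked clean again once the device has been
 * idle for kern.geom.mirror.idletime seconds, so only mirrors that go
 * down with writes outstanding come back needing a rebuild.  With
 * NOFAILSYNC set, g_mirror_update_idle() leaves the flag alone and
 * dirtiness is not tracked.
 */
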
2055 static void
2056 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2057     off_t offset)
2058 {
2059 	void *data;
2060 	int idx;
2061 
2062 	data = bp->bio_data;
2063 	idx = (int)(uintptr_t)bp->bio_caller1;
2064 	g_reset_bio(bp);
2065 
2066 	bp->bio_cmd = BIO_READ;
2067 	bp->bio_data = data;
2068 	bp->bio_done = g_mirror_sync_done;
2069 	bp->bio_from = disk->d_sync.ds_consumer;
2070 	bp->bio_to = disk->d_softc->sc_provider;
2071 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2072 	bp->bio_offset = offset;
2073 	bp->bio_length = MIN(maxphys,
2074 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2075 }
2076 
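/*
 * An illustrative example of the recycling done by g_mirror_sync_reinit()
 * above (values made up): with maxphys of 128 kB, a bio that has just
 * finished the chunk at offset 0 is reset and re-aimed as a BIO_READ at
 * offset 128 kB, keeping its data buffer and its slot index in ds_bios;
 * near the end of the device the MIN() clamp trims bio_length to
 * whatever remains of sc_mediasize.
 */
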
2077 static void
2078 g_mirror_sync_start(struct g_mirror_disk *disk)
2079 {
2080 	struct g_mirror_softc *sc;
2081 	struct g_mirror_disk_sync *sync;
2082 	struct g_consumer *cp;
2083 	struct bio *bp;
2084 	int error, i;
2085 
2086 	g_topology_assert_not();
2087 	sc = disk->d_softc;
2088 	sync = &disk->d_sync;
2089 	sx_assert(&sc->sc_lock, SX_LOCKED);
2090 
2091 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2092 	    ("Disk %s is not marked for synchronization.",
2093 	    g_mirror_get_diskname(disk)));
2094 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2095 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2096 	    sc->sc_state));
2097 
2098 	sx_xunlock(&sc->sc_lock);
2099 	g_topology_lock();
2100 	cp = g_new_consumer(sc->sc_sync.ds_geom);
2101 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2102 	error = g_attach(cp, sc->sc_provider);
2103 	KASSERT(error == 0,
2104 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2105 	error = g_access(cp, 1, 0, 0);
2106 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2107 	g_topology_unlock();
2108 	sx_xlock(&sc->sc_lock);
2109 
2110 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2111 	    g_mirror_get_diskname(disk));
2112 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2113 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2114 	KASSERT(sync->ds_consumer == NULL,
2115 	    ("Sync consumer already exists (device=%s, disk=%s).",
2116 	    sc->sc_name, g_mirror_get_diskname(disk)));
2117 
2118 	sync->ds_consumer = cp;
2119 	sync->ds_consumer->private = disk;
2120 	sync->ds_consumer->index = 0;
2121 
2122 	/*
2123 	 * Allocate memory for synchronization bios and initialize them.
2124 	 */
2125 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2126 	    M_MIRROR, M_WAITOK);
2127 	for (i = 0; i < g_mirror_syncreqs; i++) {
2128 		bp = g_alloc_bio();
2129 		sync->ds_bios[i] = bp;
2130 
2131 		bp->bio_data = malloc(maxphys, M_MIRROR, M_WAITOK);
2132 		bp->bio_caller1 = (void *)(uintptr_t)i;
2133 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2134 		sync->ds_offset += bp->bio_length;
2135 	}
2136 
2137 	/* Increase the number of disks in SYNCHRONIZING state. */
2138 	sc->sc_sync.ds_ndisks++;
2139 	/* Set the number of in-flight synchronization requests. */
2140 	sync->ds_inflight = g_mirror_syncreqs;
2141 
2142 	/*
2143 	 * Fire off first synchronization requests.
2144 	 */
2145 	for (i = 0; i < g_mirror_syncreqs; i++) {
2146 		bp = sync->ds_bios[i];
2147 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2148 		sync->ds_consumer->index++;
2149 		/*
2150 		 * Delay the request if it is colliding with a regular request.
2151 		 */
2152 		if (g_mirror_regular_collision(sc, bp))
2153 			g_mirror_sync_delay(sc, bp);
2154 		else
2155 			g_io_request(bp, sync->ds_consumer);
2156 	}
2157 }
2158 
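/*
 * The loop above primes a fixed window of g_mirror_syncreqs concurrent
 * synchronization reads; the count comes from the read-only tunable
 * kern.geom.mirror.sync_requests (e.g. kern.geom.mirror.sync_requests=4
 * in loader.conf).  As each read completes it is turned into a write to
 * the component being rebuilt and then recycled for the next chunk, so
 * synchronization proceeds as a pipeline rather than one request at a
 * time.
 */
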
2159 /*
2160  * Stop synchronization process.
2161  * type: 0 - synchronization finished
2162  *       1 - synchronization stopped
2163  */
2164 static void
2165 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2166 {
2167 	struct g_mirror_softc *sc;
2168 	struct g_consumer *cp;
2169 
2170 	g_topology_assert_not();
2171 	sc = disk->d_softc;
2172 	sx_assert(&sc->sc_lock, SX_LOCKED);
2173 
2174 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2175 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2176 	    g_mirror_disk_state2str(disk->d_state)));
2177 	if (disk->d_sync.ds_consumer == NULL)
2178 		return;
2179 
2180 	if (type == 0) {
2181 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2182 		    sc->sc_name, g_mirror_get_diskname(disk));
2183 	} else /* if (type == 1) */ {
2184 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2185 		    sc->sc_name, g_mirror_get_diskname(disk));
2186 	}
2187 	g_mirror_regular_release(sc);
2188 	free(disk->d_sync.ds_bios, M_MIRROR);
2189 	disk->d_sync.ds_bios = NULL;
2190 	cp = disk->d_sync.ds_consumer;
2191 	disk->d_sync.ds_consumer = NULL;
2192 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2193 	sc->sc_sync.ds_ndisks--;
2194 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2195 	g_topology_lock();
2196 	g_mirror_kill_consumer(sc, cp);
2197 	g_topology_unlock();
2198 	sx_xlock(&sc->sc_lock);
2199 }
2200 
2201 static void
2202 g_mirror_launch_provider(struct g_mirror_softc *sc)
2203 {
2204 	struct g_mirror_disk *disk;
2205 	struct g_provider *pp, *dp;
2206 
2207 	sx_assert(&sc->sc_lock, SX_LOCKED);
2208 
2209 	g_topology_lock();
2210 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2211 	pp->flags |= G_PF_DIRECT_RECEIVE;
2212 	pp->mediasize = sc->sc_mediasize;
2213 	pp->sectorsize = sc->sc_sectorsize;
2214 	pp->stripesize = 0;
2215 	pp->stripeoffset = 0;
2216 
2217 	/* Splitting of unmapped BIOs could work but isn't implemented now. */
2218 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2219 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2220 
2221 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2222 		if (disk->d_consumer && disk->d_consumer->provider) {
2223 			dp = disk->d_consumer->provider;
2224 			if (dp->stripesize > pp->stripesize) {
2225 				pp->stripesize = dp->stripesize;
2226 				pp->stripeoffset = dp->stripeoffset;
2227 			}
2228 			/* A provider underneath us doesn't support unmapped I/O. */
2229 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2230 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2231 				    "because of %s.", dp->name);
2232 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2233 			}
2234 		}
2235 	}
2236 	pp->private = sc;
2237 	sc->sc_refcnt++;
2238 	sc->sc_provider = pp;
2239 	g_error_provider(pp, 0);
2240 	g_topology_unlock();
2241 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2242 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2243 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2244 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2245 			g_mirror_sync_start(disk);
2246 	}
2247 }
2248 
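/*
 * An illustrative example of the inheritance above: if one component
 * sits on a provider advertising stripesize 4096 (say, a 4Kn disk) and
 * another advertises 0, the mirror provider exports stripesize 4096 so
 * upper layers can align accordingly, while a single component lacking
 * G_PF_ACCEPT_UNMAPPED is enough to disable unmapped I/O for the whole
 * mirror.
 */
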
2249 static void
2250 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2251 {
2252 	struct g_mirror_disk *disk;
2253 	struct bio *bp;
2254 
2255 	g_topology_assert_not();
2256 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2257 	    sc->sc_name));
2258 
2259 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2260 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2261 			g_mirror_sync_stop(disk, 1);
2262 	}
2263 
2264 	g_topology_lock();
2265 	g_error_provider(sc->sc_provider, ENXIO);
2266 	mtx_lock(&sc->sc_queue_mtx);
2267 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2268 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2269 		/*
2270 		 * Abort any pending I/O that wasn't generated by us.
2271 		 * Synchronization requests and requests destined for individual
2272 		 * mirror components can be destroyed immediately.
2273 		 */
2274 		if (bp->bio_to == sc->sc_provider &&
2275 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2276 			g_io_deliver(bp, ENXIO);
2277 		} else {
2278 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2279 				free(bp->bio_data, M_MIRROR);
2280 			g_destroy_bio(bp);
2281 		}
2282 	}
2283 	mtx_unlock(&sc->sc_queue_mtx);
2284 	g_wither_provider(sc->sc_provider, ENXIO);
2285 	sc->sc_provider = NULL;
2286 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2287 	g_topology_unlock();
2288 }
2289 
2290 static void
2291 g_mirror_go(void *arg)
2292 {
2293 	struct g_mirror_softc *sc;
2294 
2295 	sc = arg;
2296 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2297 	g_mirror_event_send(sc, 0,
2298 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2299 }
2300 
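/*
 * g_mirror_go() is the target of the sc_callout armed in
 * g_mirror_create(): if the expected components have not all been
 * tasted within kern.geom.mirror.timeout seconds (4 by default), it
 * posts a "force" device event so g_mirror_update_device() launches the
 * mirror degraded with whatever disks did arrive.
 */
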
2301 static u_int
2302 g_mirror_determine_state(struct g_mirror_disk *disk)
2303 {
2304 	struct g_mirror_softc *sc;
2305 	u_int state;
2306 
2307 	sc = disk->d_softc;
2308 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2309 		if ((disk->d_flags &
2310 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2311 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2312 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2313 			/* Disk does not need synchronization. */
2314 			state = G_MIRROR_DISK_STATE_ACTIVE;
2315 		} else {
2316 			if ((sc->sc_flags &
2317 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2318 			    (disk->d_flags &
2319 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2320 				/*
2321 				 * We can start synchronization from
2322 				 * the stored offset.
2323 				 */
2324 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2325 			} else {
2326 				state = G_MIRROR_DISK_STATE_STALE;
2327 			}
2328 		}
2329 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2330 		/*
2331 		 * Reset all synchronization data for this disk,
2332 		 * because even if it was synchronized, it was
2333 		 * synchronized against disks with a different syncid.
2334 		 */
2335 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2336 		disk->d_sync.ds_offset = 0;
2337 		disk->d_sync.ds_offset_done = 0;
2338 		disk->d_sync.ds_syncid = sc->sc_syncid;
2339 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2340 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2341 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2342 		} else {
2343 			state = G_MIRROR_DISK_STATE_STALE;
2344 		}
2345 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2346 		/*
2347 		 * Not good, NOT GOOD!
2348 		 * It means that the mirror was started on stale disks
2349 		 * and a fresher disk has just arrived.
2350 		 * If there were writes, the mirror is broken, sorry.
2351 		 * The best choice here is not to touch
2352 		 * this disk and to inform the user loudly.
2353 		 */
2354 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2355 		    "disk (%s) arrived! It will not be connected to the "
2356 		    "running device.", sc->sc_name,
2357 		    g_mirror_get_diskname(disk));
2358 		g_mirror_destroy_disk(disk);
2359 		state = G_MIRROR_DISK_STATE_NONE;
2360 		/* Return immediately, because disk was destroyed. */
2361 		return (state);
2362 	}
2363 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2364 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2365 	return (state);
2366 }
2367 
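/*
 * Summary of g_mirror_determine_state() above (component syncid versus
 * device syncid; "autosync" means NOAUTOSYNC is clear or FORCE_SYNC is
 * set):
 *
 *	equal, clean (or no ACTIVE disks)  -> ACTIVE
 *	equal, dirty, autosync             -> SYNCHRONIZING (stored offset)
 *	equal, dirty, no autosync          -> STALE
 *	older, autosync                    -> SYNCHRONIZING (from offset 0)
 *	older, no autosync                 -> STALE
 *	newer than the device              -> disk destroyed, NONE
 */
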
2368 /*
2369  * Update device state.
2370  */
2371 static void
2372 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2373 {
2374 	struct g_mirror_disk *disk;
2375 	u_int state;
2376 
2377 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2378 
2379 	switch (sc->sc_state) {
2380 	case G_MIRROR_DEVICE_STATE_STARTING:
2381 	    {
2382 		struct g_mirror_disk *pdisk, *tdisk;
2383 		const char *mismatch;
2384 		uintmax_t found, newest;
2385 		u_int dirty, ndisks;
2386 
2387 		/* Pre-flight checks */
2388 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2389 			/*
2390 			 * Confirm we already detected the newest genid.
2391 			 */
2392 			KASSERT(sc->sc_genid >= disk->d_genid,
2393 			    ("%s: found newer genid %u (sc:%p had %u).", __func__,
2394 			    disk->d_genid, sc, sc->sc_genid));
2395 
2396 			/* Kick out any previously tasted stale components. */
2397 			if (disk->d_genid < sc->sc_genid) {
2398 				G_MIRROR_DEBUG(0, "Stale 'genid' field on %s "
2399 				    "(device %s) (component=%u latest=%u), skipping.",
2400 				    g_mirror_get_diskname(disk), sc->sc_name,
2401 				    disk->d_genid, sc->sc_genid);
2402 				g_mirror_destroy_disk(disk);
2403 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2404 				continue;
2405 			}
2406 
2407 			/*
2408 			 * Confirm we already detected the newest syncid.
2409 			 */
2410 			KASSERT(sc->sc_syncid >= disk->d_sync.ds_syncid,
2411 			    ("%s: found newer syncid %u (sc:%p had %u).",
2412 			     __func__, disk->d_sync.ds_syncid, sc,
2413 			     sc->sc_syncid));
2414 
2415 #define DETECT_MISMATCH(field, name) \
2416 			if (mismatch == NULL &&					\
2417 			    disk->d_init_ ## field != sc->sc_ ## field) {	\
2418 				mismatch = name;				\
2419 				found = (uintmax_t)disk->d_init_ ## field;	\
2420 				newest = (uintmax_t)sc->sc_ ## field;		\
2421 			}
2422 			mismatch = NULL;
2423 			DETECT_MISMATCH(ndisks, "md_all");
2424 			DETECT_MISMATCH(balance, "md_balance");
2425 			DETECT_MISMATCH(slice, "md_slice");
2426 			DETECT_MISMATCH(mediasize, "md_mediasize");
2427 #undef DETECT_MISMATCH
2428 			if (mismatch != NULL) {
2429 				G_MIRROR_DEBUG(0, "Found a mismatching '%s' "
2430 				    "field on %s (device %s) (found=%ju "
2431 				    "newest=%ju).", mismatch,
2432 				    g_mirror_get_diskname(disk), sc->sc_name,
2433 				    found, newest);
2434 				g_mirror_destroy_disk(disk);
2435 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2436 				continue;
2437 			}
2438 		}
2439 
2440 		KASSERT(sc->sc_provider == NULL,
2441 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2442 		/*
2443 		 * Are we ready? If the timeout (force is true) has expired, and
2444 		 * any disks are present, then yes. If we're permitted to launch
2445 		 * before the timeout has expired and the expected number of
2446 		 * current-generation mirror disks have been tasted, then yes.
2447 		 */
2448 		ndisks = g_mirror_ndisks(sc, -1);
2449 		if ((force && ndisks > 0) ||
2450 		    (g_launch_mirror_before_timeout && ndisks == sc->sc_ndisks)) {
2451 			;
2452 		} else if (ndisks == 0) {
2453 			/*
2454 			 * All disks went down during the starting phase, so
2455 			 * destroy the device.
2456 			 */
2457 			callout_drain(&sc->sc_callout);
2458 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2459 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2460 			    sc->sc_rootmount);
2461 			root_mount_rel(sc->sc_rootmount);
2462 			sc->sc_rootmount = NULL;
2463 			return;
2464 		} else {
2465 			return;
2466 		}
2467 
2468 		/*
2469 		 * Activate all disks with the biggest syncid.
2470 		 */
2471 		if (force) {
2472 			/*
2473 			 * If 'force' is true, we have been called due to
2474 			 * timeout, so don't bother canceling timeout.
2475 			 */
2476 			ndisks = 0;
2477 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2478 				if ((disk->d_flags &
2479 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2480 					ndisks++;
2481 				}
2482 			}
2483 			if (ndisks == 0) {
2484 				/* No valid disks found, destroy device. */
2485 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2486 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2487 				    __LINE__, sc->sc_rootmount);
2488 				root_mount_rel(sc->sc_rootmount);
2489 				sc->sc_rootmount = NULL;
2490 				return;
2491 			}
2492 		} else {
2493 			/* Cancel timeout. */
2494 			callout_drain(&sc->sc_callout);
2495 		}
2496 
2497 		/*
2498 		 * Here we need to look for dirty disks: if all disks with
2499 		 * the biggest syncid are dirty, we have to choose the one
2500 		 * with the biggest priority and rebuild the rest.
2501 		 *
2502 		 * Find the number of dirty disks with the biggest syncid.
2503 		 * Find the number of disks with the biggest syncid.
2504 		 * While here, find a disk with the biggest priority.
2505 		 */
2507 		dirty = ndisks = 0;
2508 		pdisk = NULL;
2509 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2510 			if (disk->d_sync.ds_syncid != sc->sc_syncid)
2511 				continue;
2512 			if ((disk->d_flags &
2513 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2514 				continue;
2515 			}
2516 			ndisks++;
2517 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2518 				dirty++;
2519 				if (pdisk == NULL ||
2520 				    pdisk->d_priority < disk->d_priority) {
2521 					pdisk = disk;
2522 				}
2523 			}
2524 		}
2525 		if (dirty == 0) {
2526 			/* No dirty disks at all, great. */
2527 		} else if (dirty == ndisks) {
2528 			/*
2529 			 * Force synchronization for all dirty disks except one
2530 			 * with the biggest priority.
2531 			 */
2532 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2533 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2534 			    "master disk for synchronization.",
2535 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2536 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2537 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2538 					continue;
2539 				if ((disk->d_flags &
2540 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2541 					continue;
2542 				}
2543 				KASSERT((disk->d_flags &
2544 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2545 				    ("Disk %s isn't marked as dirty.",
2546 				    g_mirror_get_diskname(disk)));
2547 				/* Skip the disk with the biggest priority. */
2548 				if (disk == pdisk)
2549 					continue;
2550 				disk->d_sync.ds_syncid = 0;
2551 			}
2552 		} else if (dirty < ndisks) {
2553 			/*
2554 			 * Force synchronization for all dirty disks.
2555 			 * We have some non-dirty disks.
2556 			 */
2557 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2558 				if (disk->d_sync.ds_syncid != sc->sc_syncid)
2559 					continue;
2560 				if ((disk->d_flags &
2561 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2562 					continue;
2563 				}
2564 				if ((disk->d_flags &
2565 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2566 					continue;
2567 				}
2568 				disk->d_sync.ds_syncid = 0;
2569 			}
2570 		}
2571 
2572 		/* Reset hint. */
2573 		sc->sc_hint = NULL;
2574 		if (force) {
2575 			/* Remember to bump syncid on first write. */
2576 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2577 		}
2578 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2579 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2580 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2581 		    g_mirror_device_state2str(state));
2582 		sc->sc_state = state;
2583 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2584 			state = g_mirror_determine_state(disk);
2585 			g_mirror_event_send(disk, state,
2586 			    G_MIRROR_EVENT_DONTWAIT);
2587 			if (state == G_MIRROR_DISK_STATE_STALE)
2588 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2589 		}
2590 		break;
2591 	    }
2592 	case G_MIRROR_DEVICE_STATE_RUNNING:
2593 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2594 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2595 			/*
2596 			 * No usable disks, so destroy the device.
2597 			 */
2598 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2599 			break;
2600 		} else if (g_mirror_ndisks(sc,
2601 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2602 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2603 			/*
2604 			 * We have active disks, launch provider if it doesn't
2605 			 * exist.
2606 			 */
2607 			if (sc->sc_provider == NULL)
2608 				g_mirror_launch_provider(sc);
2609 			if (sc->sc_rootmount != NULL) {
2610 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2611 				    __LINE__, sc->sc_rootmount);
2612 				root_mount_rel(sc->sc_rootmount);
2613 				sc->sc_rootmount = NULL;
2614 			}
2615 		}
2616 		/*
2617 		 * Genid should be bumped immediately, so do it here.
2618 		 */
2619 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2620 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2621 			g_mirror_bump_genid(sc);
2622 		}
2623 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2624 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2625 			g_mirror_bump_syncid(sc);
2626 		}
2627 		break;
2628 	default:
2629 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2630 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2631 		break;
2632 	}
2633 }
2634 
2635 /*
2636  * Update disk state and device state if needed.
2637  */
2638 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2639 	"Disk %s state changed from %s to %s (device %s).",		\
2640 	g_mirror_get_diskname(disk),					\
2641 	g_mirror_disk_state2str(disk->d_state),				\
2642 	g_mirror_disk_state2str(state), sc->sc_name)
2643 static int
2644 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2645 {
2646 	struct g_mirror_softc *sc;
2647 
2648 	sc = disk->d_softc;
2649 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2650 
2651 again:
2652 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2653 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2654 	    g_mirror_disk_state2str(state));
2655 	switch (state) {
2656 	case G_MIRROR_DISK_STATE_NEW:
2657 		/*
2658 		 * Possible scenarios:
2659 		 * 1. A new disk arrives.
2660 		 */
2661 		/* Previous state should be NONE. */
2662 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2663 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2664 		    g_mirror_disk_state2str(disk->d_state)));
2665 		DISK_STATE_CHANGED();
2666 
2667 		disk->d_state = state;
2668 		g_topology_lock();
2669 		if (LIST_EMPTY(&sc->sc_disks))
2670 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2671 		else {
2672 			struct g_mirror_disk *dp;
2673 
2674 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2675 				if (disk->d_priority >= dp->d_priority) {
2676 					LIST_INSERT_BEFORE(dp, disk, d_next);
2677 					dp = NULL;
2678 					break;
2679 				}
2680 				if (LIST_NEXT(dp, d_next) == NULL)
2681 					break;
2682 			}
2683 			if (dp != NULL)
2684 				LIST_INSERT_AFTER(dp, disk, d_next);
2685 		}
2686 		g_topology_unlock();
2687 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2688 		    sc->sc_name, g_mirror_get_diskname(disk));
2689 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2690 			break;
2691 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2692 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2693 		    g_mirror_device_state2str(sc->sc_state),
2694 		    g_mirror_get_diskname(disk),
2695 		    g_mirror_disk_state2str(disk->d_state)));
2696 		state = g_mirror_determine_state(disk);
2697 		if (state != G_MIRROR_DISK_STATE_NONE)
2698 			goto again;
2699 		break;
2700 	case G_MIRROR_DISK_STATE_ACTIVE:
2701 		/*
2702 		 * Possible scenarios:
2703 		 * 1. New disk does not need synchronization.
2704 		 * 2. Synchronization process finished successfully.
2705 		 */
2706 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2707 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2708 		    g_mirror_device_state2str(sc->sc_state),
2709 		    g_mirror_get_diskname(disk),
2710 		    g_mirror_disk_state2str(disk->d_state)));
2711 		/* Previous state should be NEW or SYNCHRONIZING. */
2712 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2713 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2714 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2715 		    g_mirror_disk_state2str(disk->d_state)));
2716 		DISK_STATE_CHANGED();
2717 
2718 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2719 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2720 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2721 			g_mirror_sync_stop(disk, 0);
2722 		}
2723 		disk->d_state = state;
2724 		disk->d_sync.ds_offset = 0;
2725 		disk->d_sync.ds_offset_done = 0;
2726 		g_mirror_update_idle(sc, disk);
2727 		g_mirror_update_metadata(disk);
2728 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2729 		    sc->sc_name, g_mirror_get_diskname(disk));
2730 		break;
2731 	case G_MIRROR_DISK_STATE_STALE:
2732 		/*
2733 		 * Possible scenarios:
2734 		 * 1. Stale disk was connected.
2735 		 */
2736 		/* Previous state should be NEW. */
2737 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2738 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2739 		    g_mirror_disk_state2str(disk->d_state)));
2740 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2741 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2742 		    g_mirror_device_state2str(sc->sc_state),
2743 		    g_mirror_get_diskname(disk),
2744 		    g_mirror_disk_state2str(disk->d_state)));
2745 		/*
2746 		 * STALE state is only possible if device is marked
2747 		 * NOAUTOSYNC.
2748 		 */
2749 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2750 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2751 		    g_mirror_device_state2str(sc->sc_state),
2752 		    g_mirror_get_diskname(disk),
2753 		    g_mirror_disk_state2str(disk->d_state)));
2754 		DISK_STATE_CHANGED();
2755 
2756 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2757 		disk->d_state = state;
2758 		g_mirror_update_metadata(disk);
2759 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2760 		    sc->sc_name, g_mirror_get_diskname(disk));
2761 		break;
2762 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2763 		/*
2764 		 * Possible scenarios:
2765 		 * 1. Disk which needs synchronization was connected.
2766 		 */
2767 		/* Previous state should be NEW. */
2768 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2769 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2770 		    g_mirror_disk_state2str(disk->d_state)));
2771 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2772 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2773 		    g_mirror_device_state2str(sc->sc_state),
2774 		    g_mirror_get_diskname(disk),
2775 		    g_mirror_disk_state2str(disk->d_state)));
2776 		DISK_STATE_CHANGED();
2777 
2778 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2779 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2780 		disk->d_state = state;
2781 		if (sc->sc_provider != NULL) {
2782 			g_mirror_sync_start(disk);
2783 			g_mirror_update_metadata(disk);
2784 		}
2785 		break;
2786 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2787 		/*
2788 		 * Possible scenarios:
2789 		 * 1. The device wasn't running yet, but a disk disappeared.
2790 		 * 2. A disk was active and disappeared.
2791 		 * 3. A disk disappeared during the synchronization process.
2792 		 */
2793 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2794 			/*
2795 			 * Previous state should be ACTIVE, STALE or
2796 			 * SYNCHRONIZING.
2797 			 */
2798 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2799 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2800 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2801 			    ("Wrong disk state (%s, %s).",
2802 			    g_mirror_get_diskname(disk),
2803 			    g_mirror_disk_state2str(disk->d_state)));
2804 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2805 			/* Previous state should be NEW. */
2806 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2807 			    ("Wrong disk state (%s, %s).",
2808 			    g_mirror_get_diskname(disk),
2809 			    g_mirror_disk_state2str(disk->d_state)));
2810 			/*
2811 			 * Reset the pending syncid bump if the disk
2812 			 * disappeared in the STARTING state.
2813 			 */
2814 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2815 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2816 #ifdef	INVARIANTS
2817 		} else {
2818 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2819 			    sc->sc_name,
2820 			    g_mirror_device_state2str(sc->sc_state),
2821 			    g_mirror_get_diskname(disk),
2822 			    g_mirror_disk_state2str(disk->d_state)));
2823 #endif
2824 		}
2825 		DISK_STATE_CHANGED();
2826 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2827 		    sc->sc_name, g_mirror_get_diskname(disk));
2828 
2829 		g_mirror_destroy_disk(disk);
2830 		break;
2831 	case G_MIRROR_DISK_STATE_DESTROY:
2832 	    {
2833 		int error;
2834 
2835 		error = g_mirror_clear_metadata(disk);
2836 		if (error != 0) {
2837 			G_MIRROR_DEBUG(0,
2838 			    "Device %s: failed to clear metadata on %s: %d.",
2839 			    sc->sc_name, g_mirror_get_diskname(disk), error);
2840 			break;
2841 		}
2842 		DISK_STATE_CHANGED();
2843 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2844 		    sc->sc_name, g_mirror_get_diskname(disk));
2845 
2846 		g_mirror_destroy_disk(disk);
2847 		sc->sc_ndisks--;
2848 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2849 			g_mirror_update_metadata(disk);
2850 		}
2851 		break;
2852 	    }
2853 	default:
2854 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2855 		break;
2856 	}
2857 	return (0);
2858 }
2859 #undef	DISK_STATE_CHANGED
2860 
2861 int
2862 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2863 {
2864 	struct g_provider *pp;
2865 	u_char *buf;
2866 	int error;
2867 
2868 	g_topology_assert();
2869 
2870 	error = g_access(cp, 1, 0, 0);
2871 	if (error != 0)
2872 		return (error);
2873 	pp = cp->provider;
2874 	g_topology_unlock();
2875 	/* Metadata are stored in the last sector. */
2876 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2877 	    &error);
2878 	g_topology_lock();
2879 	g_access(cp, -1, 0, 0);
2880 	if (buf == NULL) {
2881 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2882 		    cp->provider->name, error);
2883 		return (error);
2884 	}
2885 
2886 	/* Decode metadata. */
2887 	error = mirror_metadata_decode(buf, md);
2888 	g_free(buf);
2889 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2890 		return (EINVAL);
2891 	if (md->md_version > G_MIRROR_VERSION) {
2892 		G_MIRROR_DEBUG(0,
2893 		    "Kernel module is too old to handle metadata from %s.",
2894 		    cp->provider->name);
2895 		return (EINVAL);
2896 	}
2897 	if (error != 0) {
2898 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2899 		    cp->provider->name);
2900 		return (error);
2901 	}
2902 
2903 	return (0);
2904 }
2905 
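/*
 * The read above always targets the component's last sector, i.e. it
 * starts at pp->mediasize - pp->sectorsize.  That sector is reserved
 * for the on-disk g_mirror_metadata record (magic, version, ids,
 * syncid, genid, flags and an MD5 hash verified by
 * mirror_metadata_decode()), which is also why a mirror provider is
 * slightly smaller than its components.
 */
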
2906 static int
2907 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2908     struct g_mirror_metadata *md)
2909 {
2910 
2911 	G_MIRROR_DEBUG(2, "%s: md_did %u disk %s device %s md_all 0x%x "
2912 	    "sc_ndisks 0x%x md_slice 0x%x sc_slice 0x%x md_balance 0x%x "
2913 	    "sc_balance 0x%x sc_mediasize 0x%jx pp_mediasize 0x%jx "
2914 	    "md_sectorsize 0x%x sc_sectorsize 0x%x md_mflags 0x%jx "
2915 	    "md_dflags 0x%jx md_syncid 0x%x md_genid 0x%x md_priority 0x%x "
2916 	    "sc_state 0x%x.",
2917 	    __func__, md->md_did, pp->name, sc->sc_name, md->md_all,
2918 	    sc->sc_ndisks, md->md_slice, sc->sc_slice, md->md_balance,
2919 	    sc->sc_balance, (uintmax_t)sc->sc_mediasize,
2920 	    (uintmax_t)pp->mediasize, md->md_sectorsize, sc->sc_sectorsize,
2921 	    (uintmax_t)md->md_mflags, (uintmax_t)md->md_dflags, md->md_syncid,
2922 	    md->md_genid, md->md_priority, sc->sc_state);
2923 
2924 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2925 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2926 		    pp->name, md->md_did);
2927 		return (EEXIST);
2928 	}
2929 	if (sc->sc_mediasize > pp->mediasize) {
2930 		G_MIRROR_DEBUG(1,
2931 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2932 		    sc->sc_name);
2933 		return (EINVAL);
2934 	}
2935 	if (md->md_sectorsize != sc->sc_sectorsize) {
2936 		G_MIRROR_DEBUG(1,
2937 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2938 		    "md_sectorsize", pp->name, sc->sc_name);
2939 		return (EINVAL);
2940 	}
2941 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2942 		G_MIRROR_DEBUG(1,
2943 		    "Invalid sector size of disk %s (device %s), skipping.",
2944 		    pp->name, sc->sc_name);
2945 		return (EINVAL);
2946 	}
2947 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2948 		G_MIRROR_DEBUG(1,
2949 		    "Invalid device flags on disk %s (device %s), skipping.",
2950 		    pp->name, sc->sc_name);
2951 		return (EINVAL);
2952 	}
2953 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2954 		G_MIRROR_DEBUG(1,
2955 		    "Invalid disk flags on disk %s (device %s), skipping.",
2956 		    pp->name, sc->sc_name);
2957 		return (EINVAL);
2958 	}
2959 	return (0);
2960 }
2961 
2962 int
2963 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2964     struct g_mirror_metadata *md)
2965 {
2966 	struct g_mirror_disk *disk;
2967 	int error;
2968 
2969 	g_topology_assert_not();
2970 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2971 
2972 	error = g_mirror_check_metadata(sc, pp, md);
2973 	if (error != 0)
2974 		return (error);
2975 
2976 	if (md->md_genid < sc->sc_genid) {
2977 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2978 		    pp->name, sc->sc_name);
2979 		return (EINVAL);
2980 	}
2981 
2982 	/*
2983 	 * If the component disk we're tasting has newer metadata than the
2984 	 * STARTING gmirror device, refresh the device from the component.
2985 	 */
2986 	error = g_mirror_refresh_device(sc, pp, md);
2987 	if (error != 0)
2988 		return (error);
2989 
2990 	disk = g_mirror_init_disk(sc, pp, md, &error);
2991 	if (disk == NULL)
2992 		return (error);
2993 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2994 	    G_MIRROR_EVENT_WAIT);
2995 	if (error != 0)
2996 		return (error);
2997 	if (md->md_version < G_MIRROR_VERSION) {
2998 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2999 		    pp->name, md->md_version, G_MIRROR_VERSION);
3000 		g_mirror_update_metadata(disk);
3001 	}
3002 	return (0);
3003 }
3004 
3005 static void
3006 g_mirror_destroy_delayed(void *arg, int flag)
3007 {
3008 	struct g_mirror_softc *sc;
3009 	int error;
3010 
3011 	if (flag == EV_CANCEL) {
3012 		G_MIRROR_DEBUG(1, "Destroying canceled.");
3013 		return;
3014 	}
3015 	sc = arg;
3016 	g_topology_unlock();
3017 	sx_xlock(&sc->sc_lock);
3018 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
3019 	    ("DESTROY flag set on %s.", sc->sc_name));
3020 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
3021 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
3022 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
3023 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
3024 	if (error != 0) {
3025 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
3026 		    sc->sc_name, error);
3027 		sx_xunlock(&sc->sc_lock);
3028 	}
3029 	g_topology_lock();
3030 }
3031 
3032 static int
3033 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3034 {
3035 	struct g_mirror_softc *sc;
3036 	int error = 0;
3037 
3038 	g_topology_assert();
3039 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3040 	    acw, ace);
3041 
3042 	sc = pp->private;
3043 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3044 
3045 	g_topology_unlock();
3046 	sx_xlock(&sc->sc_lock);
3047 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3048 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3049 	    LIST_EMPTY(&sc->sc_disks)) {
3050 		if (acr > 0 || acw > 0 || ace > 0)
3051 			error = ENXIO;
3052 		goto end;
3053 	}
3054 	sc->sc_provider_open += acr + acw + ace;
3055 	if (pp->acw + acw == 0)
3056 		g_mirror_idle(sc, 0);
3057 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3058 	    sc->sc_provider_open == 0)
3059 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3060 end:
3061 	sx_xunlock(&sc->sc_lock);
3062 	g_topology_lock();
3063 	return (error);
3064 }
3065 
3066 static void
3067 g_mirror_reinit_from_metadata(struct g_mirror_softc *sc,
3068     const struct g_mirror_metadata *md)
3069 {
3070 
3071 	sc->sc_genid = md->md_genid;
3072 	sc->sc_syncid = md->md_syncid;
3073 
3074 	sc->sc_slice = md->md_slice;
3075 	sc->sc_balance = md->md_balance;
3076 	sc->sc_mediasize = md->md_mediasize;
3077 	sc->sc_ndisks = md->md_all;
3078 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_MASK;
3079 	sc->sc_flags |= (md->md_mflags & G_MIRROR_DEVICE_FLAG_MASK);
3080 }
3081 
3082 struct g_geom *
3083 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3084     u_int type)
3085 {
3086 	struct g_mirror_softc *sc;
3087 	struct g_geom *gp;
3088 	int error, timeout;
3089 
3090 	g_topology_assert();
3091 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3092 	    md->md_mid);
3093 
3094 	/* One disk is the minimum. */
3095 	if (md->md_all < 1)
3096 		return (NULL);
3097 	/*
3098 	 * Action geom.
3099 	 */
3100 	gp = g_new_geomf(mp, "%s", md->md_name);
3101 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3102 	gp->start = g_mirror_start;
3103 	gp->orphan = g_mirror_orphan;
3104 	gp->access = g_mirror_access;
3105 	gp->dumpconf = g_mirror_dumpconf;
3106 
3107 	sc->sc_type = type;
3108 	sc->sc_id = md->md_mid;
3109 	g_mirror_reinit_from_metadata(sc, md);
3110 	sc->sc_sectorsize = md->md_sectorsize;
3111 	sc->sc_bump_id = 0;
3112 	sc->sc_idle = 1;
3113 	sc->sc_last_write = time_uptime;
3114 	sc->sc_writes = 0;
3115 	sc->sc_refcnt = 1;
3116 	sx_init(&sc->sc_lock, "gmirror:lock");
3117 	TAILQ_INIT(&sc->sc_queue);
3118 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3119 	TAILQ_INIT(&sc->sc_regular_delayed);
3120 	TAILQ_INIT(&sc->sc_inflight);
3121 	TAILQ_INIT(&sc->sc_sync_delayed);
3122 	LIST_INIT(&sc->sc_disks);
3123 	TAILQ_INIT(&sc->sc_events);
3124 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3125 	callout_init(&sc->sc_callout, 1);
3126 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3127 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3128 	gp->softc = sc;
3129 	sc->sc_geom = gp;
3130 	sc->sc_provider = NULL;
3131 	sc->sc_provider_open = 0;
3132 	/*
3133 	 * Synchronization geom.
3134 	 */
3135 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3136 	gp->softc = sc;
3137 	gp->orphan = g_mirror_orphan;
3138 	sc->sc_sync.ds_geom = gp;
3139 	sc->sc_sync.ds_ndisks = 0;
3140 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3141 	    "g_mirror %s", md->md_name);
3142 	if (error != 0) {
3143 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3144 		    sc->sc_name);
3145 		g_destroy_geom(sc->sc_sync.ds_geom);
3146 		g_destroy_geom(sc->sc_geom);
3147 		g_mirror_free_device(sc);
3148 		return (NULL);
3149 	}
3150 
3151 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3152 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3153 
3154 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3155 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3156 	/*
3157 	 * Run timeout.
3158 	 */
3159 	timeout = g_mirror_timeout * hz;
3160 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3161 	return (sc->sc_geom);
3162 }
3163 
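/*
 * For a metadata name of "gm0", g_mirror_create() above ends up with
 * two geoms: "gm0", the action geom whose provider will later appear as
 * /dev/mirror/gm0, and "gm0.sync", which owns the synchronization
 * consumers.  The "g_mirror gm0" kernel process created alongside them
 * serializes all event and I/O handling for the device.
 */
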
3164 int
3165 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3166 {
3167 	struct g_mirror_disk *disk;
3168 
3169 	g_topology_assert_not();
3170 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3171 
3172 	if (sc->sc_provider_open != 0) {
3173 		switch (how) {
3174 		case G_MIRROR_DESTROY_SOFT:
3175 			G_MIRROR_DEBUG(1,
3176 			    "Device %s is still open (%d).", sc->sc_name,
3177 			    sc->sc_provider_open);
3178 			return (EBUSY);
3179 		case G_MIRROR_DESTROY_DELAYED:
3180 			G_MIRROR_DEBUG(1,
3181 			    "Device %s will be destroyed on last close.",
3182 			    sc->sc_name);
3183 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3184 				if (disk->d_state ==
3185 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3186 					g_mirror_sync_stop(disk, 1);
3187 				}
3188 			}
3189 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3190 			return (EBUSY);
3191 		case G_MIRROR_DESTROY_HARD:
3192 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3193 			    "can't be definitely removed.", sc->sc_name);
3194 		}
3195 	}
3196 
3197 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3198 		sx_xunlock(&sc->sc_lock);
3199 		return (0);
3200 	}
3201 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3202 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3203 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3204 	sx_xunlock(&sc->sc_lock);
3205 	mtx_lock(&sc->sc_queue_mtx);
3206 	wakeup(sc);
3207 	mtx_unlock(&sc->sc_queue_mtx);
3208 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3209 	while (sc->sc_worker != NULL)
3210 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3211 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3212 	sx_xlock(&sc->sc_lock);
3213 	g_mirror_destroy_device(sc);
3214 	return (0);
3215 }
3216 
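/*
 * The three destroy modes handled above, for reference:
 * G_MIRROR_DESTROY_SOFT fails with EBUSY while the provider is open,
 * G_MIRROR_DESTROY_DELAYED stops synchronization and defers teardown to
 * the last close (CLOSEWAIT), and G_MIRROR_DESTROY_HARD proceeds even
 * while open.  Below, g_mirror_destroy_geom() uses SOFT and the
 * shutdown handler uses DELAYED.
 */
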
3217 static void
3218 g_mirror_taste_orphan(struct g_consumer *cp)
3219 {
3220 
3221 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3222 	    cp->provider->name));
3223 }
3224 
3225 static struct g_geom *
3226 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3227 {
3228 	struct g_mirror_metadata md;
3229 	struct g_mirror_softc *sc;
3230 	struct g_consumer *cp;
3231 	struct g_geom *gp;
3232 	int error;
3233 
3234 	g_topology_assert();
3235 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3236 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3237 
3238 	gp = g_new_geomf(mp, "mirror:taste");
3239 	/*
3240 	 * This orphan function should never be called.
3241 	 */
3242 	gp->orphan = g_mirror_taste_orphan;
3243 	cp = g_new_consumer(gp);
3244 	error = g_attach(cp, pp);
3245 	if (error == 0) {
3246 		error = g_mirror_read_metadata(cp, &md);
3247 		g_detach(cp);
3248 	}
3249 	g_destroy_consumer(cp);
3250 	g_destroy_geom(gp);
3251 	if (error != 0)
3252 		return (NULL);
3253 	gp = NULL;
3254 
3255 	if (md.md_provider[0] != '\0' &&
3256 	    !g_compare_names(md.md_provider, pp->name))
3257 		return (NULL);
3258 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3259 		return (NULL);
3260 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3261 		G_MIRROR_DEBUG(0,
3262 		    "Device %s: provider %s marked as inactive, skipping.",
3263 		    md.md_name, pp->name);
3264 		return (NULL);
3265 	}
3266 	if (g_mirror_debug >= 2)
3267 		mirror_metadata_dump(&md);
3268 
3269 	/*
3270 	 * Let's check if the device already exists.
3271 	 */
3272 	sc = NULL;
3273 	LIST_FOREACH(gp, &mp->geom, geom) {
3274 		sc = gp->softc;
3275 		if (sc == NULL)
3276 			continue;
3277 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3278 			continue;
3279 		if (sc->sc_sync.ds_geom == gp)
3280 			continue;
3281 		if (strcmp(md.md_name, sc->sc_name) != 0)
3282 			continue;
3283 		if (md.md_mid != sc->sc_id) {
3284 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3285 			    sc->sc_name);
3286 			return (NULL);
3287 		}
3288 		break;
3289 	}
3290 	if (gp == NULL) {
3291 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3292 		if (gp == NULL) {
3293 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3294 			    md.md_name);
3295 			return (NULL);
3296 		}
3297 		sc = gp->softc;
3298 	}
3299 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3300 	g_topology_unlock();
3301 	sx_xlock(&sc->sc_lock);
3302 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3303 	error = g_mirror_add_disk(sc, pp, &md);
3304 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3305 	if (error != 0) {
3306 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3307 		    pp->name, gp->name, error);
3308 		if (LIST_EMPTY(&sc->sc_disks)) {
3309 			g_cancel_event(sc);
3310 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3311 			g_topology_lock();
3312 			return (NULL);
3313 		}
3314 		gp = NULL;
3315 	}
3316 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3317 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3318 		g_topology_lock();
3319 		return (NULL);
3320 	}
3321 	sx_xunlock(&sc->sc_lock);
3322 	g_topology_lock();
3323 	return (gp);
3324 }
3325 
3326 static void
3327 g_mirror_resize(struct g_consumer *cp)
3328 {
3329 	struct g_mirror_disk *disk;
3330 
3331 	g_topology_assert();
3332 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3333 
3334 	disk = cp->private;
3335 	if (disk == NULL)
3336 		return;
3337 	g_topology_unlock();
3338 	g_mirror_update_metadata(disk);
3339 	g_topology_lock();
3340 }
3341 
3342 static int
3343 g_mirror_destroy_geom(struct gctl_req *req __unused,
3344     struct g_class *mp __unused, struct g_geom *gp)
3345 {
3346 	struct g_mirror_softc *sc;
3347 	int error;
3348 
3349 	g_topology_unlock();
3350 	sc = gp->softc;
3351 	sx_xlock(&sc->sc_lock);
3352 	g_cancel_event(sc);
3353 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3354 	if (error != 0)
3355 		sx_xunlock(&sc->sc_lock);
3356 	g_topology_lock();
3357 	return (error);
3358 }
3359 
3360 static void
3361 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3362     struct g_consumer *cp, struct g_provider *pp)
3363 {
3364 	struct g_mirror_softc *sc;
3365 
3366 	g_topology_assert();
3367 
3368 	sc = gp->softc;
3369 	if (sc == NULL)
3370 		return;
3371 	/* Skip synchronization geom. */
3372 	if (gp == sc->sc_sync.ds_geom)
3373 		return;
3374 	if (pp != NULL) {
3375 		/* Nothing here. */
3376 	} else if (cp != NULL) {
3377 		struct g_mirror_disk *disk;
3378 
3379 		disk = cp->private;
3380 		if (disk == NULL)
3381 			return;
3382 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3383 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3384 			sbuf_printf(sb, "%s<Synchronized>", indent);
3385 			if (disk->d_sync.ds_offset == 0)
3386 				sbuf_cat(sb, "0%");
3387 			else
3388 				sbuf_printf(sb, "%u%%",
3389 				    (u_int)((disk->d_sync.ds_offset * 100) /
3390 				    sc->sc_mediasize));
3391 			sbuf_cat(sb, "</Synchronized>\n");
3392 			if (disk->d_sync.ds_offset > 0)
3393 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3394 				    "</BytesSynced>\n", indent,
3395 				    (intmax_t)disk->d_sync.ds_offset);
3396 		}
3397 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3398 		    disk->d_sync.ds_syncid);
3399 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3400 		    disk->d_genid);
3401 		sbuf_printf(sb, "%s<Flags>", indent);
3402 		if (disk->d_flags == 0)
3403 			sbuf_cat(sb, "NONE");
3404 		else {
3405 			int first = 1;
3406 
3407 #define	ADD_FLAG(flag, name)	do {					\
3408 	if ((disk->d_flags & (flag)) != 0) {				\
3409 		if (!first)						\
3410 			sbuf_cat(sb, ", ");				\
3411 		else							\
3412 			first = 0;					\
3413 		sbuf_cat(sb, name);					\
3414 	}								\
3415 } while (0)
3416 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3417 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3418 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3419 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3420 			    "SYNCHRONIZING");
3421 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3422 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3423 #undef	ADD_FLAG
3424 		}
3425 		sbuf_cat(sb, "</Flags>\n");
3426 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3427 		    disk->d_priority);
3428 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3429 		    g_mirror_disk_state2str(disk->d_state));
3430 	} else {
3431 		sbuf_printf(sb, "%s<Type>", indent);
3432 		switch (sc->sc_type) {
3433 		case G_MIRROR_TYPE_AUTOMATIC:
3434 			sbuf_cat(sb, "AUTOMATIC");
3435 			break;
3436 		case G_MIRROR_TYPE_MANUAL:
3437 			sbuf_cat(sb, "MANUAL");
3438 			break;
3439 		default:
3440 			sbuf_cat(sb, "UNKNOWN");
3441 			break;
3442 		}
3443 		sbuf_cat(sb, "</Type>\n");
3444 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3445 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3446 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3447 		sbuf_printf(sb, "%s<Flags>", indent);
3448 		if (sc->sc_flags == 0)
3449 			sbuf_cat(sb, "NONE");
3450 		else {
3451 			int first = 1;
3452 
3453 #define	ADD_FLAG(flag, name)	do {					\
3454 	if ((sc->sc_flags & (flag)) != 0) {				\
3455 		if (!first)						\
3456 			sbuf_cat(sb, ", ");				\
3457 		else							\
3458 			first = 0;					\
3459 		sbuf_cat(sb, name);					\
3460 	}								\
3461 } while (0)
3462 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3463 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3464 #undef	ADD_FLAG
3465 		}
3466 		sbuf_cat(sb, "</Flags>\n");
3467 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3468 		    (u_int)sc->sc_slice);
3469 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3470 		    balance_name(sc->sc_balance));
3471 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3472 		    sc->sc_ndisks);
3473 		sbuf_printf(sb, "%s<State>", indent);
3474 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3475 			sbuf_printf(sb, "%s", "STARTING");
3476 		else if (sc->sc_ndisks ==
3477 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3478 			sbuf_printf(sb, "%s", "COMPLETE");
3479 		else
3480 			sbuf_printf(sb, "%s", "DEGRADED");
3481 		sbuf_cat(sb, "</State>\n");
3482 	}
3483 }
3484 
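/*
 * An abbreviated, illustrative fragment of what the device branch of
 * g_mirror_dumpconf() above contributes to kern.geom.confxml for a
 * healthy two-disk automatic mirror (values made up):
 *
 *	<Type>AUTOMATIC</Type>
 *	<ID>1234567890</ID>
 *	<SyncID>2</SyncID>
 *	<GenID>0</GenID>
 *	<Flags>NONE</Flags>
 *	<Slice>4096</Slice>
 *	<Balance>load</Balance>
 *	<Components>2</Components>
 *	<State>COMPLETE</State>
 */
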
3485 static void
3486 g_mirror_shutdown_post_sync(void *arg, int howto)
3487 {
3488 	struct g_class *mp;
3489 	struct g_geom *gp, *gp2;
3490 	struct g_mirror_softc *sc;
3491 	int error;
3492 
3493 	if (KERNEL_PANICKED())
3494 		return;
3495 
3496 	mp = arg;
3497 	g_topology_lock();
3498 	g_mirror_shutdown = 1;
3499 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3500 		if ((sc = gp->softc) == NULL)
3501 			continue;
3502 		/* Skip synchronization geom. */
3503 		if (gp == sc->sc_sync.ds_geom)
3504 			continue;
3505 		g_topology_unlock();
3506 		sx_xlock(&sc->sc_lock);
3507 		g_mirror_idle(sc, -1);
3508 		g_cancel_event(sc);
3509 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3510 		if (error != 0)
3511 			sx_xunlock(&sc->sc_lock);
3512 		g_topology_lock();
3513 	}
3514 	g_topology_unlock();
3515 }
3516 
3517 static void
3518 g_mirror_init(struct g_class *mp)
3519 {
3520 
3521 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3522 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3523 	if (g_mirror_post_sync == NULL)
3524 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3525 }
3526 
3527 static void
3528 g_mirror_fini(struct g_class *mp)
3529 {
3530 
3531 	if (g_mirror_post_sync != NULL)
3532 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3533 }
3534 
3535 /*
3536  * Refresh the mirror device's metadata when gmirror encounters a newer
3537  * generation as the individual components are being added to the mirror set.
3538  */
3539 static int
3540 g_mirror_refresh_device(struct g_mirror_softc *sc, const struct g_provider *pp,
3541     const struct g_mirror_metadata *md)
3542 {
3543 
3544 	g_topology_assert_not();
3545 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3546 
3547 	KASSERT(sc->sc_genid <= md->md_genid,
3548 	    ("%s: attempted to refresh from stale component %s (device %s) "
3549 	    "(%u < %u).", __func__, pp->name, sc->sc_name, md->md_genid,
3550 	    sc->sc_genid));
3551 
3552 	if (sc->sc_genid > md->md_genid || (sc->sc_genid == md->md_genid &&
3553 	    sc->sc_syncid >= md->md_syncid))
3554 		return (0);
3555 
3556 	G_MIRROR_DEBUG(0, "Found newer version for device %s (genid: curr=%u "
3557 	    "new=%u; syncid: curr=%u new=%u; ndisks: curr=%u new=%u; "
3558 	    "provider=%s).", sc->sc_name, sc->sc_genid, md->md_genid,
3559 	    sc->sc_syncid, md->md_syncid, sc->sc_ndisks, md->md_all, pp->name);
3560 
3561 	if (sc->sc_state != G_MIRROR_DEVICE_STATE_STARTING) {
3562 		/* Probable data corruption detected */
3563 		G_MIRROR_DEBUG(0, "Cannot refresh metadata in %s state "
3564 		    "(device=%s genid=%u). A stale mirror device was launched.",
3565 		    g_mirror_device_state2str(sc->sc_state), sc->sc_name,
3566 		    sc->sc_genid);
3567 		return (EINVAL);
3568 	}
3569 
3570 	/* Update softc */
3571 	g_mirror_reinit_from_metadata(sc, md);
3572 
3573 	G_MIRROR_DEBUG(1, "Refresh device %s (id=%u, state=%s) from disk %s "
3574 	    "(genid=%u syncid=%u md_all=%u).", sc->sc_name, md->md_mid,
3575 	    g_mirror_device_state2str(sc->sc_state), pp->name, md->md_genid,
3576 	    md->md_syncid, (unsigned)md->md_all);
3577 
3578 	return (0);
3579 }
3580 
3581 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3582 MODULE_VERSION(geom_mirror, 0);
3583