/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/eventhandler.h>
#include <sys/fail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <geom/geom.h>
#include <geom/mirror/g_mirror.h>

FEATURE(geom_mirror, "GEOM mirroring support");

static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");

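/*
 * msleep(9) wrapper which logs going to sleep and waking up on the given
 * wait channel at debug level 4.
 */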
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)

static eventhandler_tag g_mirror_post_sync = NULL;
static int g_mirror_shutdown = 0;

static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;

struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};

static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
    struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct g_mirror_softc *sc,
    struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);

static const char *
g_mirror_disk_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DISK_STATE_NONE:
		return ("NONE");
	case G_MIRROR_DISK_STATE_NEW:
		return ("NEW");
	case G_MIRROR_DISK_STATE_ACTIVE:
		return ("ACTIVE");
	case G_MIRROR_DISK_STATE_STALE:
		return ("STALE");
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		return ("SYNCHRONIZING");
	case G_MIRROR_DISK_STATE_DISCONNECTED:
		return ("DISCONNECTED");
	case G_MIRROR_DISK_STATE_DESTROY:
		return ("DESTROY");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_device_state2str(int state)
{

	switch (state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
		return ("STARTING");
	case G_MIRROR_DEVICE_STATE_RUNNING:
		return ("RUNNING");
	default:
		return ("INVALID");
	}
}

static const char *
g_mirror_get_diskname(struct g_mirror_disk *disk)
{

	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
		return ("[unknown]");
	return (disk->d_name);
}

/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to update disk and device status
 * from a single thread, which simplifies locking.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}

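/*
 * Queue an event for the worker thread and wake it up.  Unless
 * G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock, sleep until the worker
 * marks the event done, and return the event's error status.
 */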
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}

static struct g_mirror_event *
g_mirror_event_first(struct g_mirror_softc *sc)
{
	struct g_mirror_event *ep;

	mtx_lock(&sc->sc_events_mtx);
	ep = TAILQ_FIRST(&sc->sc_events);
	mtx_unlock(&sc->sc_events_mtx);
	return (ep);
}

static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}

static void
g_mirror_event_cancel(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_event *ep, *tmpep;

	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
			continue;
		if (ep->e_disk != disk)
			continue;
		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			wakeup(ep);
		}
	}
	mtx_unlock(&sc->sc_events_mtx);
}

/*
 * Return the number of disks in the given state.
 * If state is equal to -1, count all connected disks.
 */
u_int
g_mirror_ndisks(struct g_mirror_softc *sc, int state)
{
	struct g_mirror_disk *disk;
	u_int n = 0;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (state == -1 || disk->d_state == state)
			n++;
	}
	return (n);
}

/*
 * Find a disk in the mirror by its disk ID.
 */
static struct g_mirror_disk *
g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
{
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SX_XLOCKED);

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_id == id)
			return (disk);
	}
	return (NULL);
}

static u_int
g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct bio *bp;
	u_int nreqs = 0;

	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
		if (bp->bio_from == cp)
			nreqs++;
	}
	mtx_unlock(&sc->sc_queue_mtx);
	return (nreqs);
}

static int
g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	if (cp->index > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s exist, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	if (g_mirror_nrequests(sc, cp) > 0) {
		G_MIRROR_DEBUG(2,
		    "I/O requests for %s in queue, can't destroy it now.",
		    cp->provider->name);
		return (1);
	}
	return (0);
}

static void
g_mirror_destroy_consumer(void *arg, int flags __unused)
{
	struct g_consumer *cp;

	g_topology_assert();

	cp = arg;
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static void
g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{
	struct g_provider *pp;
	int retaste_wait;

	g_topology_assert();

	cp->private = NULL;
	if (g_mirror_is_busy(sc, cp))
		return;
	pp = cp->provider;
	retaste_wait = 0;
	if (cp->acw == 1) {
		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
			retaste_wait = 1;
	}
	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
	    -cp->acw, -cp->ace, 0);
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	if (retaste_wait) {
		/*
		 * After the retaste event has been sent (inside g_access()),
		 * we can send the event to detach and destroy the consumer.
		 * A class which has a consumer attached to the given provider
		 * will not receive a retaste event for that provider.
		 * This is how we ignore retaste events when closing consumers
		 * that were open for writing: the consumer is detached and
		 * destroyed only after the retaste event has been sent.
		 */
		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
		return;
	}
	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
	g_detach(cp);
	g_destroy_consumer(cp);
}

static int
g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
{
	struct g_consumer *cp;
	int error;

	g_topology_assert_not();
	KASSERT(disk->d_consumer == NULL,
	    ("Disk already connected (device %s).", disk->d_softc->sc_name));

	g_topology_lock();
	cp = g_new_consumer(disk->d_softc->sc_geom);
	cp->flags |= G_CF_DIRECT_RECEIVE;
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		g_topology_unlock();
		return (error);
	}
	error = g_access(cp, 1, 1, 1);
	if (error != 0) {
		g_detach(cp);
		g_destroy_consumer(cp);
		g_topology_unlock();
		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
		    pp->name, error);
		return (error);
	}
	g_topology_unlock();
	disk->d_consumer = cp;
	disk->d_consumer->private = disk;
	disk->d_consumer->index = 0;

	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
	return (0);
}

static void
g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
{

	g_topology_assert();

	if (cp == NULL)
		return;
	if (cp->provider != NULL)
		g_mirror_kill_consumer(sc, cp);
	else
		g_destroy_consumer(cp);
}

/*
 * Initialize a disk: allocate memory, create a consumer, attach it
 * to the provider, and open access (r1w1e1) to it.
 */
static struct g_mirror_disk *
g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
    struct g_mirror_metadata *md, int *errorp)
{
	struct g_mirror_disk *disk;
	int i, error;

	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
	if (disk == NULL) {
		error = ENOMEM;
		goto fail;
	}
	disk->d_softc = sc;
	error = g_mirror_connect_disk(disk, pp);
	if (error != 0)
		goto fail;
	disk->d_id = md->md_did;
	disk->d_state = G_MIRROR_DISK_STATE_NONE;
	disk->d_priority = md->md_priority;
	disk->d_flags = md->md_dflags;
	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
	if (error == 0 && i != 0)
		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
	if (md->md_provider[0] != '\0')
		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
	disk->d_sync.ds_consumer = NULL;
	disk->d_sync.ds_offset = md->md_sync_offset;
	disk->d_sync.ds_offset_done = md->md_sync_offset;
	disk->d_sync.ds_update_ts = time_uptime;
	disk->d_genid = md->md_genid;
	disk->d_sync.ds_syncid = md->md_syncid;
	if (errorp != NULL)
		*errorp = 0;
	return (disk);
fail:
	if (errorp != NULL)
		*errorp = error;
	if (disk != NULL)
		free(disk, M_MIRROR);
	return (NULL);
}

static void
g_mirror_destroy_disk(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	g_topology_lock();
	LIST_REMOVE(disk, d_next);
	g_topology_unlock();
	g_mirror_event_cancel(disk);
	if (sc->sc_hint == disk)
		sc->sc_hint = NULL;
	switch (disk->d_state) {
	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
		g_mirror_sync_stop(disk, 1);
		/* FALLTHROUGH */
	case G_MIRROR_DISK_STATE_NEW:
	case G_MIRROR_DISK_STATE_STALE:
	case G_MIRROR_DISK_STATE_ACTIVE:
		g_topology_lock();
		g_mirror_disconnect_consumer(sc, disk->d_consumer);
		g_topology_unlock();
		free(disk, M_MIRROR);
		break;
	default:
		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
		    g_mirror_get_diskname(disk),
		    g_mirror_disk_state2str(disk->d_state)));
	}
}

static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	g_topology_assert();

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}

static void
g_mirror_providergone(struct g_provider *pp)
{
	struct g_mirror_softc *sc = pp->private;

	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
}

static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}

static void
g_mirror_orphan(struct g_consumer *cp)
{
	struct g_mirror_disk *disk;

	g_topology_assert();

	disk = cp->private;
	if (disk == NULL)
		return;
	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
	    G_MIRROR_EVENT_DONTWAIT);
}

/*
 * Return the next active disk on the list.
 * It may be the same disk as the given one.
 * If there are no active disks on the list, NULL is returned.
 */
static __inline struct g_mirror_disk *
g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
{
	struct g_mirror_disk *dp;

	for (dp = LIST_NEXT(disk, d_next); dp != disk;
	    dp = LIST_NEXT(dp, d_next)) {
		if (dp == NULL)
			dp = LIST_FIRST(&sc->sc_disks);
		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
		return (NULL);
	return (dp);
}

static struct g_mirror_disk *
g_mirror_get_disk(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	if (sc->sc_hint == NULL) {
		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
		if (sc->sc_hint == NULL)
			return (NULL);
	}
	disk = sc->sc_hint;
	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
		disk = g_mirror_find_next(sc, disk);
		if (disk == NULL)
			return (NULL);
	}
	sc->sc_hint = g_mirror_find_next(sc, disk);
	return (disk);
}

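/*
 * Write the given metadata (or zeros if md is NULL or the WIPE flag is
 * set) to the component's last sector.  On failure the disk is marked
 * broken and may be disconnected from the mirror.
 */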
static int
g_mirror_write_metadata(struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int error = 0;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	if (md != NULL &&
	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
		/*
		 * Handle the case when the size of the parent provider
		 * has been reduced.
		 */
		if (offset < md->md_mediasize)
			error = ENOSPC;
		else
			mirror_metadata_encode(md, sector);
	}
	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
	if (error == 0)
		error = g_write_data(cp, offset, sector, length);
	free(sector, M_MIRROR);
	if (error != 0) {
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		} else {
			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
			    "(device=%s, error=%d).",
			    g_mirror_get_diskname(disk), sc->sc_name, error);
		}
		if (g_mirror_disconnect_on_failure &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
	}
	return (error);
}

static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	int error;

	g_topology_assert_not();
	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);

	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return (0);
	error = g_mirror_write_metadata(disk, NULL);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot clear metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
	return (error);
}

void
g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
    struct g_mirror_metadata *md)
{

	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
	md->md_version = G_MIRROR_VERSION;
	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
	md->md_mid = sc->sc_id;
	md->md_all = sc->sc_ndisks;
	md->md_slice = sc->sc_slice;
	md->md_balance = sc->sc_balance;
	md->md_genid = sc->sc_genid;
	md->md_mediasize = sc->sc_mediasize;
	md->md_sectorsize = sc->sc_sectorsize;
	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
	bzero(md->md_provider, sizeof(md->md_provider));
	if (disk == NULL) {
		md->md_did = arc4random();
		md->md_priority = 0;
		md->md_syncid = 0;
		md->md_dflags = 0;
		md->md_sync_offset = 0;
		md->md_provsize = 0;
	} else {
		md->md_did = disk->d_id;
		md->md_priority = disk->d_priority;
		md->md_syncid = disk->d_sync.ds_syncid;
		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
			md->md_sync_offset = disk->d_sync.ds_offset_done;
		else
			md->md_sync_offset = 0;
		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
			strlcpy(md->md_provider,
			    disk->d_consumer->provider->name,
			    sizeof(md->md_provider));
		}
		md->md_provsize = disk->d_consumer->provider->mediasize;
	}
}

void
g_mirror_update_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_mirror_metadata md;
	int error;

	g_topology_assert_not();
	sc = disk->d_softc;
	sx_assert(&sc->sc_lock, SX_LOCKED);

	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
		return;
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
		g_mirror_fill_metadata(sc, disk, &md);
	error = g_mirror_write_metadata(disk, &md);
	if (error == 0) {
		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
		    g_mirror_get_diskname(disk));
	} else {
		G_MIRROR_DEBUG(0,
		    "Cannot update metadata on disk %s (error=%d).",
		    g_mirror_get_diskname(disk), error);
	}
}

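/*
 * Bump the synchronization ID on all active and synchronizing disks.
 * The syncid identifies a consistency epoch: a component whose syncid
 * lags behind must be resynchronized before it can rejoin the mirror.
 */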
static void
g_mirror_bump_syncid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_syncid++;
	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
	    sc->sc_syncid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_sync.ds_syncid = sc->sc_syncid;
			g_mirror_update_metadata(disk);
		}
	}
}

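/*
 * Bump the generation ID on all active and synchronizing disks.  A
 * component left with a stale genid (e.g. one dropped because of I/O
 * errors) is considered broken and will not be trusted later.
 */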
static void
g_mirror_bump_genid(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
	    ("%s called with no active disks (device=%s).", __func__,
	    sc->sc_name));

	sc->sc_genid++;
	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
	    sc->sc_genid);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			disk->d_genid = sc->sc_genid;
			g_mirror_update_metadata(disk);
		}
	}
}

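/*
 * If the mirror has been write-idle for long enough, mark all active
 * components clean.  Return the number of seconds to wait before
 * checking again, or 0 if there is nothing left to do.
 */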
static int
g_mirror_idle(struct g_mirror_softc *sc, int acw)
{
	struct g_mirror_disk *disk;
	int timeout;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if (sc->sc_provider == NULL)
		return (0);
	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return (0);
	if (sc->sc_idle)
		return (0);
	if (sc->sc_writes > 0)
		return (0);
	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
		if (!g_mirror_shutdown && timeout > 0)
			return (timeout);
	}
	sc->sc_idle = 1;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
	return (0);
}

static void
g_mirror_unidle(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
		return;
	sc->sc_idle = 0;
	sc->sc_last_write = time_uptime;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
		    g_mirror_get_diskname(disk), sc->sc_name);
		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
	}
}

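/*
 * Completion callback for regular requests: hand the bio back to the
 * worker thread, which finishes it in g_mirror_regular_request().
 */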
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

static void
g_mirror_regular_request_error(struct g_mirror_softc *sc,
    struct g_mirror_disk *disk, struct bio *bp)
{

	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP)
		return;

	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
	} else {
		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
		    bp->bio_error);
	}
	if (g_mirror_disconnect_on_failure &&
	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
		if (bp->bio_error == ENXIO &&
		    bp->bio_cmd == BIO_READ)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
		else if (bp->bio_error == ENXIO)
			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
		else
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
	}
}

static void
g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
	    ("regular request %p with unexpected origin", bp));

	pbp = bp->bio_parent;
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
		break;
	case BIO_WRITE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);
		break;
	case BIO_DELETE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
		    bp->bio_error);
		break;
	case BIO_FLUSH:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
		    bp->bio_error);
		break;
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL)
			g_mirror_regular_request_error(sc, disk, bp);
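		/*
		 * For writes, deletes and flushes, account the failed
		 * sub-request as if it had never been issued: the parent
		 * still succeeds if at least one component succeeds.
		 */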
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
		case BIO_FLUSH:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
	case BIO_FLUSH:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
			/* Release delayed sync requests if possible. */
			g_mirror_sync_release(sc);
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}

static void
g_mirror_sync_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
	sc = bp->bio_from->geom->softc;
	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}

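/*
 * GEOM::candelete handler: report that the mirror supports BIO_DELETE
 * if at least one of its components does.
 */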
static void
g_mirror_candelete(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	int *val;

	sc = bp->bio_to->private;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
			break;
	}
	val = (int *)bp->bio_data;
	*val = (disk != NULL);
	g_io_deliver(bp, 0);
}

static void
g_mirror_kernel_dump(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *cbp;
	struct g_kerneldump *gkd;

	/*
	 * We configure dumping to the first component, because this component
	 * will be used for reading with the 'prefer' balance algorithm.
	 * If the component with the highest priority is currently
	 * disconnected, we will not be able to read the dump after the
	 * reboot if it is connected and synchronized later. Can we do
	 * something better?
	 */
	sc = bp->bio_to->private;
	disk = LIST_FIRST(&sc->sc_disks);

	gkd = (struct g_kerneldump *)bp->bio_data;
	if (gkd->length > bp->bio_to->mediasize)
		gkd->length = bp->bio_to->mediasize;
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		g_io_deliver(bp, ENOMEM);
		return;
	}
	cbp->bio_done = g_std_done;
	g_io_request(cbp, disk->d_consumer);
	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
	    g_mirror_get_diskname(disk));
}

static void
g_mirror_start(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_to->private;
	/*
	 * If sc == NULL or there are no valid disks, the provider's error
	 * should be set and g_mirror_start() should not be called at all.
	 */
	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
	    ("Provider's error should be set (error=%d)(mirror=%s).",
	    bp->bio_to->error, bp->bio_to->name));
	G_MIRROR_LOGREQ(3, bp, "Request received.");

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		break;
	case BIO_GETATTR:
		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
			g_mirror_candelete(bp);
			return;
		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
			g_mirror_kernel_dump(bp);
			return;
		}
		/* FALLTHROUGH */
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}
	mtx_lock(&sc->sc_queue_mtx);
	if (bp->bio_to->error != 0) {
		mtx_unlock(&sc->sc_queue_mtx);
		g_io_deliver(bp, bp->bio_to->error);
		return;
	}
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	wakeup(sc);
}

/*
 * Return TRUE if the given request collides with an in-progress
 * synchronization request.
 */
static bool
g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *sbp;
	off_t rstart, rend, sstart, send;
	u_int i;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	rstart = bp->bio_offset;
	rend = bp->bio_offset + bp->bio_length;
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
			continue;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			sbp = disk->d_sync.ds_bios[i];
			if (sbp == NULL)
				continue;
			sstart = sbp->bio_offset;
			send = sbp->bio_offset + sbp->bio_length;
			if (rend > sstart && rstart < send)
				return (true);
		}
	}
	return (false);
}

/*
 * Return TRUE if the given sync request collides with an in-progress
 * regular request.
 */
static bool
g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
{
	off_t rstart, rend, sstart, send;
	struct bio *bp;

	if (sc->sc_sync.ds_ndisks == 0)
		return (false);
	sstart = sbp->bio_offset;
	send = sbp->bio_offset + sbp->bio_length;
	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
		rstart = bp->bio_offset;
		rend = bp->bio_offset + bp->bio_length;
		if (rend > sstart && rstart < send)
			return (true);
	}
	return (false);
}

/*
 * Put a regular request onto the delayed queue.
 */
static void
g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
}

/*
 * Put a synchronization request onto the delayed queue.
 */
static void
g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
{

	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
}

/*
 * Requeue delayed regular requests.
 */
static void
g_mirror_regular_release(struct g_mirror_softc *sc)
{
	struct bio *bp;

	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
		return;
	if (g_mirror_sync_collision(sc, bp))
		return;

	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
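	/*
	 * Move the delayed requests to the head of the main queue: append
	 * the queue to the delayed list, then swap the two list heads.
	 */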
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
}

/*
 * Release delayed sync requests which no longer collide with regular
 * requests.
 */
static void
g_mirror_sync_release(struct g_mirror_softc *sc)
{
	struct bio *bp, *bp2;

	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
		if (g_mirror_regular_collision(sc, bp))
			continue;
		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
		G_MIRROR_LOGREQ(2, bp,
		    "Releasing delayed synchronization request.");
		g_io_request(bp, bp->bio_from);
	}
}

/*
 * Free a synchronization request and clear its slot in the array.
 */
static void
g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
{
	int idx;

	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
		idx = (int)(uintptr_t)bp->bio_caller1;
		KASSERT(disk->d_sync.ds_bios[idx] == bp,
		    ("unexpected sync BIO at %p:%d", disk, idx));
		disk->d_sync.ds_bios[idx] = NULL;
	}
	free(bp->bio_data, M_MIRROR);
	g_destroy_bio(bp);
}

/*
 * Handle synchronization requests.
 * Every synchronization request is a two-step process: first, a read request is
 * sent to the mirror provider via the sync consumer. If that request completes
 * successfully, it is converted to a write and sent to the disk being
 * synchronized. If the write also completes successfully, the synchronization
 * offset is advanced and a new read request is submitted.
 */
static void
g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_mirror_disk_sync *sync;

	KASSERT((bp->bio_cmd == BIO_READ &&
	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
	    ("Sync BIO %p with unexpected origin", bp));

	bp->bio_from->index--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
		g_mirror_sync_request_free(NULL, bp);
		sx_xlock(&sc->sc_lock);
		return;
	}

	sync = &disk->d_sync;

	/*
	 * Synchronization request.
	 */
	switch (bp->bio_cmd) {
	case BIO_READ: {
		struct g_consumer *cp;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);

			/*
			 * The read error will trigger a syncid bump, so there's
			 * no need to do that here.
			 *
			 * The read error handling for regular requests will
			 * retry the read from all active mirrors before passing
			 * the error back up, so there's no need to retry here.
			 */
			g_mirror_sync_request_free(disk, bp);
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp,
		    "Synchronization request half-finished.");
		bp->bio_cmd = BIO_WRITE;
		bp->bio_cflags = 0;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		cp->index++;
		g_io_request(bp, cp);
		return;
	}
	case BIO_WRITE: {
		off_t offset;
		int i;

		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
		    bp->bio_error);

		if (bp->bio_error != 0) {
			G_MIRROR_LOGREQ(0, bp,
			    "Synchronization request failed (error=%d).",
			    bp->bio_error);
			g_mirror_sync_request_free(disk, bp);
			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}
		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
		if (sync->ds_offset >= sc->sc_mediasize ||
		    sync->ds_consumer == NULL ||
		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
			/* Don't send more synchronization requests. */
			sync->ds_inflight--;
			g_mirror_sync_request_free(disk, bp);
			if (sync->ds_inflight > 0)
				return;
			if (sync->ds_consumer == NULL ||
			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				return;
			}
			/* Disk up-to-date, activate it. */
			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
			    G_MIRROR_EVENT_DONTWAIT);
			return;
		}

		/* Send next synchronization request. */
		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
		sync->ds_offset += bp->bio_length;

		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
		sync->ds_consumer->index++;

		/*
		 * Delay the request if it is colliding with a regular request.
		 */
		if (g_mirror_regular_collision(sc, bp))
			g_mirror_sync_delay(sc, bp);
		else
			g_io_request(bp, sync->ds_consumer);

		/* Requeue delayed requests if possible. */
		g_mirror_regular_release(sc);

		/* Find the smallest offset. */
		offset = sc->sc_mediasize;
		for (i = 0; i < g_mirror_syncreqs; i++) {
			bp = sync->ds_bios[i];
			if (bp != NULL && bp->bio_offset < offset)
				offset = bp->bio_offset;
		}
		if (g_mirror_sync_period > 0 &&
		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
			sync->ds_offset_done = offset;
			g_mirror_update_metadata(disk);
			sync->ds_update_ts = time_uptime;
		}
		return;
	}
	default:
		panic("Invalid I/O request %p", bp);
	}
}

static void
g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
			break;
	}
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

static void
g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;

	disk = g_mirror_get_disk(sc);
	if (disk == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENXIO;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	g_io_request(cbp, cp);
}

#define TRACK_SIZE  (1 * 1024 * 1024)
#define LOAD_SCALE	256
#define ABS(x)		(((x) >= 0) ? (x) : (-(x)))

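/*
 * LOAD balance: dispatch the read to the least loaded active disk.
 * A consumer's load is an exponentially weighted moving average of its
 * number of in-flight requests; a disk whose head is at (or within
 * TRACK_SIZE of) the requested offset gets a bonus of two (or one)
 * LOAD_SCALE units.
 */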
static void
g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk, *dp;
	struct g_consumer *cp;
	struct bio *cbp;
	int prio, best;

	/* Find a disk with the smallest load. */
	disk = NULL;
	best = INT_MAX;
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		prio = dp->load;
		/* If disk head is precisely in position - highly prefer it. */
		if (dp->d_last_offset == bp->bio_offset)
			prio -= 2 * LOAD_SCALE;
		else
		/* If disk head is close to position - prefer it. */
		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
			prio -= 1 * LOAD_SCALE;
		if (prio <= best) {
			disk = dp;
			best = prio;
		}
	}
	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
	cbp = g_clone_bio(bp);
	if (cbp == NULL) {
		if (bp->bio_error == 0)
			bp->bio_error = ENOMEM;
		g_io_deliver(bp, bp->bio_error);
		return;
	}
	/*
	 * Fill in the component bio structure.
	 */
	cp = disk->d_consumer;
	cbp->bio_done = g_mirror_done;
	cbp->bio_to = cp->provider;
	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
	    cp->acw, cp->ace));
	cp->index++;
	/* Remember last head position. */
	disk->d_last_offset = bp->bio_offset + bp->bio_length;
	/* Update loads. */
	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
		dp->load = (dp->d_consumer->index * LOAD_SCALE +
		    dp->load * 7) / 8;
	}
	g_io_request(cbp, cp);
}

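/*
 * SPLIT balance: divide a large read into roughly equal, sector-aligned
 * slices and send one slice to each active disk.  Requests no larger
 * than the configured slice size fall back to round-robin.
 */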
static void
g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct g_mirror_disk *disk;
	struct g_consumer *cp;
	struct bio *cbp;
	off_t left, mod, offset, slice;
	u_char *data;
	u_int ndisks;

	if (bp->bio_length <= sc->sc_slice) {
		g_mirror_request_round_robin(sc, bp);
		return;
	}
	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
	slice = bp->bio_length / ndisks;
	mod = slice % sc->sc_provider->sectorsize;
	if (mod != 0)
		slice += sc->sc_provider->sectorsize - mod;
	/*
	 * Allocate all bios before sending any requests, so we can return
	 * ENOMEM in a nice and clean way.
	 */
	left = bp->bio_length;
	offset = bp->bio_offset;
	data = bp->bio_data;
	TAILQ_INIT(&queue);
	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
			continue;
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
				TAILQ_REMOVE(&queue, cbp, bio_queue);
				g_destroy_bio(cbp);
			}
			if (bp->bio_error == 0)
				bp->bio_error = ENOMEM;
			g_io_deliver(bp, bp->bio_error);
			return;
		}
		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
		cbp->bio_done = g_mirror_done;
		cbp->bio_caller1 = disk;
		cbp->bio_to = disk->d_consumer->provider;
		cbp->bio_offset = offset;
		cbp->bio_data = data;
		cbp->bio_length = MIN(left, slice);
		left -= cbp->bio_length;
		if (left == 0)
			break;
		offset += cbp->bio_length;
		data += cbp->bio_length;
	}
	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, cbp, bio_queue);
		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
		disk = cbp->bio_caller1;
		cbp->bio_caller1 = NULL;
		cp = disk->d_consumer;
		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
		disk->d_consumer->index++;
		g_io_request(cbp, disk->d_consumer);
	}
}

static void
g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct bio_queue queue;
	struct bio *cbp;
	struct g_consumer *cp;
	struct g_mirror_disk *disk;

	sx_assert(&sc->sc_lock, SA_XLOCKED);

	/*
	 * To avoid ordering issues, if a write is deferred because of a
	 * collision with a sync request, all I/O is deferred until that
	 * write is initiated.
	 */
	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
		g_mirror_regular_delay(sc, bp);
		return;
	}

	switch (bp->bio_cmd) {
	case BIO_READ:
		switch (sc->sc_balance) {
		case G_MIRROR_BALANCE_LOAD:
			g_mirror_request_load(sc, bp);
			break;
		case G_MIRROR_BALANCE_PREFER:
			g_mirror_request_prefer(sc, bp);
			break;
		case G_MIRROR_BALANCE_ROUND_ROBIN:
			g_mirror_request_round_robin(sc, bp);
			break;
		case G_MIRROR_BALANCE_SPLIT:
			g_mirror_request_split(sc, bp);
			break;
		}
		return;
	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Delay the request if it is colliding with a synchronization
		 * request.
		 */
		if (g_mirror_sync_collision(sc, bp)) {
			g_mirror_regular_delay(sc, bp);
			return;
		}

		if (sc->sc_idle)
			g_mirror_unidle(sc);
		else
			sc->sc_last_write = time_uptime;

		/*
		 * Bump syncid on first write.
		 */
		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
			g_mirror_bump_syncid(sc);
		}

		/*
		 * Allocate all bios before sending any requests, so we can
		 * return ENOMEM in a nice and clean way.
		 */
		TAILQ_INIT(&queue);
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			switch (disk->d_state) {
			case G_MIRROR_DISK_STATE_ACTIVE:
				break;
			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
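				/*
				 * Mirror the write only into the region that
				 * has already been synchronized; the rest
				 * will be copied by the synchronization
				 * process itself.
				 */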
				if (bp->bio_offset >= disk->d_sync.ds_offset)
					continue;
				break;
			default:
				continue;
			}
			if (bp->bio_cmd == BIO_DELETE &&
			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
				continue;
			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
					TAILQ_REMOVE(&queue, cbp, bio_queue);
					g_destroy_bio(cbp);
				}
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
			cbp->bio_done = g_mirror_done;
			cp = disk->d_consumer;
			cbp->bio_caller1 = cp;
			cbp->bio_to = cp->provider;
			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
			    ("Consumer %s not opened (r%dw%de%d).",
			    cp->provider->name, cp->acr, cp->acw, cp->ace));
		}
		if (TAILQ_EMPTY(&queue)) {
			KASSERT(bp->bio_cmd == BIO_DELETE,
			    ("No consumers for regular request %p", bp));
			g_io_deliver(bp, EOPNOTSUPP);
			return;
		}
		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
			TAILQ_REMOVE(&queue, cbp, bio_queue);
			cp = cbp->bio_caller1;
			cbp->bio_caller1 = NULL;
			cp->index++;
			sc->sc_writes++;
			g_io_request(cbp, cp);
		}
		/*
		 * Put the request onto the inflight queue, so we can check
		 * whether new synchronization requests collide with it.
		 */
1773 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1774 		return;
1775 	case BIO_FLUSH:
1776 		TAILQ_INIT(&queue);
1777 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1778 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1779 				continue;
1780 			cbp = g_clone_bio(bp);
1781 			if (cbp == NULL) {
1782 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1783 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1784 					g_destroy_bio(cbp);
1785 				}
1786 				if (bp->bio_error == 0)
1787 					bp->bio_error = ENOMEM;
1788 				g_io_deliver(bp, bp->bio_error);
1789 				return;
1790 			}
1791 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1792 			cbp->bio_done = g_mirror_done;
1793 			cbp->bio_caller1 = disk;
1794 			cbp->bio_to = disk->d_consumer->provider;
1795 		}
1796 		KASSERT(!TAILQ_EMPTY(&queue),
1797 		    ("No consumers for regular request %p", bp));
1798 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1799 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1800 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1801 			disk = cbp->bio_caller1;
1802 			cbp->bio_caller1 = NULL;
1803 			cp = disk->d_consumer;
1804 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1805 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1806 			    cp->acr, cp->acw, cp->ace));
1807 			cp->index++;
1808 			g_io_request(cbp, cp);
1809 		}
1810 		break;
1811 	default:
1812 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1813 		    bp->bio_cmd, sc->sc_name));
1814 		break;
1815 	}
1816 }
1817 
1818 static int
1819 g_mirror_can_destroy(struct g_mirror_softc *sc)
1820 {
1821 	struct g_geom *gp;
1822 	struct g_consumer *cp;
1823 
1824 	g_topology_assert();
1825 	gp = sc->sc_geom;
1826 	if (gp->softc == NULL)
1827 		return (1);
1828 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1829 		return (0);
1830 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1831 		if (g_mirror_is_busy(sc, cp))
1832 			return (0);
1833 	}
1834 	gp = sc->sc_sync.ds_geom;
1835 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1836 		if (g_mirror_is_busy(sc, cp))
1837 			return (0);
1838 	}
1839 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1840 	    sc->sc_name);
1841 	return (1);
1842 }
1843 
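/*
 * Try to destroy the device: release the root mount hold and, if no I/O is
 * pending, detach the softc from both geoms and either wake up the worker
 * thread to finish the teardown (when draining) or destroy the device
 * directly.  Returns non-zero if the device was destroyed.
 */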
1844 static int
1845 g_mirror_try_destroy(struct g_mirror_softc *sc)
1846 {
1847 
1848 	if (sc->sc_rootmount != NULL) {
1849 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1850 		    sc->sc_rootmount);
1851 		root_mount_rel(sc->sc_rootmount);
1852 		sc->sc_rootmount = NULL;
1853 	}
1854 	g_topology_lock();
1855 	if (!g_mirror_can_destroy(sc)) {
1856 		g_topology_unlock();
1857 		return (0);
1858 	}
1859 	sc->sc_geom->softc = NULL;
1860 	sc->sc_sync.ds_geom->softc = NULL;
1861 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1862 		g_topology_unlock();
1863 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1864 		    &sc->sc_worker);
1865 		/* Unlock sc_lock here, as the lock can be destroyed after wakeup. */
1866 		sx_xunlock(&sc->sc_lock);
1867 		wakeup(&sc->sc_worker);
1868 		sc->sc_worker = NULL;
1869 	} else {
1870 		g_topology_unlock();
1871 		g_mirror_destroy_device(sc);
1872 	}
1873 	return (1);
1874 }
1875 
1876 /*
1877  * Worker thread: process events first, then regular and synchronization I/O.
1878  */
1879 static void
1880 g_mirror_worker(void *arg)
1881 {
1882 	struct g_mirror_softc *sc;
1883 	struct g_mirror_event *ep;
1884 	struct bio *bp;
1885 	int timeout;
1886 
1887 	sc = arg;
1888 	thread_lock(curthread);
1889 	sched_prio(curthread, PRIBIO);
1890 	thread_unlock(curthread);
1891 
1892 	sx_xlock(&sc->sc_lock);
1893 	for (;;) {
1894 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1895 		/*
1896 		 * First, take a look at events.
1897 		 * It is important to handle events before any I/O requests.
1898 		 */
1899 		ep = g_mirror_event_first(sc);
1900 		if (ep != NULL) {
1901 			g_mirror_event_remove(sc, ep);
1902 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1903 				/* Update only device status. */
1904 				G_MIRROR_DEBUG(3,
1905 				    "Running event for device %s.",
1906 				    sc->sc_name);
1907 				ep->e_error = 0;
1908 				g_mirror_update_device(sc, true);
1909 			} else {
1910 				/* Update disk status. */
1911 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1912 				     g_mirror_get_diskname(ep->e_disk));
1913 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1914 				    ep->e_state);
1915 				if (ep->e_error == 0)
1916 					g_mirror_update_device(sc, false);
1917 			}
1918 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1919 				KASSERT(ep->e_error == 0,
1920 				    ("Error cannot be handled."));
1921 				g_mirror_event_free(ep);
1922 			} else {
1923 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1924 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1925 				    ep);
1926 				mtx_lock(&sc->sc_events_mtx);
1927 				wakeup(ep);
1928 				mtx_unlock(&sc->sc_events_mtx);
1929 			}
1930 			if ((sc->sc_flags &
1931 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1932 				if (g_mirror_try_destroy(sc)) {
1933 					curthread->td_pflags &= ~TDP_GEOM;
1934 					G_MIRROR_DEBUG(1, "Thread exiting.");
1935 					kproc_exit(0);
1936 				}
1937 			}
1938 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1939 			continue;
1940 		}
1941 
1942 		/*
1943 		 * Check whether we can mark the array as CLEAN and, if we
1944 		 * cannot, how many seconds we should wait.
1945 		 */
1946 		timeout = g_mirror_idle(sc, -1);
1947 
1948 		/*
1949 		 * Handle I/O requests.
1950 		 */
1951 		mtx_lock(&sc->sc_queue_mtx);
1952 		bp = TAILQ_FIRST(&sc->sc_queue);
1953 		if (bp != NULL)
1954 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1955 		else {
1956 			if ((sc->sc_flags &
1957 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1958 				mtx_unlock(&sc->sc_queue_mtx);
1959 				if (g_mirror_try_destroy(sc)) {
1960 					curthread->td_pflags &= ~TDP_GEOM;
1961 					G_MIRROR_DEBUG(1, "Thread exiting.");
1962 					kproc_exit(0);
1963 				}
1964 				mtx_lock(&sc->sc_queue_mtx);
1965 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1966 					mtx_unlock(&sc->sc_queue_mtx);
1967 					continue;
1968 				}
1969 			}
1970 			if (g_mirror_event_first(sc) != NULL) {
1971 				mtx_unlock(&sc->sc_queue_mtx);
1972 				continue;
1973 			}
1974 			sx_xunlock(&sc->sc_lock);
1975 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1976 			    timeout * hz);
1977 			sx_xlock(&sc->sc_lock);
1978 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1979 			continue;
1980 		}
1981 		mtx_unlock(&sc->sc_queue_mtx);
1982 
1983 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1984 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1985 			/*
1986 			 * Handle completion of the first half (the read) of a
1987 			 * block synchronization operation.
1988 			 */
1989 			g_mirror_sync_request(sc, bp);
1990 		} else if (bp->bio_to != sc->sc_provider) {
1991 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1992 				/*
1993 				 * Handle completion of a regular I/O request.
1994 				 */
1995 				g_mirror_regular_request(sc, bp);
1996 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1997 				/*
1998 				 * Handle completion of the second half (the
1999 				 * write) of a block synchronization operation.
2000 				 */
2001 				g_mirror_sync_request(sc, bp);
2002 			else {
2003 				KASSERT(0,
2004 				    ("Invalid request cflags=0x%hx to=%s.",
2005 				    bp->bio_cflags, bp->bio_to->name));
2006 			}
2007 		} else {
2008 			/*
2009 			 * Initiate an I/O request.
2010 			 */
2011 			g_mirror_register_request(sc, bp);
2012 		}
2013 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2014 	}
2015 }
2016 
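/*
 * Propagate the device's idle state to a component: mark the disk dirty when
 * the device is active, and clean when it has gone idle, unless NOFAILSYNC
 * disables dirty-flag tracking altogether.
 */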
2017 static void
2018 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2019 {
2020 
2021 	sx_assert(&sc->sc_lock, SX_LOCKED);
2022 
2023 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2024 		return;
2025 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2026 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2027 		    g_mirror_get_diskname(disk), sc->sc_name);
2028 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2029 	} else if (sc->sc_idle &&
2030 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2031 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2032 		    g_mirror_get_diskname(disk), sc->sc_name);
2033 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2034 	}
2035 }
2036 
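/*
 * Reinitialize a synchronization bio to read the next block at 'offset',
 * preserving its data buffer and slot index across g_reset_bio().
 */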
2037 static void
2038 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2039     off_t offset)
2040 {
2041 	void *data;
2042 	int idx;
2043 
2044 	data = bp->bio_data;
2045 	idx = (int)(uintptr_t)bp->bio_caller1;
2046 	g_reset_bio(bp);
2047 
2048 	bp->bio_cmd = BIO_READ;
2049 	bp->bio_data = data;
2050 	bp->bio_done = g_mirror_sync_done;
2051 	bp->bio_from = disk->d_sync.ds_consumer;
2052 	bp->bio_to = disk->d_softc->sc_provider;
2053 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2054 	bp->bio_offset = offset;
2055 	bp->bio_length = MIN(MAXPHYS,
2056 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2057 }
2058 
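/*
 * Begin synchronizing a disk: attach a fresh consumer to the mirror
 * provider, allocate g_mirror_syncreqs bios and fire off the initial batch
 * of read requests, delaying any that collide with in-flight regular writes.
 */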
2059 static void
2060 g_mirror_sync_start(struct g_mirror_disk *disk)
2061 {
2062 	struct g_mirror_softc *sc;
2063 	struct g_mirror_disk_sync *sync;
2064 	struct g_consumer *cp;
2065 	struct bio *bp;
2066 	int error, i;
2067 
2068 	g_topology_assert_not();
2069 	sc = disk->d_softc;
2070 	sync = &disk->d_sync;
2071 	sx_assert(&sc->sc_lock, SX_LOCKED);
2072 
2073 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2074 	    ("Disk %s is not marked for synchronization.",
2075 	    g_mirror_get_diskname(disk)));
2076 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2077 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2078 	    sc->sc_state));
2079 
2080 	sx_xunlock(&sc->sc_lock);
2081 	g_topology_lock();
2082 	cp = g_new_consumer(sc->sc_sync.ds_geom);
2083 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2084 	error = g_attach(cp, sc->sc_provider);
2085 	KASSERT(error == 0,
2086 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2087 	error = g_access(cp, 1, 0, 0);
2088 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2089 	g_topology_unlock();
2090 	sx_xlock(&sc->sc_lock);
2091 
2092 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2093 	    g_mirror_get_diskname(disk));
2094 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2095 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2096 	KASSERT(sync->ds_consumer == NULL,
2097 	    ("Sync consumer already exists (device=%s, disk=%s).",
2098 	    sc->sc_name, g_mirror_get_diskname(disk)));
2099 
2100 	sync->ds_consumer = cp;
2101 	sync->ds_consumer->private = disk;
2102 	sync->ds_consumer->index = 0;
2103 
2104 	/*
2105 	 * Allocate memory for synchronization bios and initialize them.
2106 	 */
2107 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2108 	    M_MIRROR, M_WAITOK);
2109 	for (i = 0; i < g_mirror_syncreqs; i++) {
2110 		bp = g_alloc_bio();
2111 		sync->ds_bios[i] = bp;
2112 
2113 		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2114 		bp->bio_caller1 = (void *)(uintptr_t)i;
2115 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2116 		sync->ds_offset += bp->bio_length;
2117 	}
2118 
2119 	/* Increase the number of disks in SYNCHRONIZING state. */
2120 	sc->sc_sync.ds_ndisks++;
2121 	/* Set the number of in-flight synchronization requests. */
2122 	sync->ds_inflight = g_mirror_syncreqs;
2123 
2124 	/*
2125 	 * Fire off first synchronization requests.
2126 	 */
2127 	for (i = 0; i < g_mirror_syncreqs; i++) {
2128 		bp = sync->ds_bios[i];
2129 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2130 		sync->ds_consumer->index++;
2131 		/*
2132 		 * Delay the request if it is colliding with a regular request.
2133 		 */
2134 		if (g_mirror_regular_collision(sc, bp))
2135 			g_mirror_sync_delay(sc, bp);
2136 		else
2137 			g_io_request(bp, sync->ds_consumer);
2138 	}
2139 }
2140 
2141 /*
2142  * Stop synchronization process.
2143  * type: 0 - synchronization finished
2144  *       1 - synchronization stopped
2145  */
2146 static void
2147 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2148 {
2149 	struct g_mirror_softc *sc;
2150 	struct g_consumer *cp;
2151 
2152 	g_topology_assert_not();
2153 	sc = disk->d_softc;
2154 	sx_assert(&sc->sc_lock, SX_LOCKED);
2155 
2156 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2157 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2158 	    g_mirror_disk_state2str(disk->d_state)));
2159 	if (disk->d_sync.ds_consumer == NULL)
2160 		return;
2161 
2162 	if (type == 0) {
2163 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2164 		    sc->sc_name, g_mirror_get_diskname(disk));
2165 	} else /* if (type == 1) */ {
2166 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2167 		    sc->sc_name, g_mirror_get_diskname(disk));
2168 	}
2169 	g_mirror_regular_release(sc);
2170 	free(disk->d_sync.ds_bios, M_MIRROR);
2171 	disk->d_sync.ds_bios = NULL;
2172 	cp = disk->d_sync.ds_consumer;
2173 	disk->d_sync.ds_consumer = NULL;
2174 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2175 	sc->sc_sync.ds_ndisks--;
2176 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2177 	g_topology_lock();
2178 	g_mirror_kill_consumer(sc, cp);
2179 	g_topology_unlock();
2180 	sx_xlock(&sc->sc_lock);
2181 }
2182 
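/*
 * Create and announce the mirror/<name> provider.  The provider inherits the
 * largest stripe size of the components and accepts unmapped I/O only if the
 * balance algorithm and all components allow it; synchronization is then
 * started for any disks that need it.
 */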
2183 static void
2184 g_mirror_launch_provider(struct g_mirror_softc *sc)
2185 {
2186 	struct g_mirror_disk *disk;
2187 	struct g_provider *pp, *dp;
2188 
2189 	sx_assert(&sc->sc_lock, SX_LOCKED);
2190 
2191 	g_topology_lock();
2192 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2193 	pp->flags |= G_PF_DIRECT_RECEIVE;
2194 	pp->mediasize = sc->sc_mediasize;
2195 	pp->sectorsize = sc->sc_sectorsize;
2196 	pp->stripesize = 0;
2197 	pp->stripeoffset = 0;
2198 
2199 	/* Splitting of unmapped BIOs could work but isn't implemented now. */
2200 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2201 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2202 
2203 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2204 		if (disk->d_consumer && disk->d_consumer->provider) {
2205 			dp = disk->d_consumer->provider;
2206 			if (dp->stripesize > pp->stripesize) {
2207 				pp->stripesize = dp->stripesize;
2208 				pp->stripeoffset = dp->stripeoffset;
2209 			}
2210 			/* A provider underneath us doesn't support unmapped I/O. */
2211 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2212 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2213 				    "because of %s.", dp->name);
2214 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2215 			}
2216 		}
2217 	}
2218 	pp->private = sc;
2219 	sc->sc_refcnt++;
2220 	sc->sc_provider = pp;
2221 	g_error_provider(pp, 0);
2222 	g_topology_unlock();
2223 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2224 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2225 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2226 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2227 			g_mirror_sync_start(disk);
2228 	}
2229 }
2230 
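/*
 * Destroy the mirror provider: stop any in-progress synchronization, fail
 * pending external I/O with ENXIO, free internally generated requests and
 * wither the provider away.
 */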
2231 static void
2232 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2233 {
2234 	struct g_mirror_disk *disk;
2235 	struct bio *bp;
2236 
2237 	g_topology_assert_not();
2238 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2239 	    sc->sc_name));
2240 
2241 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2242 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2243 			g_mirror_sync_stop(disk, 1);
2244 	}
2245 
2246 	g_topology_lock();
2247 	g_error_provider(sc->sc_provider, ENXIO);
2248 	mtx_lock(&sc->sc_queue_mtx);
2249 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2250 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2251 		/*
2252 		 * Abort any pending I/O that wasn't generated by us.
2253 		 * Synchronization requests and requests destined for individual
2254 		 * mirror components can be destroyed immediately.
2255 		 */
2256 		if (bp->bio_to == sc->sc_provider &&
2257 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2258 			g_io_deliver(bp, ENXIO);
2259 		} else {
2260 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2261 				free(bp->bio_data, M_MIRROR);
2262 			g_destroy_bio(bp);
2263 		}
2264 	}
2265 	mtx_unlock(&sc->sc_queue_mtx);
2266 	g_wither_provider(sc->sc_provider, ENXIO);
2267 	sc->sc_provider = NULL;
2268 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2269 	g_topology_unlock();
2270 }
2271 
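/*
 * Callout handler armed in g_mirror_create(): force the device to start
 * after g_mirror_timeout seconds, even if not all components have appeared.
 */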
2272 static void
2273 g_mirror_go(void *arg)
2274 {
2275 	struct g_mirror_softc *sc;
2276 
2277 	sc = arg;
2278 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2279 	g_mirror_event_send(sc, 0,
2280 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2281 }
2282 
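/*
 * Determine the initial state of a disk from its syncid: it can become
 * ACTIVE immediately, start SYNCHRONIZING, be marked STALE, or, if it is
 * fresher than the running mirror, be destroyed and not connected at all.
 */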
2283 static u_int
2284 g_mirror_determine_state(struct g_mirror_disk *disk)
2285 {
2286 	struct g_mirror_softc *sc;
2287 	u_int state;
2288 
2289 	sc = disk->d_softc;
2290 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2291 		if ((disk->d_flags &
2292 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2293 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2294 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2295 			/* Disk does not need synchronization. */
2296 			state = G_MIRROR_DISK_STATE_ACTIVE;
2297 		} else {
2298 			if ((sc->sc_flags &
2299 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2300 			    (disk->d_flags &
2301 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2302 				/*
2303 				 * We can start synchronization from
2304 				 * the stored offset.
2305 				 */
2306 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2307 			} else {
2308 				state = G_MIRROR_DISK_STATE_STALE;
2309 			}
2310 		}
2311 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2312 		/*
2313 		 * Reset all synchronization data for this disk, because even
2314 		 * if it was synchronized, it was synchronized against disks
2315 		 * with a different syncid.
2316 		 */
2317 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2318 		disk->d_sync.ds_offset = 0;
2319 		disk->d_sync.ds_offset_done = 0;
2320 		disk->d_sync.ds_syncid = sc->sc_syncid;
2321 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2322 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2323 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2324 		} else {
2325 			state = G_MIRROR_DISK_STATE_STALE;
2326 		}
2327 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2328 		/*
2329 		 * Not good, NOT GOOD!
2330 		 * It means that the mirror was started on stale disks
2331 		 * and a fresher disk has just arrived.
2332 		 * If there were writes, the mirror is broken, sorry.
2333 		 * The best choice here is to leave this disk
2334 		 * untouched and inform the user loudly.
2335 		 */
2336 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2337 		    "disk (%s) arrived! It will not be connected to the "
2338 		    "running device.", sc->sc_name,
2339 		    g_mirror_get_diskname(disk));
2340 		g_mirror_destroy_disk(disk);
2341 		state = G_MIRROR_DISK_STATE_NONE;
2342 		/* Return immediately, because disk was destroyed. */
2343 		return (state);
2344 	}
2345 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2346 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2347 	return (state);
2348 }
2349 
2350 /*
2351  * Update device state.
2352  */
2353 static void
2354 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2355 {
2356 	struct g_mirror_disk *disk;
2357 	u_int state;
2358 
2359 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2360 
2361 	switch (sc->sc_state) {
2362 	case G_MIRROR_DEVICE_STATE_STARTING:
2363 	    {
2364 		struct g_mirror_disk *pdisk, *tdisk;
2365 		u_int dirty, ndisks, genid, syncid;
2366 		bool broken;
2367 
2368 		KASSERT(sc->sc_provider == NULL,
2369 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2370 		/*
2371 		 * Are we ready? We are, if all disks are connected or
2372 		 * if we have any disks and 'force' is true.
2373 		 */
2374 		ndisks = g_mirror_ndisks(sc, -1);
2375 		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2376 			;
2377 		} else if (ndisks == 0) {
2378 			/*
2379 			 * Disks went down in starting phase, so destroy
2380 			 * device.
2381 			 */
2382 			callout_drain(&sc->sc_callout);
2383 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2384 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2385 			    sc->sc_rootmount);
2386 			root_mount_rel(sc->sc_rootmount);
2387 			sc->sc_rootmount = NULL;
2388 			return;
2389 		} else {
2390 			return;
2391 		}
2392 
2393 		/*
2394 		 * Activate all disks with the biggest syncid.
2395 		 */
2396 		if (force) {
2397 			/*
2398 			 * If 'force' is true, we have been called due to
2399 			 * timeout, so don't bother canceling timeout.
2400 			 */
2401 			ndisks = 0;
2402 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2403 				if ((disk->d_flags &
2404 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2405 					ndisks++;
2406 				}
2407 			}
2408 			if (ndisks == 0) {
2409 				/* No valid disks found, destroy device. */
2410 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2411 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2412 				    __LINE__, sc->sc_rootmount);
2413 				root_mount_rel(sc->sc_rootmount);
2414 				sc->sc_rootmount = NULL;
2415 				return;
2416 			}
2417 		} else {
2418 			/* Cancel timeout. */
2419 			callout_drain(&sc->sc_callout);
2420 		}
2421 
2422 		/*
2423 		 * Find the biggest genid.
2424 		 */
2425 		genid = 0;
2426 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2427 			if (disk->d_genid > genid)
2428 				genid = disk->d_genid;
2429 		}
2430 		sc->sc_genid = genid;
2431 		/*
2432 		 * Remove all disks without the biggest genid.
2433 		 */
2434 		broken = false;
2435 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2436 			if (disk->d_genid < genid) {
2437 				G_MIRROR_DEBUG(0,
2438 				    "Component %s (device %s) broken, skipping.",
2439 				    g_mirror_get_diskname(disk), sc->sc_name);
2440 				g_mirror_destroy_disk(disk);
2441 				/*
2442 				 * Bump the syncid in case we discover a healthy
2443 				 * replacement disk after starting the mirror.
2444 				 */
2445 				broken = true;
2446 			}
2447 		}
2448 
2449 		/*
2450 		 * Find the biggest syncid.
2451 		 */
2452 		syncid = 0;
2453 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2454 			if (disk->d_sync.ds_syncid > syncid)
2455 				syncid = disk->d_sync.ds_syncid;
2456 		}
2457 
2458 		/*
2459 		 * Here we need to look for dirty disks and, if all disks
2460 		 * with the biggest syncid are dirty, choose the one with
2461 		 * the biggest priority and rebuild the rest.
2462 		 */
2463 		/*
2464 		 * Find the number of dirty disks with the biggest syncid.
2465 		 * Find the number of disks with the biggest syncid.
2466 		 * While here, find a disk with the biggest priority.
2467 		 */
2468 		dirty = ndisks = 0;
2469 		pdisk = NULL;
2470 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2471 			if (disk->d_sync.ds_syncid != syncid)
2472 				continue;
2473 			if ((disk->d_flags &
2474 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2475 				continue;
2476 			}
2477 			ndisks++;
2478 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2479 				dirty++;
2480 				if (pdisk == NULL ||
2481 				    pdisk->d_priority < disk->d_priority) {
2482 					pdisk = disk;
2483 				}
2484 			}
2485 		}
2486 		if (dirty == 0) {
2487 			/* No dirty disks at all, great. */
2488 		} else if (dirty == ndisks) {
2489 			/*
2490 			 * Force synchronization for all dirty disks except one
2491 			 * with the biggest priority.
2492 			 */
2493 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2494 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2495 			    "master disk for synchronization.",
2496 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2497 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2498 				if (disk->d_sync.ds_syncid != syncid)
2499 					continue;
2500 				if ((disk->d_flags &
2501 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2502 					continue;
2503 				}
2504 				KASSERT((disk->d_flags &
2505 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2506 				    ("Disk %s isn't marked as dirty.",
2507 				    g_mirror_get_diskname(disk)));
2508 				/* Skip the disk with the biggest priority. */
2509 				if (disk == pdisk)
2510 					continue;
2511 				disk->d_sync.ds_syncid = 0;
2512 			}
2513 		} else if (dirty < ndisks) {
2514 			/*
2515 			 * Force synchronization for all dirty disks.
2516 			 * We have some non-dirty disks.
2517 			 */
2518 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2519 				if (disk->d_sync.ds_syncid != syncid)
2520 					continue;
2521 				if ((disk->d_flags &
2522 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2523 					continue;
2524 				}
2525 				if ((disk->d_flags &
2526 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2527 					continue;
2528 				}
2529 				disk->d_sync.ds_syncid = 0;
2530 			}
2531 		}
2532 
2533 		/* Reset hint. */
2534 		sc->sc_hint = NULL;
2535 		sc->sc_syncid = syncid;
2536 		if (force || broken) {
2537 			/* Remember to bump syncid on first write. */
2538 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2539 		}
2540 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2541 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2542 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2543 		    g_mirror_device_state2str(state));
2544 		sc->sc_state = state;
2545 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2546 			state = g_mirror_determine_state(disk);
2547 			g_mirror_event_send(disk, state,
2548 			    G_MIRROR_EVENT_DONTWAIT);
2549 			if (state == G_MIRROR_DISK_STATE_STALE)
2550 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2551 		}
2552 		break;
2553 	    }
2554 	case G_MIRROR_DEVICE_STATE_RUNNING:
2555 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2556 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2557 			/*
2558 			 * No usable disks, so destroy the device.
2559 			 */
2560 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2561 			break;
2562 		} else if (g_mirror_ndisks(sc,
2563 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2564 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2565 			/*
2566 			 * We have active disks, launch provider if it doesn't
2567 			 * exist.
2568 			 */
2569 			if (sc->sc_provider == NULL)
2570 				g_mirror_launch_provider(sc);
2571 			if (sc->sc_rootmount != NULL) {
2572 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2573 				    __LINE__, sc->sc_rootmount);
2574 				root_mount_rel(sc->sc_rootmount);
2575 				sc->sc_rootmount = NULL;
2576 			}
2577 		}
2578 		/*
2579 		 * Genid should be bumped immediately, so do it here.
2580 		 */
2581 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2582 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2583 			g_mirror_bump_genid(sc);
2584 		}
2585 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2586 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2587 			g_mirror_bump_syncid(sc);
2588 		}
2589 		break;
2590 	default:
2591 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2592 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2593 		break;
2594 	}
2595 }
2596 
2597 /*
2598  * Update disk state and device state if needed.
2599  */
2600 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2601 	"Disk %s state changed from %s to %s (device %s).",		\
2602 	g_mirror_get_diskname(disk),					\
2603 	g_mirror_disk_state2str(disk->d_state),				\
2604 	g_mirror_disk_state2str(state), sc->sc_name)
2605 static int
2606 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2607 {
2608 	struct g_mirror_softc *sc;
2609 
2610 	sc = disk->d_softc;
2611 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2612 
2613 again:
2614 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2615 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2616 	    g_mirror_disk_state2str(state));
2617 	switch (state) {
2618 	case G_MIRROR_DISK_STATE_NEW:
2619 		/*
2620 		 * Possible scenarios:
2621 		 * 1. A new disk arrives.
2622 		 */
2623 		/* Previous state should be NONE. */
2624 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2625 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2626 		    g_mirror_disk_state2str(disk->d_state)));
2627 		DISK_STATE_CHANGED();
2628 
2629 		disk->d_state = state;
2630 		g_topology_lock();
2631 		if (LIST_EMPTY(&sc->sc_disks))
2632 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2633 		else {
2634 			struct g_mirror_disk *dp;
2635 
2636 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2637 				if (disk->d_priority >= dp->d_priority) {
2638 					LIST_INSERT_BEFORE(dp, disk, d_next);
2639 					dp = NULL;
2640 					break;
2641 				}
2642 				if (LIST_NEXT(dp, d_next) == NULL)
2643 					break;
2644 			}
2645 			if (dp != NULL)
2646 				LIST_INSERT_AFTER(dp, disk, d_next);
2647 		}
2648 		g_topology_unlock();
2649 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2650 		    sc->sc_name, g_mirror_get_diskname(disk));
2651 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2652 			break;
2653 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2654 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2655 		    g_mirror_device_state2str(sc->sc_state),
2656 		    g_mirror_get_diskname(disk),
2657 		    g_mirror_disk_state2str(disk->d_state)));
2658 		state = g_mirror_determine_state(disk);
2659 		if (state != G_MIRROR_DISK_STATE_NONE)
2660 			goto again;
2661 		break;
2662 	case G_MIRROR_DISK_STATE_ACTIVE:
2663 		/*
2664 		 * Possible scenarios:
2665 		 * 1. New disk does not need synchronization.
2666 		 * 2. Synchronization process finished successfully.
2667 		 */
2668 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2669 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2670 		    g_mirror_device_state2str(sc->sc_state),
2671 		    g_mirror_get_diskname(disk),
2672 		    g_mirror_disk_state2str(disk->d_state)));
2673 		/* Previous state should be NEW or SYNCHRONIZING. */
2674 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2675 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2676 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2677 		    g_mirror_disk_state2str(disk->d_state)));
2678 		DISK_STATE_CHANGED();
2679 
2680 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2681 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2682 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2683 			g_mirror_sync_stop(disk, 0);
2684 		}
2685 		disk->d_state = state;
2686 		disk->d_sync.ds_offset = 0;
2687 		disk->d_sync.ds_offset_done = 0;
2688 		g_mirror_update_idle(sc, disk);
2689 		g_mirror_update_metadata(disk);
2690 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2691 		    sc->sc_name, g_mirror_get_diskname(disk));
2692 		break;
2693 	case G_MIRROR_DISK_STATE_STALE:
2694 		/*
2695 		 * Possible scenarios:
2696 		 * 1. Stale disk was connected.
2697 		 */
2698 		/* Previous state should be NEW. */
2699 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2700 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2701 		    g_mirror_disk_state2str(disk->d_state)));
2702 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2703 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2704 		    g_mirror_device_state2str(sc->sc_state),
2705 		    g_mirror_get_diskname(disk),
2706 		    g_mirror_disk_state2str(disk->d_state)));
2707 		/*
2708 		 * STALE state is only possible if device is marked
2709 		 * NOAUTOSYNC.
2710 		 */
2711 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2712 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2713 		    g_mirror_device_state2str(sc->sc_state),
2714 		    g_mirror_get_diskname(disk),
2715 		    g_mirror_disk_state2str(disk->d_state)));
2716 		DISK_STATE_CHANGED();
2717 
2718 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2719 		disk->d_state = state;
2720 		g_mirror_update_metadata(disk);
2721 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2722 		    sc->sc_name, g_mirror_get_diskname(disk));
2723 		break;
2724 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2725 		/*
2726 		 * Possible scenarios:
2727 		 * 1. Disk which needs synchronization was connected.
2728 		 */
2729 		/* Previous state should be NEW. */
2730 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2731 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2732 		    g_mirror_disk_state2str(disk->d_state)));
2733 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2734 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2735 		    g_mirror_device_state2str(sc->sc_state),
2736 		    g_mirror_get_diskname(disk),
2737 		    g_mirror_disk_state2str(disk->d_state)));
2738 		DISK_STATE_CHANGED();
2739 
2740 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2741 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2742 		disk->d_state = state;
2743 		if (sc->sc_provider != NULL) {
2744 			g_mirror_sync_start(disk);
2745 			g_mirror_update_metadata(disk);
2746 		}
2747 		break;
2748 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2749 		/*
2750 		 * Possible scenarios:
2751 		 * 1. Device wasn't running yet, but a disk disappeared.
2752 		 * 2. Disk was active and disappeared.
2753 		 * 3. Disk disappeared during the synchronization process.
2754 		 */
2755 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2756 			/*
2757 			 * Previous state should be ACTIVE, STALE or
2758 			 * SYNCHRONIZING.
2759 			 */
2760 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2761 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2762 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2763 			    ("Wrong disk state (%s, %s).",
2764 			    g_mirror_get_diskname(disk),
2765 			    g_mirror_disk_state2str(disk->d_state)));
2766 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2767 			/* Previous state should be NEW. */
2768 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2769 			    ("Wrong disk state (%s, %s).",
2770 			    g_mirror_get_diskname(disk),
2771 			    g_mirror_disk_state2str(disk->d_state)));
2772 			/*
2773 			 * Cancel the pending syncid bump if the disk
2774 			 * disappeared in STARTING state.
2775 			 */
2776 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2777 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2778 #ifdef	INVARIANTS
2779 		} else {
2780 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2781 			    sc->sc_name,
2782 			    g_mirror_device_state2str(sc->sc_state),
2783 			    g_mirror_get_diskname(disk),
2784 			    g_mirror_disk_state2str(disk->d_state)));
2785 #endif
2786 		}
2787 		DISK_STATE_CHANGED();
2788 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2789 		    sc->sc_name, g_mirror_get_diskname(disk));
2790 
2791 		g_mirror_destroy_disk(disk);
2792 		break;
2793 	case G_MIRROR_DISK_STATE_DESTROY:
2794 	    {
2795 		int error;
2796 
2797 		error = g_mirror_clear_metadata(disk);
2798 		if (error != 0) {
2799 			G_MIRROR_DEBUG(0,
2800 			    "Device %s: failed to clear metadata on %s: %d.",
2801 			    sc->sc_name, g_mirror_get_diskname(disk), error);
2802 			break;
2803 		}
2804 		DISK_STATE_CHANGED();
2805 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2806 		    sc->sc_name, g_mirror_get_diskname(disk));
2807 
2808 		g_mirror_destroy_disk(disk);
2809 		sc->sc_ndisks--;
2810 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2811 			g_mirror_update_metadata(disk);
2812 		}
2813 		break;
2814 	    }
2815 	default:
2816 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2817 		break;
2818 	}
2819 	return (0);
2820 }
2821 #undef	DISK_STATE_CHANGED
2822 
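/*
 * Read the mirror metadata from the last sector of the provider attached to
 * 'cp' and decode it into 'md'.  Returns 0 on success or an errno value if
 * the metadata cannot be read, is foreign, too new, or fails its checksum.
 */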
2823 int
2824 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2825 {
2826 	struct g_provider *pp;
2827 	u_char *buf;
2828 	int error;
2829 
2830 	g_topology_assert();
2831 
2832 	error = g_access(cp, 1, 0, 0);
2833 	if (error != 0)
2834 		return (error);
2835 	pp = cp->provider;
2836 	g_topology_unlock();
2837 	/* Metadata are stored in the last sector. */
2838 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2839 	    &error);
2840 	g_topology_lock();
2841 	g_access(cp, -1, 0, 0);
2842 	if (buf == NULL) {
2843 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2844 		    cp->provider->name, error);
2845 		return (error);
2846 	}
2847 
2848 	/* Decode metadata. */
2849 	error = mirror_metadata_decode(buf, md);
2850 	g_free(buf);
2851 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2852 		return (EINVAL);
2853 	if (md->md_version > G_MIRROR_VERSION) {
2854 		G_MIRROR_DEBUG(0,
2855 		    "Kernel module is too old to handle metadata from %s.",
2856 		    cp->provider->name);
2857 		return (EINVAL);
2858 	}
2859 	if (error != 0) {
2860 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2861 		    cp->provider->name);
2862 		return (error);
2863 	}
2864 
2865 	return (0);
2866 }
2867 
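/*
 * Validate on-disk metadata against the existing device's configuration.
 * Returns EEXIST for a duplicate disk ID and EINVAL for any mismatched or
 * invalid field.
 */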
2868 static int
2869 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2870     struct g_mirror_metadata *md)
2871 {
2872 
2873 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2874 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2875 		    pp->name, md->md_did);
2876 		return (EEXIST);
2877 	}
2878 	if (md->md_all != sc->sc_ndisks) {
2879 		G_MIRROR_DEBUG(1,
2880 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2881 		    "md_all", pp->name, sc->sc_name);
2882 		return (EINVAL);
2883 	}
2884 	if (md->md_slice != sc->sc_slice) {
2885 		G_MIRROR_DEBUG(1,
2886 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2887 		    "md_slice", pp->name, sc->sc_name);
2888 		return (EINVAL);
2889 	}
2890 	if (md->md_balance != sc->sc_balance) {
2891 		G_MIRROR_DEBUG(1,
2892 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2893 		    "md_balance", pp->name, sc->sc_name);
2894 		return (EINVAL);
2895 	}
2896 #if 0
2897 	if (md->md_mediasize != sc->sc_mediasize) {
2898 		G_MIRROR_DEBUG(1,
2899 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2900 		    "md_mediasize", pp->name, sc->sc_name);
2901 		return (EINVAL);
2902 	}
2903 #endif
2904 	if (sc->sc_mediasize > pp->mediasize) {
2905 		G_MIRROR_DEBUG(1,
2906 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2907 		    sc->sc_name);
2908 		return (EINVAL);
2909 	}
2910 	if (md->md_sectorsize != sc->sc_sectorsize) {
2911 		G_MIRROR_DEBUG(1,
2912 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2913 		    "md_sectorsize", pp->name, sc->sc_name);
2914 		return (EINVAL);
2915 	}
2916 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2917 		G_MIRROR_DEBUG(1,
2918 		    "Invalid sector size of disk %s (device %s), skipping.",
2919 		    pp->name, sc->sc_name);
2920 		return (EINVAL);
2921 	}
2922 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2923 		G_MIRROR_DEBUG(1,
2924 		    "Invalid device flags on disk %s (device %s), skipping.",
2925 		    pp->name, sc->sc_name);
2926 		return (EINVAL);
2927 	}
2928 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2929 		G_MIRROR_DEBUG(1,
2930 		    "Invalid disk flags on disk %s (device %s), skipping.",
2931 		    pp->name, sc->sc_name);
2932 		return (EINVAL);
2933 	}
2934 	return (0);
2935 }
2936 
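/*
 * Add a new component to the device: validate its metadata, initialize the
 * disk structure and wait for the NEW-state event to be processed.  Old
 * on-disk metadata is upgraded to the current version on success.
 */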
2937 int
2938 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2939     struct g_mirror_metadata *md)
2940 {
2941 	struct g_mirror_disk *disk;
2942 	int error;
2943 
2944 	g_topology_assert_not();
2945 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2946 
2947 	error = g_mirror_check_metadata(sc, pp, md);
2948 	if (error != 0)
2949 		return (error);
2950 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2951 	    md->md_genid < sc->sc_genid) {
2952 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2953 		    pp->name, sc->sc_name);
2954 		return (EINVAL);
2955 	}
2956 	disk = g_mirror_init_disk(sc, pp, md, &error);
2957 	if (disk == NULL)
2958 		return (error);
2959 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2960 	    G_MIRROR_EVENT_WAIT);
2961 	if (error != 0)
2962 		return (error);
2963 	if (md->md_version < G_MIRROR_VERSION) {
2964 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2965 		    pp->name, md->md_version, G_MIRROR_VERSION);
2966 		g_mirror_update_metadata(disk);
2967 	}
2968 	return (0);
2969 }
2970 
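/*
 * Event handler for deferred destruction: destroy a device whose CLOSEWAIT
 * flag is set, once its provider has been fully closed.
 */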
2971 static void
2972 g_mirror_destroy_delayed(void *arg, int flag)
2973 {
2974 	struct g_mirror_softc *sc;
2975 	int error;
2976 
2977 	if (flag == EV_CANCEL) {
2978 		G_MIRROR_DEBUG(1, "Destroying canceled.");
2979 		return;
2980 	}
2981 	sc = arg;
2982 	g_topology_unlock();
2983 	sx_xlock(&sc->sc_lock);
2984 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2985 	    ("DESTROY flag set on %s.", sc->sc_name));
2986 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
2987 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
2988 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2989 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2990 	if (error != 0) {
2991 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2992 		    sc->sc_name, error);
2993 		sx_xunlock(&sc->sc_lock);
2994 	}
2995 	g_topology_lock();
2996 }
2997 
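/*
 * Access method for the mirror provider.  Track the provider's open count,
 * run an idle check when the last writer goes away and schedule delayed
 * destruction once a CLOSEWAIT device is fully closed.
 */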
2998 static int
2999 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
3000 {
3001 	struct g_mirror_softc *sc;
3002 	int error = 0;
3003 
3004 	g_topology_assert();
3005 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3006 	    acw, ace);
3007 
3008 	sc = pp->private;
3009 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3010 
3011 	g_topology_unlock();
3012 	sx_xlock(&sc->sc_lock);
3013 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3014 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3015 	    LIST_EMPTY(&sc->sc_disks)) {
3016 		if (acr > 0 || acw > 0 || ace > 0)
3017 			error = ENXIO;
3018 		goto end;
3019 	}
3020 	sc->sc_provider_open += acr + acw + ace;
3021 	if (pp->acw + acw == 0)
3022 		g_mirror_idle(sc, 0);
3023 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3024 	    sc->sc_provider_open == 0)
3025 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3026 end:
3027 	sx_xunlock(&sc->sc_lock);
3028 	g_topology_lock();
3029 	return (error);
3030 }
3031 
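/*
 * Create the device: allocate and initialize the softc, create the action
 * and synchronization geoms, start the worker thread and arm the startup
 * timeout.  Returns the action geom, or NULL on failure.
 */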
3032 struct g_geom *
3033 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3034     u_int type)
3035 {
3036 	struct g_mirror_softc *sc;
3037 	struct g_geom *gp;
3038 	int error, timeout;
3039 
3040 	g_topology_assert();
3041 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3042 	    md->md_mid);
3043 
3044 	/* At least one disk is required. */
3045 	if (md->md_all < 1)
3046 		return (NULL);
3047 	/*
3048 	 * Action geom.
3049 	 */
3050 	gp = g_new_geomf(mp, "%s", md->md_name);
3051 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3052 	gp->start = g_mirror_start;
3053 	gp->orphan = g_mirror_orphan;
3054 	gp->access = g_mirror_access;
3055 	gp->dumpconf = g_mirror_dumpconf;
3056 
3057 	sc->sc_type = type;
3058 	sc->sc_id = md->md_mid;
3059 	sc->sc_slice = md->md_slice;
3060 	sc->sc_balance = md->md_balance;
3061 	sc->sc_mediasize = md->md_mediasize;
3062 	sc->sc_sectorsize = md->md_sectorsize;
3063 	sc->sc_ndisks = md->md_all;
3064 	sc->sc_flags = md->md_mflags;
3065 	sc->sc_bump_id = 0;
3066 	sc->sc_idle = 1;
3067 	sc->sc_last_write = time_uptime;
3068 	sc->sc_writes = 0;
3069 	sc->sc_refcnt = 1;
3070 	sx_init(&sc->sc_lock, "gmirror:lock");
3071 	TAILQ_INIT(&sc->sc_queue);
3072 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3073 	TAILQ_INIT(&sc->sc_regular_delayed);
3074 	TAILQ_INIT(&sc->sc_inflight);
3075 	TAILQ_INIT(&sc->sc_sync_delayed);
3076 	LIST_INIT(&sc->sc_disks);
3077 	TAILQ_INIT(&sc->sc_events);
3078 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3079 	callout_init(&sc->sc_callout, 1);
3080 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3081 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3082 	gp->softc = sc;
3083 	sc->sc_geom = gp;
3084 	sc->sc_provider = NULL;
3085 	sc->sc_provider_open = 0;
3086 	/*
3087 	 * Synchronization geom.
3088 	 */
3089 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3090 	gp->softc = sc;
3091 	gp->orphan = g_mirror_orphan;
3092 	sc->sc_sync.ds_geom = gp;
3093 	sc->sc_sync.ds_ndisks = 0;
3094 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3095 	    "g_mirror %s", md->md_name);
3096 	if (error != 0) {
3097 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3098 		    sc->sc_name);
3099 		g_destroy_geom(sc->sc_sync.ds_geom);
3100 		g_destroy_geom(sc->sc_geom);
3101 		g_mirror_free_device(sc);
3102 		return (NULL);
3103 	}
3104 
3105 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3106 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3107 
3108 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3109 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3110 	/*
3111 	 * Schedule the startup timeout.
3112 	 */
3113 	timeout = g_mirror_timeout * hz;
3114 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3115 	return (sc->sc_geom);
3116 }
3117 
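/*
 * Destroy the device.  'how' selects what happens when the provider is still
 * open: SOFT fails with EBUSY, DELAYED defers destruction until the last
 * close, and HARD proceeds regardless.  Called with sc_lock exclusively
 * held; the lock is consumed on success.
 */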
3118 int
3119 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3120 {
3121 	struct g_mirror_disk *disk;
3122 
3123 	g_topology_assert_not();
3124 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3125 
3126 	if (sc->sc_provider_open != 0) {
3127 		switch (how) {
3128 		case G_MIRROR_DESTROY_SOFT:
3129 			G_MIRROR_DEBUG(1,
3130 			    "Device %s is still open (%d).", sc->sc_name,
3131 			    sc->sc_provider_open);
3132 			return (EBUSY);
3133 		case G_MIRROR_DESTROY_DELAYED:
3134 			G_MIRROR_DEBUG(1,
3135 			    "Device %s will be destroyed on last close.",
3136 			    sc->sc_name);
3137 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3138 				if (disk->d_state ==
3139 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3140 					g_mirror_sync_stop(disk, 1);
3141 				}
3142 			}
3143 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3144 			return (EBUSY);
3145 		case G_MIRROR_DESTROY_HARD:
3146 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3147 			    "can't be definitely removed.", sc->sc_name);
3148 		}
3149 	}
3150 
3151 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3152 		sx_xunlock(&sc->sc_lock);
3153 		return (0);
3154 	}
3155 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3156 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3157 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3158 	sx_xunlock(&sc->sc_lock);
3159 	mtx_lock(&sc->sc_queue_mtx);
3160 	wakeup(sc);
3161 	mtx_unlock(&sc->sc_queue_mtx);
3162 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3163 	while (sc->sc_worker != NULL)
3164 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3165 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3166 	sx_xlock(&sc->sc_lock);
3167 	g_mirror_destroy_device(sc);
3168 	return (0);
3169 }
3170 
3171 static void
3172 g_mirror_taste_orphan(struct g_consumer *cp)
3173 {
3174 
3175 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3176 	    cp->provider->name));
3177 }
3178 
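/*
 * Taste method: read and validate mirror metadata from the provider, then
 * add the disk to the matching existing device, creating the device first
 * if this is its first component to appear.
 */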
3179 static struct g_geom *
3180 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3181 {
3182 	struct g_mirror_metadata md;
3183 	struct g_mirror_softc *sc;
3184 	struct g_consumer *cp;
3185 	struct g_geom *gp;
3186 	int error;
3187 
3188 	g_topology_assert();
3189 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3190 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3191 
3192 	gp = g_new_geomf(mp, "mirror:taste");
3193 	/*
3194 	 * This orphan function should never be called.
3195 	 */
3196 	gp->orphan = g_mirror_taste_orphan;
3197 	cp = g_new_consumer(gp);
3198 	g_attach(cp, pp);
3199 	error = g_mirror_read_metadata(cp, &md);
3200 	g_detach(cp);
3201 	g_destroy_consumer(cp);
3202 	g_destroy_geom(gp);
3203 	if (error != 0)
3204 		return (NULL);
3205 	gp = NULL;
3206 
3207 	if (md.md_provider[0] != '\0' &&
3208 	    !g_compare_names(md.md_provider, pp->name))
3209 		return (NULL);
3210 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3211 		return (NULL);
3212 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3213 		G_MIRROR_DEBUG(0,
3214 		    "Device %s: provider %s marked as inactive, skipping.",
3215 		    md.md_name, pp->name);
3216 		return (NULL);
3217 	}
3218 	if (g_mirror_debug >= 2)
3219 		mirror_metadata_dump(&md);
3220 
3221 	/*
3222 	 * Let's check if device already exists.
3223 	 */
3224 	sc = NULL;
3225 	LIST_FOREACH(gp, &mp->geom, geom) {
3226 		sc = gp->softc;
3227 		if (sc == NULL)
3228 			continue;
3229 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3230 			continue;
3231 		if (sc->sc_sync.ds_geom == gp)
3232 			continue;
3233 		if (strcmp(md.md_name, sc->sc_name) != 0)
3234 			continue;
3235 		if (md.md_mid != sc->sc_id) {
3236 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3237 			    sc->sc_name);
3238 			return (NULL);
3239 		}
3240 		break;
3241 	}
3242 	if (gp == NULL) {
3243 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3244 		if (gp == NULL) {
3245 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3246 			    md.md_name);
3247 			return (NULL);
3248 		}
3249 		sc = gp->softc;
3250 	}
3251 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3252 	g_topology_unlock();
3253 	sx_xlock(&sc->sc_lock);
3254 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3255 	error = g_mirror_add_disk(sc, pp, &md);
3256 	if (error != 0) {
3257 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3258 		    pp->name, gp->name, error);
3259 		if (LIST_EMPTY(&sc->sc_disks)) {
3260 			g_cancel_event(sc);
3261 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3262 			g_topology_lock();
3263 			return (NULL);
3264 		}
3265 		gp = NULL;
3266 	}
3267 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3268 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3269 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3270 		g_topology_lock();
3271 		return (NULL);
3272 	}
3273 	sx_xunlock(&sc->sc_lock);
3274 	g_topology_lock();
3275 	return (gp);
3276 }
3277 
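/*
 * Resize method: a component's provider changed size, so rewrite the
 * metadata, which lives in the component's last sector.
 */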
3278 static void
3279 g_mirror_resize(struct g_consumer *cp)
3280 {
3281 	struct g_mirror_disk *disk;
3282 
3283 	g_topology_assert();
3284 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3285 
3286 	disk = cp->private;
3287 	if (disk == NULL)
3288 		return;
3289 	g_topology_unlock();
3290 	g_mirror_update_metadata(disk);
3291 	g_topology_lock();
3292 }
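/*
 * Class destroy_geom method: attempt a soft destroy of the device, which
 * fails if the provider is still open.
 */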
3293 
3294 static int
3295 g_mirror_destroy_geom(struct gctl_req *req __unused,
3296     struct g_class *mp __unused, struct g_geom *gp)
3297 {
3298 	struct g_mirror_softc *sc;
3299 	int error;
3300 
3301 	g_topology_unlock();
3302 	sc = gp->softc;
3303 	sx_xlock(&sc->sc_lock);
3304 	g_cancel_event(sc);
3305 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3306 	if (error != 0)
3307 		sx_xunlock(&sc->sc_lock);
3308 	g_topology_lock();
3309 	return (error);
3310 }
3311 
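/*
 * Dump device and per-disk state as XML elements for the GEOM configuration
 * tree.
 */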
3312 static void
3313 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3314     struct g_consumer *cp, struct g_provider *pp)
3315 {
3316 	struct g_mirror_softc *sc;
3317 
3318 	g_topology_assert();
3319 
3320 	sc = gp->softc;
3321 	if (sc == NULL)
3322 		return;
3323 	/* Skip synchronization geom. */
3324 	if (gp == sc->sc_sync.ds_geom)
3325 		return;
3326 	if (pp != NULL) {
3327 		/* Nothing here. */
3328 	} else if (cp != NULL) {
3329 		struct g_mirror_disk *disk;
3330 
3331 		disk = cp->private;
3332 		if (disk == NULL)
3333 			return;
3334 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3335 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3336 			sbuf_printf(sb, "%s<Synchronized>", indent);
3337 			if (disk->d_sync.ds_offset == 0)
3338 				sbuf_printf(sb, "0%%");
3339 			else
3340 				sbuf_printf(sb, "%u%%",
3341 				    (u_int)((disk->d_sync.ds_offset * 100) /
3342 				    sc->sc_mediasize));
3343 			sbuf_printf(sb, "</Synchronized>\n");
3344 			if (disk->d_sync.ds_offset > 0)
3345 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3346 				    "</BytesSynced>\n", indent,
3347 				    (intmax_t)disk->d_sync.ds_offset);
3348 		}
3349 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3350 		    disk->d_sync.ds_syncid);
3351 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3352 		    disk->d_genid);
3353 		sbuf_printf(sb, "%s<Flags>", indent);
3354 		if (disk->d_flags == 0)
3355 			sbuf_printf(sb, "NONE");
3356 		else {
3357 			int first = 1;
3358 
3359 #define	ADD_FLAG(flag, name)	do {					\
3360 	if ((disk->d_flags & (flag)) != 0) {				\
3361 		if (!first)						\
3362 			sbuf_printf(sb, ", ");				\
3363 		else							\
3364 			first = 0;					\
3365 		sbuf_printf(sb, name);					\
3366 	}								\
3367 } while (0)
3368 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3369 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3370 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3371 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3372 			    "SYNCHRONIZING");
3373 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3374 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3375 #undef	ADD_FLAG
3376 		}
3377 		sbuf_printf(sb, "</Flags>\n");
3378 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3379 		    disk->d_priority);
3380 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3381 		    g_mirror_disk_state2str(disk->d_state));
3382 	} else {
3383 		sbuf_printf(sb, "%s<Type>", indent);
3384 		switch (sc->sc_type) {
3385 		case G_MIRROR_TYPE_AUTOMATIC:
3386 			sbuf_printf(sb, "AUTOMATIC");
3387 			break;
3388 		case G_MIRROR_TYPE_MANUAL:
3389 			sbuf_printf(sb, "MANUAL");
3390 			break;
3391 		default:
3392 			sbuf_printf(sb, "UNKNOWN");
3393 			break;
3394 		}
3395 		sbuf_printf(sb, "</Type>\n");
3396 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3397 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3398 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3399 		sbuf_printf(sb, "%s<Flags>", indent);
3400 		if (sc->sc_flags == 0)
3401 			sbuf_printf(sb, "NONE");
3402 		else {
3403 			int first = 1;
3404 
3405 #define	ADD_FLAG(flag, name)	do {					\
3406 	if ((sc->sc_flags & (flag)) != 0) {				\
3407 		if (!first)						\
3408 			sbuf_printf(sb, ", ");				\
3409 		else							\
3410 			first = 0;					\
3411 		sbuf_printf(sb, name);					\
3412 	}								\
3413 } while (0)
3414 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3415 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3416 #undef	ADD_FLAG
3417 		}
3418 		sbuf_printf(sb, "</Flags>\n");
3419 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3420 		    (u_int)sc->sc_slice);
3421 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3422 		    balance_name(sc->sc_balance));
3423 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3424 		    sc->sc_ndisks);
3425 		sbuf_printf(sb, "%s<State>", indent);
3426 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3427 			sbuf_printf(sb, "%s", "STARTING");
3428 		else if (sc->sc_ndisks ==
3429 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3430 			sbuf_printf(sb, "%s", "COMPLETE");
3431 		else
3432 			sbuf_printf(sb, "%s", "DEGRADED");
3433 		sbuf_printf(sb, "</State>\n");
3434 	}
3435 }
3436 
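/*
 * Shutdown eventhandler: once filesystems are synced, run a final idle check
 * on every mirror and request delayed destruction so that components are
 * marked clean and closed gracefully.
 */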
3437 static void
3438 g_mirror_shutdown_post_sync(void *arg, int howto)
3439 {
3440 	struct g_class *mp;
3441 	struct g_geom *gp, *gp2;
3442 	struct g_mirror_softc *sc;
3443 	int error;
3444 
3445 	if (panicstr != NULL)
3446 		return;
3447 
3448 	mp = arg;
3449 	g_topology_lock();
3450 	g_mirror_shutdown = 1;
3451 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3452 		if ((sc = gp->softc) == NULL)
3453 			continue;
3454 		/* Skip synchronization geom. */
3455 		if (gp == sc->sc_sync.ds_geom)
3456 			continue;
3457 		g_topology_unlock();
3458 		sx_xlock(&sc->sc_lock);
3459 		g_mirror_idle(sc, -1);
3460 		g_cancel_event(sc);
3461 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3462 		if (error != 0)
3463 			sx_xunlock(&sc->sc_lock);
3464 		g_topology_lock();
3465 	}
3466 	g_topology_unlock();
3467 }
3468 
3469 static void
3470 g_mirror_init(struct g_class *mp)
3471 {
3472 
3473 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3474 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3475 	if (g_mirror_post_sync == NULL)
3476 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3477 }
3478 
3479 static void
3480 g_mirror_fini(struct g_class *mp)
3481 {
3482 
3483 	if (g_mirror_post_sync != NULL)
3484 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3485 }
3486 
3487 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3488 MODULE_VERSION(geom_mirror, 0);
3489