xref: /freebsd/sys/geom/mirror/g_mirror.c (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bio.h>
35 #include <sys/eventhandler.h>
36 #include <sys/fail.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/limits.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/sbuf.h>
45 #include <sys/sched.h>
46 #include <sys/sx.h>
47 #include <sys/sysctl.h>
48 
49 #include <geom/geom.h>
50 #include <geom/mirror/g_mirror.h>
51 
/* Register the "geom_mirror" kernel feature. */
FEATURE(geom_mirror, "GEOM mirroring support");

/* malloc(9) type used for the allocations made in this file. */
static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");

/*
 * The kern.geom.mirror sysctl node and the knobs attached to it.
 */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
    "GEOM_MIRROR stuff");
/* kern.geom.mirror.debug: debug level; this variable has external linkage. */
int g_mirror_debug = 0;
SYSCTL_INT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RWTUN, &g_mirror_debug, 0,
    "Debug level");
/* kern.geom.mirror.timeout: time to wait on all mirror components. */
static u_int g_mirror_timeout = 4;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RWTUN, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/*
 * kern.geom.mirror.idletime: g_mirror_idle() compares this against the
 * time_uptime delta since the last write before marking components clean.
 */
static u_int g_mirror_idletime = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RWTUN,
    &g_mirror_idletime, 0, "Mark components as clean when idling");
/*
 * kern.geom.mirror.disconnect_on_failure: when non-zero (and more than one
 * disk is ACTIVE) a failing component is sent a DISCONNECTED event.
 */
static u_int g_mirror_disconnect_on_failure = 1;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RWTUN,
    &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
/* kern.geom.mirror.sync_requests: declared CTLFLAG_RDTUN, unlike the others. */
static u_int g_mirror_syncreqs = 2;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
    &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
/* kern.geom.mirror.sync_update_period: in seconds, per the description. */
static u_int g_mirror_sync_period = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_update_period, CTLFLAG_RWTUN,
    &g_mirror_sync_period, 0,
    "Metadata update period during synchronization, in seconds");
78 
/*
 * msleep() wrapper that emits a level-4 debug message naming the calling
 * function and the sleep channel both before sleeping and after waking up.
 * The arguments are passed to msleep() unchanged; its return value is
 * discarded, so callers cannot distinguish a wakeup from a timeout.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
84 
/*
 * Event handler tag; NOTE(review): presumably registered by g_mirror_init(),
 * which is not part of the code visible here - confirm.
 */
static eventhandler_tag g_mirror_post_sync = NULL;
/* When non-zero, g_mirror_idle() no longer honours the idle timeout. */
static int g_mirror_shutdown = 0;

/* Class methods referenced by the g_mirror_class initializer. */
static g_ctl_destroy_geom_t g_mirror_destroy_geom;
static g_taste_t g_mirror_taste;
static g_init_t g_mirror_init;
static g_fini_t g_mirror_fini;
static g_provgone_t g_mirror_providergone;
static g_resize_t g_mirror_resize;
94 
/*
 * GEOM class descriptor for the mirror class: wires the class name and
 * version to the method implementations.  g_mirror_config is not declared
 * in this part of the file (presumably it comes from g_mirror.h).
 */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.version = G_VERSION,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom,
	.init = g_mirror_init,
	.fini = g_mirror_fini,
	.providergone = g_mirror_providergone,
	.resize = g_mirror_resize
};
106 
107 
/*
 * Forward declarations of static functions that are called in this file
 * before the point where they are defined.
 */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, bool force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_reinit(const struct g_mirror_disk *disk,
    struct bio *bp, off_t offset);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
static void g_mirror_register_request(struct g_mirror_softc *sc,
    struct bio *bp);
static void g_mirror_sync_release(struct g_mirror_softc *sc);
119 
120 
121 static const char *
122 g_mirror_disk_state2str(int state)
123 {
124 
125 	switch (state) {
126 	case G_MIRROR_DISK_STATE_NONE:
127 		return ("NONE");
128 	case G_MIRROR_DISK_STATE_NEW:
129 		return ("NEW");
130 	case G_MIRROR_DISK_STATE_ACTIVE:
131 		return ("ACTIVE");
132 	case G_MIRROR_DISK_STATE_STALE:
133 		return ("STALE");
134 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
135 		return ("SYNCHRONIZING");
136 	case G_MIRROR_DISK_STATE_DISCONNECTED:
137 		return ("DISCONNECTED");
138 	case G_MIRROR_DISK_STATE_DESTROY:
139 		return ("DESTROY");
140 	default:
141 		return ("INVALID");
142 	}
143 }
144 
145 static const char *
146 g_mirror_device_state2str(int state)
147 {
148 
149 	switch (state) {
150 	case G_MIRROR_DEVICE_STATE_STARTING:
151 		return ("STARTING");
152 	case G_MIRROR_DEVICE_STATE_RUNNING:
153 		return ("RUNNING");
154 	default:
155 		return ("INVALID");
156 	}
157 }
158 
159 static const char *
160 g_mirror_get_diskname(struct g_mirror_disk *disk)
161 {
162 
163 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
164 		return ("[unknown]");
165 	return (disk->d_name);
166 }
167 
168 /*
169  * --- Events handling functions ---
170  * Events in geom_mirror are used to maintain disks and device status
171  * from one thread to simplify locking.
172  */
/*
 * Release the memory of an event allocated by g_mirror_event_send().
 * The event is not unlinked from any queue here.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
179 
/*
 * Queue an event on the device's event list and optionally wait for it.
 *
 * arg   - the softc when G_MIRROR_EVENT_DEVICE is set in flags, otherwise
 *         the disk the event refers to.
 * state - stored in the event as e_state.
 * flags - G_MIRROR_EVENT_* bits, stored in the event as e_flags.
 *
 * With G_MIRROR_EVENT_DONTWAIT the function returns 0 as soon as the event
 * is queued and does not free it.  Otherwise the caller must hold sc_lock
 * exclusively: the lock is dropped while waiting for the event to be marked
 * G_MIRROR_EVENT_DONE and re-taken before the event's e_error is returned.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	/* M_WAITOK: the allocation may sleep; the result is used unchecked. */
	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	/* Wake whoever sleeps on the softc, with the queue mutex held. */
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	sx_assert(&sc->sc_lock, SX_XLOCKED);
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	sx_xunlock(&sc->sc_lock);
	/*
	 * PDROP makes msleep() return with sc_events_mtx released, hence the
	 * lock is taken again on every iteration.  The flag is tested without
	 * the mutex; the hz * 5 timeout bounds the wait between re-checks.
	 */
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* The waiter owns the event once it is done: fetch the result, free. */
	error = ep->e_error;
	g_mirror_event_free(ep);
	sx_xlock(&sc->sc_lock);
	return (error);
}
223 
224 static struct g_mirror_event *
225 g_mirror_event_first(struct g_mirror_softc *sc)
226 {
227 	struct g_mirror_event *ep;
228 
229 	mtx_lock(&sc->sc_events_mtx);
230 	ep = TAILQ_FIRST(&sc->sc_events);
231 	mtx_unlock(&sc->sc_events_mtx);
232 	return (ep);
233 }
234 
/*
 * Unlink the given event from the device's event queue under sc_events_mtx.
 * The event itself is neither freed nor marked as done.
 */
static void
g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
{

	mtx_lock(&sc->sc_events_mtx);
	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
}
243 
244 static void
245 g_mirror_event_cancel(struct g_mirror_disk *disk)
246 {
247 	struct g_mirror_softc *sc;
248 	struct g_mirror_event *ep, *tmpep;
249 
250 	sc = disk->d_softc;
251 	sx_assert(&sc->sc_lock, SX_XLOCKED);
252 
253 	mtx_lock(&sc->sc_events_mtx);
254 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
255 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
256 			continue;
257 		if (ep->e_disk != disk)
258 			continue;
259 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
260 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
261 			g_mirror_event_free(ep);
262 		else {
263 			ep->e_error = ECANCELED;
264 			wakeup(ep);
265 		}
266 	}
267 	mtx_unlock(&sc->sc_events_mtx);
268 }
269 
270 /*
271  * Return the number of disks in given state.
272  * If state is equal to -1, count all connected disks.
273  */
274 u_int
275 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
276 {
277 	struct g_mirror_disk *disk;
278 	u_int n = 0;
279 
280 	sx_assert(&sc->sc_lock, SX_LOCKED);
281 
282 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
283 		if (state == -1 || disk->d_state == state)
284 			n++;
285 	}
286 	return (n);
287 }
288 
289 /*
290  * Find a disk in mirror by its disk ID.
291  */
292 static struct g_mirror_disk *
293 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
294 {
295 	struct g_mirror_disk *disk;
296 
297 	sx_assert(&sc->sc_lock, SX_XLOCKED);
298 
299 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
300 		if (disk->d_id == id)
301 			return (disk);
302 	}
303 	return (NULL);
304 }
305 
306 static u_int
307 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
308 {
309 	struct bio *bp;
310 	u_int nreqs = 0;
311 
312 	mtx_lock(&sc->sc_queue_mtx);
313 	TAILQ_FOREACH(bp, &sc->sc_queue, bio_queue) {
314 		if (bp->bio_from == cp)
315 			nreqs++;
316 	}
317 	mtx_unlock(&sc->sc_queue_mtx);
318 	return (nreqs);
319 }
320 
321 static int
322 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
323 {
324 
325 	if (cp->index > 0) {
326 		G_MIRROR_DEBUG(2,
327 		    "I/O requests for %s exist, can't destroy it now.",
328 		    cp->provider->name);
329 		return (1);
330 	}
331 	if (g_mirror_nrequests(sc, cp) > 0) {
332 		G_MIRROR_DEBUG(2,
333 		    "I/O requests for %s in queue, can't destroy it now.",
334 		    cp->provider->name);
335 		return (1);
336 	}
337 	return (0);
338 }
339 
340 static void
341 g_mirror_destroy_consumer(void *arg, int flags __unused)
342 {
343 	struct g_consumer *cp;
344 
345 	g_topology_assert();
346 
347 	cp = arg;
348 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
349 	g_detach(cp);
350 	g_destroy_consumer(cp);
351 }
352 
353 static void
354 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
355 {
356 	struct g_provider *pp;
357 	int retaste_wait;
358 
359 	g_topology_assert();
360 
361 	cp->private = NULL;
362 	if (g_mirror_is_busy(sc, cp))
363 		return;
364 	pp = cp->provider;
365 	retaste_wait = 0;
366 	if (cp->acw == 1) {
367 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
368 			retaste_wait = 1;
369 	}
370 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
371 	    -cp->acw, -cp->ace, 0);
372 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
373 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
374 	if (retaste_wait) {
375 		/*
376 		 * After retaste event was send (inside g_access()), we can send
377 		 * event to detach and destroy consumer.
378 		 * A class, which has consumer to the given provider connected
379 		 * will not receive retaste event for the provider.
380 		 * This is the way how I ignore retaste events when I close
381 		 * consumers opened for write: I detach and destroy consumer
382 		 * after retaste event is sent.
383 		 */
384 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
385 		return;
386 	}
387 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
388 	g_detach(cp);
389 	g_destroy_consumer(cp);
390 }
391 
392 static int
393 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
394 {
395 	struct g_consumer *cp;
396 	int error;
397 
398 	g_topology_assert_not();
399 	KASSERT(disk->d_consumer == NULL,
400 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
401 
402 	g_topology_lock();
403 	cp = g_new_consumer(disk->d_softc->sc_geom);
404 	cp->flags |= G_CF_DIRECT_RECEIVE;
405 	error = g_attach(cp, pp);
406 	if (error != 0) {
407 		g_destroy_consumer(cp);
408 		g_topology_unlock();
409 		return (error);
410 	}
411 	error = g_access(cp, 1, 1, 1);
412 	if (error != 0) {
413 		g_detach(cp);
414 		g_destroy_consumer(cp);
415 		g_topology_unlock();
416 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
417 		    pp->name, error);
418 		return (error);
419 	}
420 	g_topology_unlock();
421 	disk->d_consumer = cp;
422 	disk->d_consumer->private = disk;
423 	disk->d_consumer->index = 0;
424 
425 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
426 	return (0);
427 }
428 
429 static void
430 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
431 {
432 
433 	g_topology_assert();
434 
435 	if (cp == NULL)
436 		return;
437 	if (cp->provider != NULL)
438 		g_mirror_kill_consumer(sc, cp);
439 	else
440 		g_destroy_consumer(cp);
441 }
442 
443 /*
444  * Initialize disk. This means allocate memory, create consumer, attach it
445  * to the provider and open access (r1w1e1) to it.
446  */
447 static struct g_mirror_disk *
448 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
449     struct g_mirror_metadata *md, int *errorp)
450 {
451 	struct g_mirror_disk *disk;
452 	int i, error;
453 
454 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
455 	if (disk == NULL) {
456 		error = ENOMEM;
457 		goto fail;
458 	}
459 	disk->d_softc = sc;
460 	error = g_mirror_connect_disk(disk, pp);
461 	if (error != 0)
462 		goto fail;
463 	disk->d_id = md->md_did;
464 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
465 	disk->d_priority = md->md_priority;
466 	disk->d_flags = md->md_dflags;
467 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
468 	if (error == 0 && i != 0)
469 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
470 	if (md->md_provider[0] != '\0')
471 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
472 	disk->d_sync.ds_consumer = NULL;
473 	disk->d_sync.ds_offset = md->md_sync_offset;
474 	disk->d_sync.ds_offset_done = md->md_sync_offset;
475 	disk->d_sync.ds_update_ts = time_uptime;
476 	disk->d_genid = md->md_genid;
477 	disk->d_sync.ds_syncid = md->md_syncid;
478 	if (errorp != NULL)
479 		*errorp = 0;
480 	return (disk);
481 fail:
482 	if (errorp != NULL)
483 		*errorp = error;
484 	if (disk != NULL)
485 		free(disk, M_MIRROR);
486 	return (NULL);
487 }
488 
489 static void
490 g_mirror_destroy_disk(struct g_mirror_disk *disk)
491 {
492 	struct g_mirror_softc *sc;
493 
494 	g_topology_assert_not();
495 	sc = disk->d_softc;
496 	sx_assert(&sc->sc_lock, SX_XLOCKED);
497 
498 	LIST_REMOVE(disk, d_next);
499 	g_mirror_event_cancel(disk);
500 	if (sc->sc_hint == disk)
501 		sc->sc_hint = NULL;
502 	switch (disk->d_state) {
503 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
504 		g_mirror_sync_stop(disk, 1);
505 		/* FALLTHROUGH */
506 	case G_MIRROR_DISK_STATE_NEW:
507 	case G_MIRROR_DISK_STATE_STALE:
508 	case G_MIRROR_DISK_STATE_ACTIVE:
509 		g_topology_lock();
510 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
511 		g_topology_unlock();
512 		free(disk, M_MIRROR);
513 		break;
514 	default:
515 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
516 		    g_mirror_get_diskname(disk),
517 		    g_mirror_disk_state2str(disk->d_state)));
518 	}
519 }
520 
/*
 * Final release of a device: destroy the softc's three mutexes and its sx
 * lock, then free the softc itself.  Callers in this file invoke it when
 * sc_refcnt drops to zero.
 */
static void
g_mirror_free_device(struct g_mirror_softc *sc)
{

	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	mtx_destroy(&sc->sc_done_mtx);
	sx_destroy(&sc->sc_lock);
	free(sc, M_MIRROR);
}
531 
532 static void
533 g_mirror_providergone(struct g_provider *pp)
534 {
535 	struct g_mirror_softc *sc = pp->private;
536 
537 	if ((--sc->sc_refcnt) == 0)
538 		g_mirror_free_device(sc);
539 }
540 
/*
 * Tear down a whole mirror device: its provider, every disk, all pending
 * events, the callout, the consumers of the synchronization geom and both
 * geoms.
 *
 * Must be called with sc_lock held exclusively and without the topology
 * lock.  sc_lock is released here, and the softc is freed when this drops
 * the last reference, so the caller must not touch sc afterwards.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp, *tmpcp;

	g_topology_assert_not();
	sx_assert(&sc->sc_lock, SX_XLOCKED);

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	/*
	 * g_mirror_destroy_disk() unlinks the disk from sc_disks, so keep
	 * taking the list head until the list is empty.  Each disk is marked
	 * clean and has its metadata written before it goes away.
	 */
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
		g_mirror_update_metadata(disk);
		g_mirror_destroy_disk(disk);
	}
	/*
	 * Flush the remaining events: free the ones nobody waits for and
	 * complete the others with ECANCELED so their senders wake up and
	 * free them.
	 */
	while ((ep = g_mirror_event_first(sc)) != NULL) {
		g_mirror_event_remove(sc, ep);
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);

	g_topology_lock();
	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
		g_mirror_disconnect_consumer(sc, cp);
	}
	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	sx_xunlock(&sc->sc_lock);
	/* Drop this function's reference; the last one out frees the softc. */
	if ((--sc->sc_refcnt) == 0)
		g_mirror_free_device(sc);
	g_topology_unlock();
}
588 
589 static void
590 g_mirror_orphan(struct g_consumer *cp)
591 {
592 	struct g_mirror_disk *disk;
593 
594 	g_topology_assert();
595 
596 	disk = cp->private;
597 	if (disk == NULL)
598 		return;
599 	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
600 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
601 	    G_MIRROR_EVENT_DONTWAIT);
602 }
603 
604 /*
605  * Function should return the next active disk on the list.
606  * It is possible that it will be the same disk as given.
607  * If there are no active disks on list, NULL is returned.
608  */
609 static __inline struct g_mirror_disk *
610 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
611 {
612 	struct g_mirror_disk *dp;
613 
614 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
615 	    dp = LIST_NEXT(dp, d_next)) {
616 		if (dp == NULL)
617 			dp = LIST_FIRST(&sc->sc_disks);
618 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
619 			break;
620 	}
621 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
622 		return (NULL);
623 	return (dp);
624 }
625 
626 static struct g_mirror_disk *
627 g_mirror_get_disk(struct g_mirror_softc *sc)
628 {
629 	struct g_mirror_disk *disk;
630 
631 	if (sc->sc_hint == NULL) {
632 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
633 		if (sc->sc_hint == NULL)
634 			return (NULL);
635 	}
636 	disk = sc->sc_hint;
637 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
638 		disk = g_mirror_find_next(sc, disk);
639 		if (disk == NULL)
640 			return (NULL);
641 	}
642 	sc->sc_hint = g_mirror_find_next(sc, disk);
643 	return (disk);
644 }
645 
646 static int
647 g_mirror_write_metadata(struct g_mirror_disk *disk,
648     struct g_mirror_metadata *md)
649 {
650 	struct g_mirror_softc *sc;
651 	struct g_consumer *cp;
652 	off_t offset, length;
653 	u_char *sector;
654 	int error = 0;
655 
656 	g_topology_assert_not();
657 	sc = disk->d_softc;
658 	sx_assert(&sc->sc_lock, SX_LOCKED);
659 
660 	cp = disk->d_consumer;
661 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
662 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
663 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
664 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
665 	    cp->acw, cp->ace));
666 	length = cp->provider->sectorsize;
667 	offset = cp->provider->mediasize - length;
668 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
669 	if (md != NULL &&
670 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0) {
671 		/*
672 		 * Handle the case, when the size of parent provider reduced.
673 		 */
674 		if (offset < md->md_mediasize)
675 			error = ENOSPC;
676 		else
677 			mirror_metadata_encode(md, sector);
678 	}
679 	KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_metadata_write, error);
680 	if (error == 0)
681 		error = g_write_data(cp, offset, sector, length);
682 	free(sector, M_MIRROR);
683 	if (error != 0) {
684 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
685 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
686 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
687 			    "(device=%s, error=%d).",
688 			    g_mirror_get_diskname(disk), sc->sc_name, error);
689 		} else {
690 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
691 			    "(device=%s, error=%d).",
692 			    g_mirror_get_diskname(disk), sc->sc_name, error);
693 		}
694 		if (g_mirror_disconnect_on_failure &&
695 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
696 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
697 			g_mirror_event_send(disk,
698 			    G_MIRROR_DISK_STATE_DISCONNECTED,
699 			    G_MIRROR_EVENT_DONTWAIT);
700 		}
701 	}
702 	return (error);
703 }
704 
705 static int
706 g_mirror_clear_metadata(struct g_mirror_disk *disk)
707 {
708 	int error;
709 
710 	g_topology_assert_not();
711 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
712 
713 	if (disk->d_softc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
714 		return (0);
715 	error = g_mirror_write_metadata(disk, NULL);
716 	if (error == 0) {
717 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
718 		    g_mirror_get_diskname(disk));
719 	} else {
720 		G_MIRROR_DEBUG(0,
721 		    "Cannot clear metadata on disk %s (error=%d).",
722 		    g_mirror_get_diskname(disk), error);
723 	}
724 	return (error);
725 }
726 
727 void
728 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
729     struct g_mirror_metadata *md)
730 {
731 
732 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
733 	md->md_version = G_MIRROR_VERSION;
734 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
735 	md->md_mid = sc->sc_id;
736 	md->md_all = sc->sc_ndisks;
737 	md->md_slice = sc->sc_slice;
738 	md->md_balance = sc->sc_balance;
739 	md->md_genid = sc->sc_genid;
740 	md->md_mediasize = sc->sc_mediasize;
741 	md->md_sectorsize = sc->sc_sectorsize;
742 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
743 	bzero(md->md_provider, sizeof(md->md_provider));
744 	if (disk == NULL) {
745 		md->md_did = arc4random();
746 		md->md_priority = 0;
747 		md->md_syncid = 0;
748 		md->md_dflags = 0;
749 		md->md_sync_offset = 0;
750 		md->md_provsize = 0;
751 	} else {
752 		md->md_did = disk->d_id;
753 		md->md_priority = disk->d_priority;
754 		md->md_syncid = disk->d_sync.ds_syncid;
755 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
756 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
757 			md->md_sync_offset = disk->d_sync.ds_offset_done;
758 		else
759 			md->md_sync_offset = 0;
760 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
761 			strlcpy(md->md_provider,
762 			    disk->d_consumer->provider->name,
763 			    sizeof(md->md_provider));
764 		}
765 		md->md_provsize = disk->d_consumer->provider->mediasize;
766 	}
767 }
768 
769 void
770 g_mirror_update_metadata(struct g_mirror_disk *disk)
771 {
772 	struct g_mirror_softc *sc;
773 	struct g_mirror_metadata md;
774 	int error;
775 
776 	g_topology_assert_not();
777 	sc = disk->d_softc;
778 	sx_assert(&sc->sc_lock, SX_LOCKED);
779 
780 	if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
781 		return;
782 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WIPE) == 0)
783 		g_mirror_fill_metadata(sc, disk, &md);
784 	error = g_mirror_write_metadata(disk, &md);
785 	if (error == 0) {
786 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
787 		    g_mirror_get_diskname(disk));
788 	} else {
789 		G_MIRROR_DEBUG(0,
790 		    "Cannot update metadata on disk %s (error=%d).",
791 		    g_mirror_get_diskname(disk), error);
792 	}
793 }
794 
795 static void
796 g_mirror_bump_syncid(struct g_mirror_softc *sc)
797 {
798 	struct g_mirror_disk *disk;
799 
800 	g_topology_assert_not();
801 	sx_assert(&sc->sc_lock, SX_XLOCKED);
802 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
803 	    ("%s called with no active disks (device=%s).", __func__,
804 	    sc->sc_name));
805 
806 	sc->sc_syncid++;
807 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
808 	    sc->sc_syncid);
809 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
810 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
811 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
812 			disk->d_sync.ds_syncid = sc->sc_syncid;
813 			g_mirror_update_metadata(disk);
814 		}
815 	}
816 }
817 
818 static void
819 g_mirror_bump_genid(struct g_mirror_softc *sc)
820 {
821 	struct g_mirror_disk *disk;
822 
823 	g_topology_assert_not();
824 	sx_assert(&sc->sc_lock, SX_XLOCKED);
825 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
826 	    ("%s called with no active disks (device=%s).", __func__,
827 	    sc->sc_name));
828 
829 	sc->sc_genid++;
830 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
831 	    sc->sc_genid);
832 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
833 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
834 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
835 			disk->d_genid = sc->sc_genid;
836 			g_mirror_update_metadata(disk);
837 		}
838 	}
839 }
840 
841 static int
842 g_mirror_idle(struct g_mirror_softc *sc, int acw)
843 {
844 	struct g_mirror_disk *disk;
845 	int timeout;
846 
847 	g_topology_assert_not();
848 	sx_assert(&sc->sc_lock, SX_XLOCKED);
849 
850 	if (sc->sc_provider == NULL)
851 		return (0);
852 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
853 		return (0);
854 	if (sc->sc_idle)
855 		return (0);
856 	if (sc->sc_writes > 0)
857 		return (0);
858 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
859 		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
860 		if (!g_mirror_shutdown && timeout > 0)
861 			return (timeout);
862 	}
863 	sc->sc_idle = 1;
864 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
865 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
866 			continue;
867 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
868 		    g_mirror_get_diskname(disk), sc->sc_name);
869 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
870 		g_mirror_update_metadata(disk);
871 	}
872 	return (0);
873 }
874 
875 static void
876 g_mirror_unidle(struct g_mirror_softc *sc)
877 {
878 	struct g_mirror_disk *disk;
879 
880 	g_topology_assert_not();
881 	sx_assert(&sc->sc_lock, SX_XLOCKED);
882 
883 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
884 		return;
885 	sc->sc_idle = 0;
886 	sc->sc_last_write = time_uptime;
887 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
888 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
889 			continue;
890 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
891 		    g_mirror_get_diskname(disk), sc->sc_name);
892 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
893 		g_mirror_update_metadata(disk);
894 	}
895 }
896 
/*
 * Completion callback for a regular request: tag the bio as regular, append
 * it to the device queue under sc_queue_mtx and wake up the thread sleeping
 * on the softc.  The softc is found through the geom of the bio's consumer.
 */
static void
g_mirror_done(struct bio *bp)
{
	struct g_mirror_softc *sc;

	sc = bp->bio_from->geom->softc;
	/* The tag is set before the bio becomes visible on the queue. */
	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
	mtx_lock(&sc->sc_queue_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
	mtx_unlock(&sc->sc_queue_mtx);
	wakeup(sc);
}
909 
910 static void
911 g_mirror_regular_request_error(struct g_mirror_softc *sc,
912     struct g_mirror_disk *disk, struct bio *bp)
913 {
914 
915 	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == EOPNOTSUPP)
916 		return;
917 
918 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
919 		disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
920 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
921 		    bp->bio_error);
922 	} else {
923 		G_MIRROR_LOGREQ(1, bp, "Request failed (error=%d).",
924 		    bp->bio_error);
925 	}
926 	if (g_mirror_disconnect_on_failure &&
927 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
928 		if (bp->bio_error == ENXIO &&
929 		    bp->bio_cmd == BIO_READ)
930 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
931 		else if (bp->bio_error == ENXIO)
932 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID_NOW;
933 		else
934 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
935 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
936 		    G_MIRROR_EVENT_DONTWAIT);
937 	}
938 }
939 
/*
 * Handle the completion of one child bio (bp) of a regular request and,
 * once all children are accounted for, complete the parent (pbp).
 *
 * The child is always destroyed here.  The parent is delivered with
 * g_io_deliver() when its last child has come in, except for a failed read
 * on a device with other than exactly one ACTIVE disk, which is put back on
 * the device queue instead.
 *
 * Must be called without the topology lock.
 */
static void
g_mirror_regular_request(struct g_mirror_softc *sc, struct bio *bp)
{
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();
	KASSERT(sc->sc_provider == bp->bio_parent->bio_to,
	    ("regular request %p with unexpected origin", bp));

	pbp = bp->bio_parent;
	/* One request less outstanding on this consumer / this device. */
	bp->bio_from->index--;
	if (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_DELETE)
		sc->sc_writes--;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer is no longer bound to a disk.
		 * g_mirror_kill_consumer() clears ->private and backs off
		 * while the consumer is busy, so try again now that this
		 * request has finished.
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	}

	/* Fail points that may overwrite bp->bio_error, one per command. */
	switch (bp->bio_cmd) {
	case BIO_READ:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_read,
		    bp->bio_error);
		break;
	case BIO_WRITE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_write,
		    bp->bio_error);
		break;
	case BIO_DELETE:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_delete,
		    bp->bio_error);
		break;
	case BIO_FLUSH:
		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_regular_request_flush,
		    bp->bio_error);
		break;
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		/* Fast path: neither this child nor an earlier one failed. */
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			if (pbp->bio_cmd == BIO_WRITE ||
			    pbp->bio_cmd == BIO_DELETE) {
				TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
				/* Release delayed sync requests if possible. */
				g_mirror_sync_release(sc);
			}
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		/* Remember the first error seen on the parent. */
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		if (disk != NULL)
			g_mirror_regular_request_error(sc, disk, bp);
		/*
		 * For write-type parents a failed child is taken out of both
		 * counters, so that below bio_children == 0 means that every
		 * child failed and bio_children == bio_inbed means that all
		 * remaining children succeeded.
		 */
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
		case BIO_FLUSH:
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_inbed < pbp->bio_children)
			break;
		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
			g_io_deliver(pbp, pbp->bio_error);
		else {
			/*
			 * Clear the error and hand the parent back to the
			 * device queue (presumably so the read is retried
			 * on another disk - confirm in the worker code).
			 */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			TAILQ_INSERT_TAIL(&sc->sc_queue, pbp, bio_queue);
			mtx_unlock(&sc->sc_queue_mtx);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
	case BIO_FLUSH:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		if (pbp->bio_cmd == BIO_WRITE || pbp->bio_cmd == BIO_DELETE) {
			TAILQ_REMOVE(&sc->sc_inflight, pbp, bio_queue);
			/* Release delayed sync requests if possible. */
			g_mirror_sync_release(sc);
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
1057 
1058 static void
1059 g_mirror_sync_done(struct bio *bp)
1060 {
1061 	struct g_mirror_softc *sc;
1062 
1063 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
1064 	sc = bp->bio_from->geom->softc;
1065 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
1066 	mtx_lock(&sc->sc_queue_mtx);
1067 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1068 	mtx_unlock(&sc->sc_queue_mtx);
1069 	wakeup(sc);
1070 }
1071 
1072 static void
1073 g_mirror_candelete(struct bio *bp)
1074 {
1075 	struct g_mirror_softc *sc;
1076 	struct g_mirror_disk *disk;
1077 	int *val;
1078 
1079 	sc = bp->bio_to->private;
1080 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1081 		if (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE)
1082 			break;
1083 	}
1084 	val = (int *)bp->bio_data;
1085 	*val = (disk != NULL);
1086 	g_io_deliver(bp, 0);
1087 }
1088 
1089 static void
1090 g_mirror_kernel_dump(struct bio *bp)
1091 {
1092 	struct g_mirror_softc *sc;
1093 	struct g_mirror_disk *disk;
1094 	struct bio *cbp;
1095 	struct g_kerneldump *gkd;
1096 
1097 	/*
1098 	 * We configure dumping to the first component, because this component
1099 	 * will be used for reading with 'prefer' balance algorithm.
1100 	 * If the component with the highest priority is currently disconnected
1101 	 * we will not be able to read the dump after the reboot if it will be
1102 	 * connected and synchronized later. Can we do something better?
1103 	 */
1104 	sc = bp->bio_to->private;
1105 	disk = LIST_FIRST(&sc->sc_disks);
1106 
1107 	gkd = (struct g_kerneldump *)bp->bio_data;
1108 	if (gkd->length > bp->bio_to->mediasize)
1109 		gkd->length = bp->bio_to->mediasize;
1110 	cbp = g_clone_bio(bp);
1111 	if (cbp == NULL) {
1112 		g_io_deliver(bp, ENOMEM);
1113 		return;
1114 	}
1115 	cbp->bio_done = g_std_done;
1116 	g_io_request(cbp, disk->d_consumer);
1117 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1118 	    g_mirror_get_diskname(disk));
1119 }
1120 
1121 static void
1122 g_mirror_start(struct bio *bp)
1123 {
1124 	struct g_mirror_softc *sc;
1125 
1126 	sc = bp->bio_to->private;
1127 	/*
1128 	 * If sc == NULL or there are no valid disks, provider's error
1129 	 * should be set and g_mirror_start() should not be called at all.
1130 	 */
1131 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1132 	    ("Provider's error should be set (error=%d)(mirror=%s).",
1133 	    bp->bio_to->error, bp->bio_to->name));
1134 	G_MIRROR_LOGREQ(3, bp, "Request received.");
1135 
1136 	switch (bp->bio_cmd) {
1137 	case BIO_READ:
1138 	case BIO_WRITE:
1139 	case BIO_DELETE:
1140 	case BIO_FLUSH:
1141 		break;
1142 	case BIO_GETATTR:
1143 		if (!strcmp(bp->bio_attribute, "GEOM::candelete")) {
1144 			g_mirror_candelete(bp);
1145 			return;
1146 		} else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1147 			g_mirror_kernel_dump(bp);
1148 			return;
1149 		}
1150 		/* FALLTHROUGH */
1151 	default:
1152 		g_io_deliver(bp, EOPNOTSUPP);
1153 		return;
1154 	}
1155 	mtx_lock(&sc->sc_queue_mtx);
1156 	if (bp->bio_to->error != 0) {
1157 		mtx_unlock(&sc->sc_queue_mtx);
1158 		g_io_deliver(bp, bp->bio_to->error);
1159 		return;
1160 	}
1161 	TAILQ_INSERT_TAIL(&sc->sc_queue, bp, bio_queue);
1162 	mtx_unlock(&sc->sc_queue_mtx);
1163 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1164 	wakeup(sc);
1165 }
1166 
1167 /*
1168  * Return TRUE if the given request is colliding with a in-progress
1169  * synchronization request.
1170  */
1171 static bool
1172 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1173 {
1174 	struct g_mirror_disk *disk;
1175 	struct bio *sbp;
1176 	off_t rstart, rend, sstart, send;
1177 	u_int i;
1178 
1179 	if (sc->sc_sync.ds_ndisks == 0)
1180 		return (false);
1181 	rstart = bp->bio_offset;
1182 	rend = bp->bio_offset + bp->bio_length;
1183 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1184 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1185 			continue;
1186 		for (i = 0; i < g_mirror_syncreqs; i++) {
1187 			sbp = disk->d_sync.ds_bios[i];
1188 			if (sbp == NULL)
1189 				continue;
1190 			sstart = sbp->bio_offset;
1191 			send = sbp->bio_offset + sbp->bio_length;
1192 			if (rend > sstart && rstart < send)
1193 				return (true);
1194 		}
1195 	}
1196 	return (false);
1197 }
1198 
1199 /*
1200  * Return TRUE if the given sync request is colliding with a in-progress regular
1201  * request.
1202  */
1203 static bool
1204 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1205 {
1206 	off_t rstart, rend, sstart, send;
1207 	struct bio *bp;
1208 
1209 	if (sc->sc_sync.ds_ndisks == 0)
1210 		return (false);
1211 	sstart = sbp->bio_offset;
1212 	send = sbp->bio_offset + sbp->bio_length;
1213 	TAILQ_FOREACH(bp, &sc->sc_inflight, bio_queue) {
1214 		rstart = bp->bio_offset;
1215 		rend = bp->bio_offset + bp->bio_length;
1216 		if (rend > sstart && rstart < send)
1217 			return (true);
1218 	}
1219 	return (false);
1220 }
1221 
1222 /*
1223  * Puts regular request onto delayed queue.
1224  */
1225 static void
1226 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1227 {
1228 
1229 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1230 	TAILQ_INSERT_TAIL(&sc->sc_regular_delayed, bp, bio_queue);
1231 }
1232 
1233 /*
1234  * Puts synchronization request onto delayed queue.
1235  */
1236 static void
1237 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1238 {
1239 
1240 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1241 	TAILQ_INSERT_TAIL(&sc->sc_sync_delayed, bp, bio_queue);
1242 }
1243 
1244 /*
1245  * Requeue delayed regular requests.
1246  */
1247 static void
1248 g_mirror_regular_release(struct g_mirror_softc *sc)
1249 {
1250 	struct bio *bp;
1251 
1252 	if ((bp = TAILQ_FIRST(&sc->sc_regular_delayed)) == NULL)
1253 		return;
1254 	if (g_mirror_sync_collision(sc, bp))
1255 		return;
1256 
1257 	G_MIRROR_DEBUG(2, "Requeuing regular requests after collision.");
1258 	mtx_lock(&sc->sc_queue_mtx);
1259 	TAILQ_CONCAT(&sc->sc_regular_delayed, &sc->sc_queue, bio_queue);
1260 	TAILQ_SWAP(&sc->sc_regular_delayed, &sc->sc_queue, bio, bio_queue);
1261 	mtx_unlock(&sc->sc_queue_mtx);
1262 }
1263 
1264 /*
1265  * Releases delayed sync requests which don't collide anymore with regular
1266  * requests.
1267  */
1268 static void
1269 g_mirror_sync_release(struct g_mirror_softc *sc)
1270 {
1271 	struct bio *bp, *bp2;
1272 
1273 	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed, bio_queue, bp2) {
1274 		if (g_mirror_regular_collision(sc, bp))
1275 			continue;
1276 		TAILQ_REMOVE(&sc->sc_sync_delayed, bp, bio_queue);
1277 		G_MIRROR_LOGREQ(2, bp,
1278 		    "Releasing delayed synchronization request.");
1279 		g_io_request(bp, bp->bio_from);
1280 	}
1281 }
1282 
1283 /*
1284  * Free a synchronization request and clear its slot in the array.
1285  */
1286 static void
1287 g_mirror_sync_request_free(struct g_mirror_disk *disk, struct bio *bp)
1288 {
1289 	int idx;
1290 
1291 	if (disk != NULL && disk->d_sync.ds_bios != NULL) {
1292 		idx = (int)(uintptr_t)bp->bio_caller1;
1293 		KASSERT(disk->d_sync.ds_bios[idx] == bp,
1294 		    ("unexpected sync BIO at %p:%d", disk, idx));
1295 		disk->d_sync.ds_bios[idx] = NULL;
1296 	}
1297 	free(bp->bio_data, M_MIRROR);
1298 	g_destroy_bio(bp);
1299 }
1300 
1301 /*
1302  * Handle synchronization requests.
1303  * Every synchronization request is a two-step process: first, a read request is
1304  * sent to the mirror provider via the sync consumer. If that request completes
1305  * successfully, it is converted to a write and sent to the disk being
1306  * synchronized. If the write also completes successfully, the synchronization
1307  * offset is advanced and a new read request is submitted.
1308  */
1309 static void
1310 g_mirror_sync_request(struct g_mirror_softc *sc, struct bio *bp)
1311 {
1312 	struct g_mirror_disk *disk;
1313 	struct g_mirror_disk_sync *sync;
1314 
1315 	KASSERT((bp->bio_cmd == BIO_READ &&
1316 	    bp->bio_from->geom == sc->sc_sync.ds_geom) ||
1317 	    (bp->bio_cmd == BIO_WRITE && bp->bio_from->geom == sc->sc_geom),
1318 	    ("Sync BIO %p with unexpected origin", bp));
1319 
1320 	bp->bio_from->index--;
1321 	disk = bp->bio_from->private;
1322 	if (disk == NULL) {
1323 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1324 		g_topology_lock();
1325 		g_mirror_kill_consumer(sc, bp->bio_from);
1326 		g_topology_unlock();
1327 		g_mirror_sync_request_free(NULL, bp);
1328 		sx_xlock(&sc->sc_lock);
1329 		return;
1330 	}
1331 
1332 	sync = &disk->d_sync;
1333 
1334 	/*
1335 	 * Synchronization request.
1336 	 */
1337 	switch (bp->bio_cmd) {
1338 	case BIO_READ: {
1339 		struct g_consumer *cp;
1340 
1341 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_read,
1342 		    bp->bio_error);
1343 
1344 		if (bp->bio_error != 0) {
1345 			G_MIRROR_LOGREQ(0, bp,
1346 			    "Synchronization request failed (error=%d).",
1347 			    bp->bio_error);
1348 
1349 			/*
1350 			 * The read error will trigger a syncid bump, so there's
1351 			 * no need to do that here.
1352 			 *
1353 			 * The read error handling for regular requests will
1354 			 * retry the read from all active mirrors before passing
1355 			 * the error back up, so there's no need to retry here.
1356 			 */
1357 			g_mirror_sync_request_free(disk, bp);
1358 			g_mirror_event_send(disk,
1359 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1360 			    G_MIRROR_EVENT_DONTWAIT);
1361 			return;
1362 		}
1363 		G_MIRROR_LOGREQ(3, bp,
1364 		    "Synchronization request half-finished.");
1365 		bp->bio_cmd = BIO_WRITE;
1366 		bp->bio_cflags = 0;
1367 		cp = disk->d_consumer;
1368 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1369 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1370 		    cp->acr, cp->acw, cp->ace));
1371 		cp->index++;
1372 		g_io_request(bp, cp);
1373 		return;
1374 	}
1375 	case BIO_WRITE: {
1376 		off_t offset;
1377 		int i;
1378 
1379 		KFAIL_POINT_ERROR(DEBUG_FP, g_mirror_sync_request_write,
1380 		    bp->bio_error);
1381 
1382 		if (bp->bio_error != 0) {
1383 			G_MIRROR_LOGREQ(0, bp,
1384 			    "Synchronization request failed (error=%d).",
1385 			    bp->bio_error);
1386 			g_mirror_sync_request_free(disk, bp);
1387 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1388 			g_mirror_event_send(disk,
1389 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1390 			    G_MIRROR_EVENT_DONTWAIT);
1391 			return;
1392 		}
1393 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1394 		if (sync->ds_offset >= sc->sc_mediasize ||
1395 		    sync->ds_consumer == NULL ||
1396 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1397 			/* Don't send more synchronization requests. */
1398 			sync->ds_inflight--;
1399 			g_mirror_sync_request_free(disk, bp);
1400 			if (sync->ds_inflight > 0)
1401 				return;
1402 			if (sync->ds_consumer == NULL ||
1403 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1404 				return;
1405 			}
1406 			/* Disk up-to-date, activate it. */
1407 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1408 			    G_MIRROR_EVENT_DONTWAIT);
1409 			return;
1410 		}
1411 
1412 		/* Send next synchronization request. */
1413 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
1414 		sync->ds_offset += bp->bio_length;
1415 
1416 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1417 		sync->ds_consumer->index++;
1418 
1419 		/*
1420 		 * Delay the request if it is colliding with a regular request.
1421 		 */
1422 		if (g_mirror_regular_collision(sc, bp))
1423 			g_mirror_sync_delay(sc, bp);
1424 		else
1425 			g_io_request(bp, sync->ds_consumer);
1426 
1427 		/* Requeue delayed requests if possible. */
1428 		g_mirror_regular_release(sc);
1429 
1430 		/* Find the smallest offset */
1431 		offset = sc->sc_mediasize;
1432 		for (i = 0; i < g_mirror_syncreqs; i++) {
1433 			bp = sync->ds_bios[i];
1434 			if (bp != NULL && bp->bio_offset < offset)
1435 				offset = bp->bio_offset;
1436 		}
1437 		if (g_mirror_sync_period > 0 &&
1438 		    time_uptime - sync->ds_update_ts > g_mirror_sync_period) {
1439 			sync->ds_offset_done = offset;
1440 			g_mirror_update_metadata(disk);
1441 			sync->ds_update_ts = time_uptime;
1442 		}
1443 		return;
1444 	}
1445 	default:
1446 		panic("Invalid I/O request %p", bp);
1447 	}
1448 }
1449 
1450 static void
1451 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1452 {
1453 	struct g_mirror_disk *disk;
1454 	struct g_consumer *cp;
1455 	struct bio *cbp;
1456 
1457 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1458 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1459 			break;
1460 	}
1461 	if (disk == NULL) {
1462 		if (bp->bio_error == 0)
1463 			bp->bio_error = ENXIO;
1464 		g_io_deliver(bp, bp->bio_error);
1465 		return;
1466 	}
1467 	cbp = g_clone_bio(bp);
1468 	if (cbp == NULL) {
1469 		if (bp->bio_error == 0)
1470 			bp->bio_error = ENOMEM;
1471 		g_io_deliver(bp, bp->bio_error);
1472 		return;
1473 	}
1474 	/*
1475 	 * Fill in the component buf structure.
1476 	 */
1477 	cp = disk->d_consumer;
1478 	cbp->bio_done = g_mirror_done;
1479 	cbp->bio_to = cp->provider;
1480 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1481 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1482 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1483 	    cp->acw, cp->ace));
1484 	cp->index++;
1485 	g_io_request(cbp, cp);
1486 }
1487 
1488 static void
1489 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1490 {
1491 	struct g_mirror_disk *disk;
1492 	struct g_consumer *cp;
1493 	struct bio *cbp;
1494 
1495 	disk = g_mirror_get_disk(sc);
1496 	if (disk == NULL) {
1497 		if (bp->bio_error == 0)
1498 			bp->bio_error = ENXIO;
1499 		g_io_deliver(bp, bp->bio_error);
1500 		return;
1501 	}
1502 	cbp = g_clone_bio(bp);
1503 	if (cbp == NULL) {
1504 		if (bp->bio_error == 0)
1505 			bp->bio_error = ENOMEM;
1506 		g_io_deliver(bp, bp->bio_error);
1507 		return;
1508 	}
1509 	/*
1510 	 * Fill in the component buf structure.
1511 	 */
1512 	cp = disk->d_consumer;
1513 	cbp->bio_done = g_mirror_done;
1514 	cbp->bio_to = cp->provider;
1515 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1516 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1517 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1518 	    cp->acw, cp->ace));
1519 	cp->index++;
1520 	g_io_request(cbp, cp);
1521 }
1522 
1523 #define TRACK_SIZE  (1 * 1024 * 1024)
1524 #define LOAD_SCALE	256
1525 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1526 
1527 static void
1528 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1529 {
1530 	struct g_mirror_disk *disk, *dp;
1531 	struct g_consumer *cp;
1532 	struct bio *cbp;
1533 	int prio, best;
1534 
1535 	/* Find a disk with the smallest load. */
1536 	disk = NULL;
1537 	best = INT_MAX;
1538 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1539 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1540 			continue;
1541 		prio = dp->load;
1542 		/* If disk head is precisely in position - highly prefer it. */
1543 		if (dp->d_last_offset == bp->bio_offset)
1544 			prio -= 2 * LOAD_SCALE;
1545 		else
1546 		/* If disk head is close to position - prefer it. */
1547 		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1548 			prio -= 1 * LOAD_SCALE;
1549 		if (prio <= best) {
1550 			disk = dp;
1551 			best = prio;
1552 		}
1553 	}
1554 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1555 	cbp = g_clone_bio(bp);
1556 	if (cbp == NULL) {
1557 		if (bp->bio_error == 0)
1558 			bp->bio_error = ENOMEM;
1559 		g_io_deliver(bp, bp->bio_error);
1560 		return;
1561 	}
1562 	/*
1563 	 * Fill in the component buf structure.
1564 	 */
1565 	cp = disk->d_consumer;
1566 	cbp->bio_done = g_mirror_done;
1567 	cbp->bio_to = cp->provider;
1568 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1569 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1570 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1571 	    cp->acw, cp->ace));
1572 	cp->index++;
1573 	/* Remember last head position */
1574 	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1575 	/* Update loads. */
1576 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1577 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1578 		    dp->load * 7) / 8;
1579 	}
1580 	g_io_request(cbp, cp);
1581 }
1582 
1583 static void
1584 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1585 {
1586 	struct bio_queue queue;
1587 	struct g_mirror_disk *disk;
1588 	struct g_consumer *cp;
1589 	struct bio *cbp;
1590 	off_t left, mod, offset, slice;
1591 	u_char *data;
1592 	u_int ndisks;
1593 
1594 	if (bp->bio_length <= sc->sc_slice) {
1595 		g_mirror_request_round_robin(sc, bp);
1596 		return;
1597 	}
1598 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1599 	slice = bp->bio_length / ndisks;
1600 	mod = slice % sc->sc_provider->sectorsize;
1601 	if (mod != 0)
1602 		slice += sc->sc_provider->sectorsize - mod;
1603 	/*
1604 	 * Allocate all bios before sending any request, so we can
1605 	 * return ENOMEM in nice and clean way.
1606 	 */
1607 	left = bp->bio_length;
1608 	offset = bp->bio_offset;
1609 	data = bp->bio_data;
1610 	TAILQ_INIT(&queue);
1611 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1612 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1613 			continue;
1614 		cbp = g_clone_bio(bp);
1615 		if (cbp == NULL) {
1616 			while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1617 				TAILQ_REMOVE(&queue, cbp, bio_queue);
1618 				g_destroy_bio(cbp);
1619 			}
1620 			if (bp->bio_error == 0)
1621 				bp->bio_error = ENOMEM;
1622 			g_io_deliver(bp, bp->bio_error);
1623 			return;
1624 		}
1625 		TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1626 		cbp->bio_done = g_mirror_done;
1627 		cbp->bio_caller1 = disk;
1628 		cbp->bio_to = disk->d_consumer->provider;
1629 		cbp->bio_offset = offset;
1630 		cbp->bio_data = data;
1631 		cbp->bio_length = MIN(left, slice);
1632 		left -= cbp->bio_length;
1633 		if (left == 0)
1634 			break;
1635 		offset += cbp->bio_length;
1636 		data += cbp->bio_length;
1637 	}
1638 	while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1639 		TAILQ_REMOVE(&queue, cbp, bio_queue);
1640 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1641 		disk = cbp->bio_caller1;
1642 		cbp->bio_caller1 = NULL;
1643 		cp = disk->d_consumer;
1644 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1645 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1646 		    cp->acr, cp->acw, cp->ace));
1647 		disk->d_consumer->index++;
1648 		g_io_request(cbp, disk->d_consumer);
1649 	}
1650 }
1651 
1652 static void
1653 g_mirror_register_request(struct g_mirror_softc *sc, struct bio *bp)
1654 {
1655 	struct bio_queue queue;
1656 	struct bio *cbp;
1657 	struct g_consumer *cp;
1658 	struct g_mirror_disk *disk;
1659 
1660 	sx_assert(&sc->sc_lock, SA_XLOCKED);
1661 
1662 	/*
1663 	 * To avoid ordering issues, if a write is deferred because of a
1664 	 * collision with a sync request, all I/O is deferred until that
1665 	 * write is initiated.
1666 	 */
1667 	if (bp->bio_from->geom != sc->sc_sync.ds_geom &&
1668 	    !TAILQ_EMPTY(&sc->sc_regular_delayed)) {
1669 		g_mirror_regular_delay(sc, bp);
1670 		return;
1671 	}
1672 
1673 	switch (bp->bio_cmd) {
1674 	case BIO_READ:
1675 		switch (sc->sc_balance) {
1676 		case G_MIRROR_BALANCE_LOAD:
1677 			g_mirror_request_load(sc, bp);
1678 			break;
1679 		case G_MIRROR_BALANCE_PREFER:
1680 			g_mirror_request_prefer(sc, bp);
1681 			break;
1682 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1683 			g_mirror_request_round_robin(sc, bp);
1684 			break;
1685 		case G_MIRROR_BALANCE_SPLIT:
1686 			g_mirror_request_split(sc, bp);
1687 			break;
1688 		}
1689 		return;
1690 	case BIO_WRITE:
1691 	case BIO_DELETE:
1692 		/*
1693 		 * Delay the request if it is colliding with a synchronization
1694 		 * request.
1695 		 */
1696 		if (g_mirror_sync_collision(sc, bp)) {
1697 			g_mirror_regular_delay(sc, bp);
1698 			return;
1699 		}
1700 
1701 		if (sc->sc_idle)
1702 			g_mirror_unidle(sc);
1703 		else
1704 			sc->sc_last_write = time_uptime;
1705 
1706 		/*
1707 		 * Bump syncid on first write.
1708 		 */
1709 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1710 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1711 			g_mirror_bump_syncid(sc);
1712 		}
1713 
1714 		/*
1715 		 * Allocate all bios before sending any request, so we can
1716 		 * return ENOMEM in nice and clean way.
1717 		 */
1718 		TAILQ_INIT(&queue);
1719 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1720 			switch (disk->d_state) {
1721 			case G_MIRROR_DISK_STATE_ACTIVE:
1722 				break;
1723 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1724 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1725 					continue;
1726 				break;
1727 			default:
1728 				continue;
1729 			}
1730 			if (bp->bio_cmd == BIO_DELETE &&
1731 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1732 				continue;
1733 			cbp = g_clone_bio(bp);
1734 			if (cbp == NULL) {
1735 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1736 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1737 					g_destroy_bio(cbp);
1738 				}
1739 				if (bp->bio_error == 0)
1740 					bp->bio_error = ENOMEM;
1741 				g_io_deliver(bp, bp->bio_error);
1742 				return;
1743 			}
1744 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1745 			cbp->bio_done = g_mirror_done;
1746 			cp = disk->d_consumer;
1747 			cbp->bio_caller1 = cp;
1748 			cbp->bio_to = cp->provider;
1749 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1750 			    ("Consumer %s not opened (r%dw%de%d).",
1751 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1752 		}
1753 		if (TAILQ_EMPTY(&queue)) {
1754 			KASSERT(bp->bio_cmd == BIO_DELETE,
1755 			    ("No consumers for regular request %p", bp));
1756 			g_io_deliver(bp, EOPNOTSUPP);
1757 			return;
1758 		}
1759 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1760 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1761 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1762 			cp = cbp->bio_caller1;
1763 			cbp->bio_caller1 = NULL;
1764 			cp->index++;
1765 			sc->sc_writes++;
1766 			g_io_request(cbp, cp);
1767 		}
1768 		/*
1769 		 * Put request onto inflight queue, so we can check if new
1770 		 * synchronization requests don't collide with it.
1771 		 */
1772 		TAILQ_INSERT_TAIL(&sc->sc_inflight, bp, bio_queue);
1773 		return;
1774 	case BIO_FLUSH:
1775 		TAILQ_INIT(&queue);
1776 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1777 			if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1778 				continue;
1779 			cbp = g_clone_bio(bp);
1780 			if (cbp == NULL) {
1781 				while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1782 					TAILQ_REMOVE(&queue, cbp, bio_queue);
1783 					g_destroy_bio(cbp);
1784 				}
1785 				if (bp->bio_error == 0)
1786 					bp->bio_error = ENOMEM;
1787 				g_io_deliver(bp, bp->bio_error);
1788 				return;
1789 			}
1790 			TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
1791 			cbp->bio_done = g_mirror_done;
1792 			cbp->bio_caller1 = disk;
1793 			cbp->bio_to = disk->d_consumer->provider;
1794 		}
1795 		KASSERT(!TAILQ_EMPTY(&queue),
1796 		    ("No consumers for regular request %p", bp));
1797 		while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
1798 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1799 			TAILQ_REMOVE(&queue, cbp, bio_queue);
1800 			disk = cbp->bio_caller1;
1801 			cbp->bio_caller1 = NULL;
1802 			cp = disk->d_consumer;
1803 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1804 			    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1805 			    cp->acr, cp->acw, cp->ace));
1806 			cp->index++;
1807 			g_io_request(cbp, cp);
1808 		}
1809 		break;
1810 	default:
1811 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1812 		    bp->bio_cmd, sc->sc_name));
1813 		break;
1814 	}
1815 }
1816 
1817 static int
1818 g_mirror_can_destroy(struct g_mirror_softc *sc)
1819 {
1820 	struct g_geom *gp;
1821 	struct g_consumer *cp;
1822 
1823 	g_topology_assert();
1824 	gp = sc->sc_geom;
1825 	if (gp->softc == NULL)
1826 		return (1);
1827 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1828 		return (0);
1829 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1830 		if (g_mirror_is_busy(sc, cp))
1831 			return (0);
1832 	}
1833 	gp = sc->sc_sync.ds_geom;
1834 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1835 		if (g_mirror_is_busy(sc, cp))
1836 			return (0);
1837 	}
1838 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1839 	    sc->sc_name);
1840 	return (1);
1841 }
1842 
1843 static int
1844 g_mirror_try_destroy(struct g_mirror_softc *sc)
1845 {
1846 
1847 	if (sc->sc_rootmount != NULL) {
1848 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1849 		    sc->sc_rootmount);
1850 		root_mount_rel(sc->sc_rootmount);
1851 		sc->sc_rootmount = NULL;
1852 	}
1853 	g_topology_lock();
1854 	if (!g_mirror_can_destroy(sc)) {
1855 		g_topology_unlock();
1856 		return (0);
1857 	}
1858 	sc->sc_geom->softc = NULL;
1859 	sc->sc_sync.ds_geom->softc = NULL;
1860 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DRAIN) != 0) {
1861 		g_topology_unlock();
1862 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1863 		    &sc->sc_worker);
1864 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1865 		sx_xunlock(&sc->sc_lock);
1866 		wakeup(&sc->sc_worker);
1867 		sc->sc_worker = NULL;
1868 	} else {
1869 		g_topology_unlock();
1870 		g_mirror_destroy_device(sc);
1871 	}
1872 	return (1);
1873 }
1874 
1875 /*
1876  * Worker thread.
1877  */
1878 static void
1879 g_mirror_worker(void *arg)
1880 {
1881 	struct g_mirror_softc *sc;
1882 	struct g_mirror_event *ep;
1883 	struct bio *bp;
1884 	int timeout;
1885 
1886 	sc = arg;
1887 	thread_lock(curthread);
1888 	sched_prio(curthread, PRIBIO);
1889 	thread_unlock(curthread);
1890 
1891 	sx_xlock(&sc->sc_lock);
1892 	for (;;) {
1893 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1894 		/*
1895 		 * First take a look at events.
1896 		 * This is important to handle events before any I/O requests.
1897 		 */
1898 		ep = g_mirror_event_first(sc);
1899 		if (ep != NULL) {
1900 			g_mirror_event_remove(sc, ep);
1901 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1902 				/* Update only device status. */
1903 				G_MIRROR_DEBUG(3,
1904 				    "Running event for device %s.",
1905 				    sc->sc_name);
1906 				ep->e_error = 0;
1907 				g_mirror_update_device(sc, true);
1908 			} else {
1909 				/* Update disk status. */
1910 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1911 				     g_mirror_get_diskname(ep->e_disk));
1912 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1913 				    ep->e_state);
1914 				if (ep->e_error == 0)
1915 					g_mirror_update_device(sc, false);
1916 			}
1917 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1918 				KASSERT(ep->e_error == 0,
1919 				    ("Error cannot be handled."));
1920 				g_mirror_event_free(ep);
1921 			} else {
1922 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1923 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1924 				    ep);
1925 				mtx_lock(&sc->sc_events_mtx);
1926 				wakeup(ep);
1927 				mtx_unlock(&sc->sc_events_mtx);
1928 			}
1929 			if ((sc->sc_flags &
1930 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1931 				if (g_mirror_try_destroy(sc)) {
1932 					curthread->td_pflags &= ~TDP_GEOM;
1933 					G_MIRROR_DEBUG(1, "Thread exiting.");
1934 					kproc_exit(0);
1935 				}
1936 			}
1937 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1938 			continue;
1939 		}
1940 
1941 		/*
1942 		 * Check if we can mark array as CLEAN and if we can't take
1943 		 * how much seconds should we wait.
1944 		 */
1945 		timeout = g_mirror_idle(sc, -1);
1946 
1947 		/*
1948 		 * Handle I/O requests.
1949 		 */
1950 		mtx_lock(&sc->sc_queue_mtx);
1951 		bp = TAILQ_FIRST(&sc->sc_queue);
1952 		if (bp != NULL)
1953 			TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
1954 		else {
1955 			if ((sc->sc_flags &
1956 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1957 				mtx_unlock(&sc->sc_queue_mtx);
1958 				if (g_mirror_try_destroy(sc)) {
1959 					curthread->td_pflags &= ~TDP_GEOM;
1960 					G_MIRROR_DEBUG(1, "Thread exiting.");
1961 					kproc_exit(0);
1962 				}
1963 				mtx_lock(&sc->sc_queue_mtx);
1964 				if (!TAILQ_EMPTY(&sc->sc_queue)) {
1965 					mtx_unlock(&sc->sc_queue_mtx);
1966 					continue;
1967 				}
1968 			}
1969 			if (g_mirror_event_first(sc) != NULL) {
1970 				mtx_unlock(&sc->sc_queue_mtx);
1971 				continue;
1972 			}
1973 			sx_xunlock(&sc->sc_lock);
1974 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1975 			    timeout * hz);
1976 			sx_xlock(&sc->sc_lock);
1977 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1978 			continue;
1979 		}
1980 		mtx_unlock(&sc->sc_queue_mtx);
1981 
1982 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1983 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1984 			/*
1985 			 * Handle completion of the first half (the read) of a
1986 			 * block synchronization operation.
1987 			 */
1988 			g_mirror_sync_request(sc, bp);
1989 		} else if (bp->bio_to != sc->sc_provider) {
1990 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1991 				/*
1992 				 * Handle completion of a regular I/O request.
1993 				 */
1994 				g_mirror_regular_request(sc, bp);
1995 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1996 				/*
1997 				 * Handle completion of the second half (the
1998 				 * write) of a block synchronization operation.
1999 				 */
2000 				g_mirror_sync_request(sc, bp);
2001 			else {
2002 				KASSERT(0,
2003 				    ("Invalid request cflags=0x%hx to=%s.",
2004 				    bp->bio_cflags, bp->bio_to->name));
2005 			}
2006 		} else {
2007 			/*
2008 			 * Initiate an I/O request.
2009 			 */
2010 			g_mirror_register_request(sc, bp);
2011 		}
2012 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
2013 	}
2014 }
2015 
2016 static void
2017 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
2018 {
2019 
2020 	sx_assert(&sc->sc_lock, SX_LOCKED);
2021 
2022 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
2023 		return;
2024 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2025 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as dirty.",
2026 		    g_mirror_get_diskname(disk), sc->sc_name);
2027 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2028 	} else if (sc->sc_idle &&
2029 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2030 		G_MIRROR_DEBUG(2, "Disk %s (device %s) marked as clean.",
2031 		    g_mirror_get_diskname(disk), sc->sc_name);
2032 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2033 	}
2034 }
2035 
2036 static void
2037 g_mirror_sync_reinit(const struct g_mirror_disk *disk, struct bio *bp,
2038     off_t offset)
2039 {
2040 	void *data;
2041 	int idx;
2042 
2043 	data = bp->bio_data;
2044 	idx = (int)(uintptr_t)bp->bio_caller1;
2045 	g_reset_bio(bp);
2046 
2047 	bp->bio_cmd = BIO_READ;
2048 	bp->bio_data = data;
2049 	bp->bio_done = g_mirror_sync_done;
2050 	bp->bio_from = disk->d_sync.ds_consumer;
2051 	bp->bio_to = disk->d_softc->sc_provider;
2052 	bp->bio_caller1 = (void *)(uintptr_t)idx;
2053 	bp->bio_offset = offset;
2054 	bp->bio_length = MIN(MAXPHYS,
2055 	    disk->d_softc->sc_mediasize - bp->bio_offset);
2056 }
2057 
2058 static void
2059 g_mirror_sync_start(struct g_mirror_disk *disk)
2060 {
2061 	struct g_mirror_softc *sc;
2062 	struct g_mirror_disk_sync *sync;
2063 	struct g_consumer *cp;
2064 	struct bio *bp;
2065 	int error, i;
2066 
2067 	g_topology_assert_not();
2068 	sc = disk->d_softc;
2069 	sync = &disk->d_sync;
2070 	sx_assert(&sc->sc_lock, SX_LOCKED);
2071 
2072 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2073 	    ("Disk %s is not marked for synchronization.",
2074 	    g_mirror_get_diskname(disk)));
2075 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2076 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
2077 	    sc->sc_state));
2078 
2079 	sx_xunlock(&sc->sc_lock);
2080 	g_topology_lock();
2081 	cp = g_new_consumer(sc->sc_sync.ds_geom);
2082 	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
2083 	error = g_attach(cp, sc->sc_provider);
2084 	KASSERT(error == 0,
2085 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
2086 	error = g_access(cp, 1, 0, 0);
2087 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
2088 	g_topology_unlock();
2089 	sx_xlock(&sc->sc_lock);
2090 
2091 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
2092 	    g_mirror_get_diskname(disk));
2093 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
2094 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2095 	KASSERT(sync->ds_consumer == NULL,
2096 	    ("Sync consumer already exists (device=%s, disk=%s).",
2097 	    sc->sc_name, g_mirror_get_diskname(disk)));
2098 
2099 	sync->ds_consumer = cp;
2100 	sync->ds_consumer->private = disk;
2101 	sync->ds_consumer->index = 0;
2102 
2103 	/*
2104 	 * Allocate memory for synchronization bios and initialize them.
2105 	 */
2106 	sync->ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
2107 	    M_MIRROR, M_WAITOK);
2108 	for (i = 0; i < g_mirror_syncreqs; i++) {
2109 		bp = g_alloc_bio();
2110 		sync->ds_bios[i] = bp;
2111 
2112 		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
2113 		bp->bio_caller1 = (void *)(uintptr_t)i;
2114 		g_mirror_sync_reinit(disk, bp, sync->ds_offset);
2115 		sync->ds_offset += bp->bio_length;
2116 	}
2117 
2118 	/* Increase the number of disks in SYNCHRONIZING state. */
2119 	sc->sc_sync.ds_ndisks++;
2120 	/* Set the number of in-flight synchronization requests. */
2121 	sync->ds_inflight = g_mirror_syncreqs;
2122 
2123 	/*
2124 	 * Fire off first synchronization requests.
2125 	 */
2126 	for (i = 0; i < g_mirror_syncreqs; i++) {
2127 		bp = sync->ds_bios[i];
2128 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
2129 		sync->ds_consumer->index++;
2130 		/*
2131 		 * Delay the request if it is colliding with a regular request.
2132 		 */
2133 		if (g_mirror_regular_collision(sc, bp))
2134 			g_mirror_sync_delay(sc, bp);
2135 		else
2136 			g_io_request(bp, sync->ds_consumer);
2137 	}
2138 }
2139 
2140 /*
2141  * Stop synchronization process.
2142  * type: 0 - synchronization finished
2143  *       1 - synchronization stopped
2144  */
2145 static void
2146 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
2147 {
2148 	struct g_mirror_softc *sc;
2149 	struct g_consumer *cp;
2150 
2151 	g_topology_assert_not();
2152 	sc = disk->d_softc;
2153 	sx_assert(&sc->sc_lock, SX_LOCKED);
2154 
2155 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2156 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2157 	    g_mirror_disk_state2str(disk->d_state)));
2158 	if (disk->d_sync.ds_consumer == NULL)
2159 		return;
2160 
2161 	if (type == 0) {
2162 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2163 		    sc->sc_name, g_mirror_get_diskname(disk));
2164 	} else /* if (type == 1) */ {
2165 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2166 		    sc->sc_name, g_mirror_get_diskname(disk));
2167 	}
2168 	g_mirror_regular_release(sc);
2169 	free(disk->d_sync.ds_bios, M_MIRROR);
2170 	disk->d_sync.ds_bios = NULL;
2171 	cp = disk->d_sync.ds_consumer;
2172 	disk->d_sync.ds_consumer = NULL;
2173 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2174 	sc->sc_sync.ds_ndisks--;
2175 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2176 	g_topology_lock();
2177 	g_mirror_kill_consumer(sc, cp);
2178 	g_topology_unlock();
2179 	sx_xlock(&sc->sc_lock);
2180 }
2181 
2182 static void
2183 g_mirror_launch_provider(struct g_mirror_softc *sc)
2184 {
2185 	struct g_mirror_disk *disk;
2186 	struct g_provider *pp, *dp;
2187 
2188 	sx_assert(&sc->sc_lock, SX_LOCKED);
2189 
2190 	g_topology_lock();
2191 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2192 	pp->flags |= G_PF_DIRECT_RECEIVE;
2193 	pp->mediasize = sc->sc_mediasize;
2194 	pp->sectorsize = sc->sc_sectorsize;
2195 	pp->stripesize = 0;
2196 	pp->stripeoffset = 0;
2197 
2198 	/* Splitting of unmapped BIO's could work but isn't implemented now */
2199 	if (sc->sc_balance != G_MIRROR_BALANCE_SPLIT)
2200 		pp->flags |= G_PF_ACCEPT_UNMAPPED;
2201 
2202 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2203 		if (disk->d_consumer && disk->d_consumer->provider) {
2204 			dp = disk->d_consumer->provider;
2205 			if (dp->stripesize > pp->stripesize) {
2206 				pp->stripesize = dp->stripesize;
2207 				pp->stripeoffset = dp->stripeoffset;
2208 			}
2209 			/* A provider underneath us doesn't support unmapped */
2210 			if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
2211 				G_MIRROR_DEBUG(0, "Cancelling unmapped "
2212 				    "because of %s.", dp->name);
2213 				pp->flags &= ~G_PF_ACCEPT_UNMAPPED;
2214 			}
2215 		}
2216 	}
2217 	pp->private = sc;
2218 	sc->sc_refcnt++;
2219 	sc->sc_provider = pp;
2220 	g_error_provider(pp, 0);
2221 	g_topology_unlock();
2222 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2223 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2224 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2225 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2226 			g_mirror_sync_start(disk);
2227 	}
2228 }
2229 
2230 static void
2231 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2232 {
2233 	struct g_mirror_disk *disk;
2234 	struct bio *bp;
2235 
2236 	g_topology_assert_not();
2237 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2238 	    sc->sc_name));
2239 
2240 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2241 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2242 			g_mirror_sync_stop(disk, 1);
2243 	}
2244 
2245 	g_topology_lock();
2246 	g_error_provider(sc->sc_provider, ENXIO);
2247 	mtx_lock(&sc->sc_queue_mtx);
2248 	while ((bp = TAILQ_FIRST(&sc->sc_queue)) != NULL) {
2249 		TAILQ_REMOVE(&sc->sc_queue, bp, bio_queue);
2250 		/*
2251 		 * Abort any pending I/O that wasn't generated by us.
2252 		 * Synchronization requests and requests destined for individual
2253 		 * mirror components can be destroyed immediately.
2254 		 */
2255 		if (bp->bio_to == sc->sc_provider &&
2256 		    bp->bio_from->geom != sc->sc_sync.ds_geom) {
2257 			g_io_deliver(bp, ENXIO);
2258 		} else {
2259 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
2260 				free(bp->bio_data, M_MIRROR);
2261 			g_destroy_bio(bp);
2262 		}
2263 	}
2264 	mtx_unlock(&sc->sc_queue_mtx);
2265 	g_wither_provider(sc->sc_provider, ENXIO);
2266 	sc->sc_provider = NULL;
2267 	G_MIRROR_DEBUG(0, "Device %s: provider destroyed.", sc->sc_name);
2268 	g_topology_unlock();
2269 }
2270 
2271 static void
2272 g_mirror_go(void *arg)
2273 {
2274 	struct g_mirror_softc *sc;
2275 
2276 	sc = arg;
2277 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2278 	g_mirror_event_send(sc, 0,
2279 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2280 }
2281 
2282 static u_int
2283 g_mirror_determine_state(struct g_mirror_disk *disk)
2284 {
2285 	struct g_mirror_softc *sc;
2286 	u_int state;
2287 
2288 	sc = disk->d_softc;
2289 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2290 		if ((disk->d_flags &
2291 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0 &&
2292 		    (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 ||
2293 		     (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0)) {
2294 			/* Disk does not need synchronization. */
2295 			state = G_MIRROR_DISK_STATE_ACTIVE;
2296 		} else {
2297 			if ((sc->sc_flags &
2298 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2299 			    (disk->d_flags &
2300 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2301 				/*
2302 				 * We can start synchronization from
2303 				 * the stored offset.
2304 				 */
2305 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2306 			} else {
2307 				state = G_MIRROR_DISK_STATE_STALE;
2308 			}
2309 		}
2310 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2311 		/*
2312 		 * Reset all synchronization data for this disk,
2313 		 * because if it even was synchronized, it was
2314 		 * synchronized to disks with different syncid.
2315 		 */
2316 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2317 		disk->d_sync.ds_offset = 0;
2318 		disk->d_sync.ds_offset_done = 0;
2319 		disk->d_sync.ds_syncid = sc->sc_syncid;
2320 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2321 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2322 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2323 		} else {
2324 			state = G_MIRROR_DISK_STATE_STALE;
2325 		}
2326 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2327 		/*
2328 		 * Not good, NOT GOOD!
2329 		 * It means that mirror was started on stale disks
2330 		 * and more fresh disk just arrive.
2331 		 * If there were writes, mirror is broken, sorry.
2332 		 * I think the best choice here is don't touch
2333 		 * this disk and inform the user loudly.
2334 		 */
2335 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2336 		    "disk (%s) arrives!! It will not be connected to the "
2337 		    "running device.", sc->sc_name,
2338 		    g_mirror_get_diskname(disk));
2339 		g_mirror_destroy_disk(disk);
2340 		state = G_MIRROR_DISK_STATE_NONE;
2341 		/* Return immediately, because disk was destroyed. */
2342 		return (state);
2343 	}
2344 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2345 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2346 	return (state);
2347 }
2348 
2349 /*
2350  * Update device state.
 *
 * Must be called with sc_lock exclusively held.
 *
 * STARTING: once every expected component is connected (or at least one is
 * and 'force' is set), pick the highest genid and syncid among the
 * components, decide which components have to be rebuilt, switch the device
 * to RUNNING and post a state event for every component.  If no component
 * is usable the device is flagged for destruction instead.
 *
 * RUNNING: flag the device for destruction when it has neither ACTIVE nor
 * NEW components, launch the provider when there are ACTIVE components and
 * no NEW ones, and carry out pending immediate genid/syncid bumps.
2351  */
2352 static void
2353 g_mirror_update_device(struct g_mirror_softc *sc, bool force)
2354 {
2355 	struct g_mirror_disk *disk;
2356 	u_int state;
2357 
2358 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2359 
2360 	switch (sc->sc_state) {
2361 	case G_MIRROR_DEVICE_STATE_STARTING:
2362 	    {
2363 		struct g_mirror_disk *pdisk, *tdisk;
2364 		u_int dirty, ndisks, genid, syncid;
2365 		bool broken;
2366 
2367 		KASSERT(sc->sc_provider == NULL,
2368 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2369 		/*
2370 		 * Are we ready? We are, if all disks are connected or
2371 		 * if we have any disks and 'force' is true.
2372 		 */
2373 		ndisks = g_mirror_ndisks(sc, -1);
2374 		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
			/* Ready: continue with the start-up logic below. */
2375 			;
2376 		} else if (ndisks == 0) {
2377 			/*
2378 			 * Disks went down in starting phase, so destroy
2379 			 * device.
2380 			 */
2381 			callout_drain(&sc->sc_callout);
2382 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2383 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2384 			    sc->sc_rootmount);
2385 			root_mount_rel(sc->sc_rootmount);
2386 			sc->sc_rootmount = NULL;
2387 			return;
2388 		} else {
			/* Some, but not all, disks are connected: keep waiting. */
2389 			return;
2390 		}
2391 
2392 		/*
2393 		 * Activate all disks with the biggest syncid.
2394 		 */
2395 		if (force) {
2396 			/*
2397 			 * If 'force' is true, we have been called due to
2398 			 * timeout, so don't bother canceling timeout.
2399 			 */
			/* Count the disks that are not in the middle of a rebuild. */
2400 			ndisks = 0;
2401 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2402 				if ((disk->d_flags &
2403 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2404 					ndisks++;
2405 				}
2406 			}
2407 			if (ndisks == 0) {
2408 				/* No valid disks found, destroy device. */
2409 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2410 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2411 				    __LINE__, sc->sc_rootmount);
2412 				root_mount_rel(sc->sc_rootmount);
2413 				sc->sc_rootmount = NULL;
2414 				return;
2415 			}
2416 		} else {
2417 			/* Cancel timeout. */
2418 			callout_drain(&sc->sc_callout);
2419 		}
2420 
2421 		/*
2422 		 * Find the biggest genid.
2423 		 */
2424 		genid = 0;
2425 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2426 			if (disk->d_genid > genid)
2427 				genid = disk->d_genid;
2428 		}
2429 		sc->sc_genid = genid;
2430 		/*
2431 		 * Remove all disks without the biggest genid.
2432 		 */
2433 		broken = false;
		/* The _SAFE iterator is needed because disks are destroyed in the loop. */
2434 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2435 			if (disk->d_genid < genid) {
2436 				G_MIRROR_DEBUG(0,
2437 				    "Component %s (device %s) broken, skipping.",
2438 				    g_mirror_get_diskname(disk), sc->sc_name);
2439 				g_mirror_destroy_disk(disk);
2440 				/*
2441 				 * Bump the syncid in case we discover a healthy
2442 				 * replacement disk after starting the mirror.
2443 				 */
2444 				broken = true;
2445 			}
2446 		}
2447 
2448 		/*
2449 		 * Find the biggest syncid.
2450 		 */
2451 		syncid = 0;
2452 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2453 			if (disk->d_sync.ds_syncid > syncid)
2454 				syncid = disk->d_sync.ds_syncid;
2455 		}
2456 
2457 		/*
2458 		 * Here we need to look for dirty disks and if all disks
2459 		 * with the biggest syncid are dirty, we have to choose
2460 		 * one with the biggest priority and rebuild the rest.
2461 		 */
2462 		/*
2463 		 * Find the number of dirty disks with the biggest syncid.
2464 		 * Find the number of disks with the biggest syncid.
2465 		 * While here, find a disk with the biggest priority.
2466 		 */
2467 		dirty = ndisks = 0;
2468 		pdisk = NULL;
2469 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2470 			if (disk->d_sync.ds_syncid != syncid)
2471 				continue;
2472 			if ((disk->d_flags &
2473 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2474 				continue;
2475 			}
2476 			ndisks++;
2477 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2478 				dirty++;
				/*
				 * Only dirty disks are candidates; the strict
				 * comparison keeps the first one seen among
				 * disks of equal priority.
				 */
2479 				if (pdisk == NULL ||
2480 				    pdisk->d_priority < disk->d_priority) {
2481 					pdisk = disk;
2482 				}
2483 			}
2484 		}
2485 		if (dirty == 0) {
2486 			/* No dirty disks at all, great. */
2487 		} else if (dirty == ndisks) {
2488 			/*
2489 			 * Force synchronization for all dirty disks except one
2490 			 * with the biggest priority.
2491 			 */
2492 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2493 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2494 			    "master disk for synchronization.",
2495 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2496 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2497 				if (disk->d_sync.ds_syncid != syncid)
2498 					continue;
2499 				if ((disk->d_flags &
2500 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2501 					continue;
2502 				}
2503 				KASSERT((disk->d_flags &
2504 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2505 				    ("Disk %s isn't marked as dirty.",
2506 				    g_mirror_get_diskname(disk)));
2507 				/* Skip the disk with the biggest priority. */
2508 				if (disk == pdisk)
2509 					continue;
				/*
				 * A syncid below sc_syncid makes
				 * g_mirror_determine_state() reset the disk's
				 * sync data (this relies on the chosen syncid
				 * being non-zero).
				 */
2510 				disk->d_sync.ds_syncid = 0;
2511 			}
2512 		} else if (dirty < ndisks) {
2513 			/*
2514 			 * Force synchronization for all dirty disks.
2515 			 * We have some non-dirty disks.
2516 			 */
2517 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2518 				if (disk->d_sync.ds_syncid != syncid)
2519 					continue;
2520 				if ((disk->d_flags &
2521 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2522 					continue;
2523 				}
2524 				if ((disk->d_flags &
2525 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2526 					continue;
2527 				}
2528 				disk->d_sync.ds_syncid = 0;
2529 			}
2530 		}
2531 
2532 		/* Reset hint. */
2533 		sc->sc_hint = NULL;
2534 		sc->sc_syncid = syncid;
2535 		if (force || broken) {
2536 			/* Remember to bump syncid on first write. */
2537 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2538 		}
2539 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2540 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2541 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2542 		    g_mirror_device_state2str(state));
2543 		sc->sc_state = state;
		/*
		 * Queue a state change for every disk.  'state' is reused here
		 * for disk states.  sc_syncid is the maximum over the disks on
		 * the list, so no disk should take the destroy path of
		 * g_mirror_determine_state() during this walk.
		 */
2544 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2545 			state = g_mirror_determine_state(disk);
2546 			g_mirror_event_send(disk, state,
2547 			    G_MIRROR_EVENT_DONTWAIT);
2548 			if (state == G_MIRROR_DISK_STATE_STALE)
2549 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2550 		}
2551 		break;
2552 	    }
2553 	case G_MIRROR_DEVICE_STATE_RUNNING:
2554 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2555 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2556 			/*
2557 			 * No usable disks, so destroy the device.
2558 			 */
2559 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2560 			break;
2561 		} else if (g_mirror_ndisks(sc,
2562 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2563 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2564 			/*
2565 			 * We have active disks, launch provider if it doesn't
2566 			 * exist.
2567 			 */
2568 			if (sc->sc_provider == NULL)
2569 				g_mirror_launch_provider(sc);
			/* Release the root mount hold, if one is still held. */
2570 			if (sc->sc_rootmount != NULL) {
2571 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2572 				    __LINE__, sc->sc_rootmount);
2573 				root_mount_rel(sc->sc_rootmount);
2574 				sc->sc_rootmount = NULL;
2575 			}
2576 		}
2577 		/*
2578 		 * Genid should be bumped immediately, so do it here.
2579 		 */
2580 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2581 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2582 			g_mirror_bump_genid(sc);
2583 		}
		/* Likewise for a syncid bump that was requested to happen now. */
2584 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID_NOW) != 0) {
2585 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID_NOW;
2586 			g_mirror_bump_syncid(sc);
2587 		}
2588 		break;
2589 	default:
2590 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2591 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2592 		break;
2593 	}
2594 }
2595 
2596 /*
2597  * Update disk state and device state if needed.
 *
 * 'state' is the state the disk is asked to enter.  Each case asserts the
 * permitted previous states, logs the transition through
 * DISK_STATE_CHANGED() and does the work tied to entering the new state.
 * A NEW disk that joins a RUNNING device is moved on right away to the state
 * chosen by g_mirror_determine_state().
 *
 * Must be called with sc_lock exclusively held.  Always returns 0.
 *
 * DISK_STATE_CHANGED() relies on the local names 'disk', 'state' and 'sc'
 * and has to be used before disk->d_state is overwritten.
2598  */
2599 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2600 	"Disk %s state changed from %s to %s (device %s).",		\
2601 	g_mirror_get_diskname(disk),					\
2602 	g_mirror_disk_state2str(disk->d_state),				\
2603 	g_mirror_disk_state2str(state), sc->sc_name)
2604 static int
2605 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2606 {
2607 	struct g_mirror_softc *sc;
2608 
2609 	sc = disk->d_softc;
2610 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2611 
2612 again:
2613 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2614 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2615 	    g_mirror_disk_state2str(state));
2616 	switch (state) {
2617 	case G_MIRROR_DISK_STATE_NEW:
2618 		/*
2619 		 * Possible scenarios:
2620 		 * 1. New disk arrive.
2621 		 */
2622 		/* Previous state should be NONE. */
2623 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2624 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2625 		    g_mirror_disk_state2str(disk->d_state)));
2626 		DISK_STATE_CHANGED();
2627 
2628 		disk->d_state = state;
		/*
		 * Insert the disk in front of the first entry whose priority
		 * is not higher than its own, or after the last entry when no
		 * such entry exists.
		 */
2629 		if (LIST_EMPTY(&sc->sc_disks))
2630 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2631 		else {
2632 			struct g_mirror_disk *dp;
2633 
2634 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2635 				if (disk->d_priority >= dp->d_priority) {
2636 					LIST_INSERT_BEFORE(dp, disk, d_next);
					/* NULL tells the code below that the disk is placed. */
2637 					dp = NULL;
2638 					break;
2639 				}
				/* Stop on the last entry so that dp keeps pointing at it. */
2640 				if (LIST_NEXT(dp, d_next) == NULL)
2641 					break;
2642 			}
2643 			if (dp != NULL)
2644 				LIST_INSERT_AFTER(dp, disk, d_next);
2645 		}
2646 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2647 		    sc->sc_name, g_mirror_get_diskname(disk));
		/* While the device is STARTING the disk simply stays NEW. */
2648 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2649 			break;
2650 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2651 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2652 		    g_mirror_device_state2str(sc->sc_state),
2653 		    g_mirror_get_diskname(disk),
2654 		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * NONE means g_mirror_determine_state() destroyed the disk;
		 * any other result is applied by re-entering the switch.
		 */
2655 		state = g_mirror_determine_state(disk);
2656 		if (state != G_MIRROR_DISK_STATE_NONE)
2657 			goto again;
2658 		break;
2659 	case G_MIRROR_DISK_STATE_ACTIVE:
2660 		/*
2661 		 * Possible scenarios:
2662 		 * 1. New disk does not need synchronization.
2663 		 * 2. Synchronization process finished successfully.
2664 		 */
2665 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2666 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2667 		    g_mirror_device_state2str(sc->sc_state),
2668 		    g_mirror_get_diskname(disk),
2669 		    g_mirror_disk_state2str(disk->d_state)));
2670 		/* Previous state should be NEW or SYNCHRONIZING. */
2671 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2672 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2673 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2674 		    g_mirror_disk_state2str(disk->d_state)));
2675 		DISK_STATE_CHANGED();
2676 
		/* Coming out of a rebuild: clear the sync flags and finish it. */
2677 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2678 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2679 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2680 			g_mirror_sync_stop(disk, 0);
2681 		}
2682 		disk->d_state = state;
2683 		disk->d_sync.ds_offset = 0;
2684 		disk->d_sync.ds_offset_done = 0;
2685 		g_mirror_update_idle(sc, disk);
2686 		g_mirror_update_metadata(disk);
2687 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2688 		    sc->sc_name, g_mirror_get_diskname(disk));
2689 		break;
2690 	case G_MIRROR_DISK_STATE_STALE:
2691 		/*
2692 		 * Possible scenarios:
2693 		 * 1. Stale disk was connected.
2694 		 */
2695 		/* Previous state should be NEW. */
2696 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2697 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2698 		    g_mirror_disk_state2str(disk->d_state)));
2699 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2700 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2701 		    g_mirror_device_state2str(sc->sc_state),
2702 		    g_mirror_get_diskname(disk),
2703 		    g_mirror_disk_state2str(disk->d_state)));
2704 		/*
2705 		 * STALE state is only possible if device is marked
2706 		 * NOAUTOSYNC.
2707 		 */
2708 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2709 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2710 		    g_mirror_device_state2str(sc->sc_state),
2711 		    g_mirror_get_diskname(disk),
2712 		    g_mirror_disk_state2str(disk->d_state)));
2713 		DISK_STATE_CHANGED();
2714 
2715 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2716 		disk->d_state = state;
2717 		g_mirror_update_metadata(disk);
2718 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2719 		    sc->sc_name, g_mirror_get_diskname(disk));
2720 		break;
2721 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2722 		/*
2723 		 * Possible scenarios:
2724 		 * 1. Disk which needs synchronization was connected.
2725 		 */
2726 		/* Previous state should be NEW. */
2727 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2728 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2729 		    g_mirror_disk_state2str(disk->d_state)));
2730 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2731 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2732 		    g_mirror_device_state2str(sc->sc_state),
2733 		    g_mirror_get_diskname(disk),
2734 		    g_mirror_disk_state2str(disk->d_state)));
2735 		DISK_STATE_CHANGED();
2736 
2737 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2738 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2739 		disk->d_state = state;
		/*
		 * Without a provider nothing is started here;
		 * g_mirror_launch_provider() starts the rebuild of
		 * SYNCHRONIZING disks when the provider is created.
		 */
2740 		if (sc->sc_provider != NULL) {
2741 			g_mirror_sync_start(disk);
2742 			g_mirror_update_metadata(disk);
2743 		}
2744 		break;
2745 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2746 		/*
2747 		 * Possible scenarios:
2748 		 * 1. Device wasn't running yet, but disk disappear.
2749 		 * 2. Disk was active and disappear.
2750 		 * 3. Disk disappear during synchronization process.
2751 		 */
2752 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2753 			/*
2754 			 * Previous state should be ACTIVE, STALE or
2755 			 * SYNCHRONIZING.
2756 			 */
2757 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2758 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2759 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2760 			    ("Wrong disk state (%s, %s).",
2761 			    g_mirror_get_diskname(disk),
2762 			    g_mirror_disk_state2str(disk->d_state)));
2763 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2764 			/* Previous state should be NEW. */
2765 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2766 			    ("Wrong disk state (%s, %s).",
2767 			    g_mirror_get_diskname(disk),
2768 			    g_mirror_disk_state2str(disk->d_state)));
2769 			/*
2770 			 * Reset bumping syncid if disk disappeared in STARTING
2771 			 * state.
2772 			 */
2773 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2774 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
		/* The final else branch is compiled only into INVARIANTS kernels. */
2775 #ifdef	INVARIANTS
2776 		} else {
2777 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2778 			    sc->sc_name,
2779 			    g_mirror_device_state2str(sc->sc_state),
2780 			    g_mirror_get_diskname(disk),
2781 			    g_mirror_disk_state2str(disk->d_state)));
2782 #endif
2783 		}
2784 		DISK_STATE_CHANGED();
2785 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2786 		    sc->sc_name, g_mirror_get_diskname(disk));
2787 
2788 		g_mirror_destroy_disk(disk);
2789 		break;
2790 	case G_MIRROR_DISK_STATE_DESTROY:
2791 	    {
2792 		int error;
2793 
		/* If the metadata cannot be cleared the disk is left as it is. */
2794 		error = g_mirror_clear_metadata(disk);
2795 		if (error != 0) {
2796 			G_MIRROR_DEBUG(0,
2797 			    "Device %s: failed to clear metadata on %s: %d.",
2798 			    sc->sc_name, g_mirror_get_diskname(disk), error);
2799 			break;
2800 		}
2801 		DISK_STATE_CHANGED();
2802 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2803 		    sc->sc_name, g_mirror_get_diskname(disk));
2804 
2805 		g_mirror_destroy_disk(disk);
2806 		sc->sc_ndisks--;
		/*
		 * 'disk' is reused as the iterator from here on: rewrite the
		 * metadata of every disk still on the list after sc_ndisks
		 * changed.
		 */
2807 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2808 			g_mirror_update_metadata(disk);
2809 		}
2810 		break;
2811 	    }
2812 	default:
2813 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2814 		break;
2815 	}
2816 	return (0);
2817 }
2818 #undef	DISK_STATE_CHANGED
2819 
2820 int
2821 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2822 {
2823 	struct g_provider *pp;
2824 	u_char *buf;
2825 	int error;
2826 
2827 	g_topology_assert();
2828 
2829 	error = g_access(cp, 1, 0, 0);
2830 	if (error != 0)
2831 		return (error);
2832 	pp = cp->provider;
2833 	g_topology_unlock();
2834 	/* Metadata are stored on last sector. */
2835 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2836 	    &error);
2837 	g_topology_lock();
2838 	g_access(cp, -1, 0, 0);
2839 	if (buf == NULL) {
2840 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2841 		    cp->provider->name, error);
2842 		return (error);
2843 	}
2844 
2845 	/* Decode metadata. */
2846 	error = mirror_metadata_decode(buf, md);
2847 	g_free(buf);
2848 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2849 		return (EINVAL);
2850 	if (md->md_version > G_MIRROR_VERSION) {
2851 		G_MIRROR_DEBUG(0,
2852 		    "Kernel module is too old to handle metadata from %s.",
2853 		    cp->provider->name);
2854 		return (EINVAL);
2855 	}
2856 	if (error != 0) {
2857 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2858 		    cp->provider->name);
2859 		return (error);
2860 	}
2861 
2862 	return (0);
2863 }
2864 
2865 static int
2866 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2867     struct g_mirror_metadata *md)
2868 {
2869 
2870 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2871 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2872 		    pp->name, md->md_did);
2873 		return (EEXIST);
2874 	}
2875 	if (md->md_all != sc->sc_ndisks) {
2876 		G_MIRROR_DEBUG(1,
2877 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2878 		    "md_all", pp->name, sc->sc_name);
2879 		return (EINVAL);
2880 	}
2881 	if (md->md_slice != sc->sc_slice) {
2882 		G_MIRROR_DEBUG(1,
2883 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2884 		    "md_slice", pp->name, sc->sc_name);
2885 		return (EINVAL);
2886 	}
2887 	if (md->md_balance != sc->sc_balance) {
2888 		G_MIRROR_DEBUG(1,
2889 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2890 		    "md_balance", pp->name, sc->sc_name);
2891 		return (EINVAL);
2892 	}
2893 #if 0
2894 	if (md->md_mediasize != sc->sc_mediasize) {
2895 		G_MIRROR_DEBUG(1,
2896 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2897 		    "md_mediasize", pp->name, sc->sc_name);
2898 		return (EINVAL);
2899 	}
2900 #endif
2901 	if (sc->sc_mediasize > pp->mediasize) {
2902 		G_MIRROR_DEBUG(1,
2903 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2904 		    sc->sc_name);
2905 		return (EINVAL);
2906 	}
2907 	if (md->md_sectorsize != sc->sc_sectorsize) {
2908 		G_MIRROR_DEBUG(1,
2909 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2910 		    "md_sectorsize", pp->name, sc->sc_name);
2911 		return (EINVAL);
2912 	}
2913 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2914 		G_MIRROR_DEBUG(1,
2915 		    "Invalid sector size of disk %s (device %s), skipping.",
2916 		    pp->name, sc->sc_name);
2917 		return (EINVAL);
2918 	}
2919 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2920 		G_MIRROR_DEBUG(1,
2921 		    "Invalid device flags on disk %s (device %s), skipping.",
2922 		    pp->name, sc->sc_name);
2923 		return (EINVAL);
2924 	}
2925 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2926 		G_MIRROR_DEBUG(1,
2927 		    "Invalid disk flags on disk %s (device %s), skipping.",
2928 		    pp->name, sc->sc_name);
2929 		return (EINVAL);
2930 	}
2931 	return (0);
2932 }
2933 
2934 int
2935 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2936     struct g_mirror_metadata *md)
2937 {
2938 	struct g_mirror_disk *disk;
2939 	int error;
2940 
2941 	g_topology_assert_not();
2942 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2943 
2944 	error = g_mirror_check_metadata(sc, pp, md);
2945 	if (error != 0)
2946 		return (error);
2947 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2948 	    md->md_genid < sc->sc_genid) {
2949 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2950 		    pp->name, sc->sc_name);
2951 		return (EINVAL);
2952 	}
2953 	disk = g_mirror_init_disk(sc, pp, md, &error);
2954 	if (disk == NULL)
2955 		return (error);
2956 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2957 	    G_MIRROR_EVENT_WAIT);
2958 	if (error != 0)
2959 		return (error);
2960 	if (md->md_version < G_MIRROR_VERSION) {
2961 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2962 		    pp->name, md->md_version, G_MIRROR_VERSION);
2963 		g_mirror_update_metadata(disk);
2964 	}
2965 	return (0);
2966 }
2967 
2968 static void
2969 g_mirror_destroy_delayed(void *arg, int flag)
2970 {
2971 	struct g_mirror_softc *sc;
2972 	int error;
2973 
2974 	if (flag == EV_CANCEL) {
2975 		G_MIRROR_DEBUG(1, "Destroying canceled.");
2976 		return;
2977 	}
2978 	sc = arg;
2979 	g_topology_unlock();
2980 	sx_xlock(&sc->sc_lock);
2981 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2982 	    ("DESTROY flag set on %s.", sc->sc_name));
2983 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0,
2984 	    ("CLOSEWAIT flag not set on %s.", sc->sc_name));
2985 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2986 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2987 	if (error != 0) {
2988 		G_MIRROR_DEBUG(0, "Cannot destroy %s (error=%d).",
2989 		    sc->sc_name, error);
2990 		sx_xunlock(&sc->sc_lock);
2991 	}
2992 	g_topology_lock();
2993 }
2994 
2995 static int
2996 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2997 {
2998 	struct g_mirror_softc *sc;
2999 	int error = 0;
3000 
3001 	g_topology_assert();
3002 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
3003 	    acw, ace);
3004 
3005 	sc = pp->private;
3006 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
3007 
3008 	g_topology_unlock();
3009 	sx_xlock(&sc->sc_lock);
3010 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
3011 	    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 ||
3012 	    LIST_EMPTY(&sc->sc_disks)) {
3013 		if (acr > 0 || acw > 0 || ace > 0)
3014 			error = ENXIO;
3015 		goto end;
3016 	}
3017 	sc->sc_provider_open += acr + acw + ace;
3018 	if (pp->acw + acw == 0)
3019 		g_mirror_idle(sc, 0);
3020 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_CLOSEWAIT) != 0 &&
3021 	    sc->sc_provider_open == 0)
3022 		g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK, sc, NULL);
3023 end:
3024 	sx_xunlock(&sc->sc_lock);
3025 	g_topology_lock();
3026 	return (error);
3027 }
3028 
3029 struct g_geom *
3030 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md,
3031     u_int type)
3032 {
3033 	struct g_mirror_softc *sc;
3034 	struct g_geom *gp;
3035 	int error, timeout;
3036 
3037 	g_topology_assert();
3038 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
3039 	    md->md_mid);
3040 
3041 	/* One disk is minimum. */
3042 	if (md->md_all < 1)
3043 		return (NULL);
3044 	/*
3045 	 * Action geom.
3046 	 */
3047 	gp = g_new_geomf(mp, "%s", md->md_name);
3048 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
3049 	gp->start = g_mirror_start;
3050 	gp->orphan = g_mirror_orphan;
3051 	gp->access = g_mirror_access;
3052 	gp->dumpconf = g_mirror_dumpconf;
3053 
3054 	sc->sc_type = type;
3055 	sc->sc_id = md->md_mid;
3056 	sc->sc_slice = md->md_slice;
3057 	sc->sc_balance = md->md_balance;
3058 	sc->sc_mediasize = md->md_mediasize;
3059 	sc->sc_sectorsize = md->md_sectorsize;
3060 	sc->sc_ndisks = md->md_all;
3061 	sc->sc_flags = md->md_mflags;
3062 	sc->sc_bump_id = 0;
3063 	sc->sc_idle = 1;
3064 	sc->sc_last_write = time_uptime;
3065 	sc->sc_writes = 0;
3066 	sc->sc_refcnt = 1;
3067 	sx_init(&sc->sc_lock, "gmirror:lock");
3068 	TAILQ_INIT(&sc->sc_queue);
3069 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
3070 	TAILQ_INIT(&sc->sc_regular_delayed);
3071 	TAILQ_INIT(&sc->sc_inflight);
3072 	TAILQ_INIT(&sc->sc_sync_delayed);
3073 	LIST_INIT(&sc->sc_disks);
3074 	TAILQ_INIT(&sc->sc_events);
3075 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
3076 	callout_init(&sc->sc_callout, 1);
3077 	mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF);
3078 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
3079 	gp->softc = sc;
3080 	sc->sc_geom = gp;
3081 	sc->sc_provider = NULL;
3082 	sc->sc_provider_open = 0;
3083 	/*
3084 	 * Synchronization geom.
3085 	 */
3086 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
3087 	gp->softc = sc;
3088 	gp->orphan = g_mirror_orphan;
3089 	sc->sc_sync.ds_geom = gp;
3090 	sc->sc_sync.ds_ndisks = 0;
3091 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
3092 	    "g_mirror %s", md->md_name);
3093 	if (error != 0) {
3094 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
3095 		    sc->sc_name);
3096 		g_destroy_geom(sc->sc_sync.ds_geom);
3097 		g_destroy_geom(sc->sc_geom);
3098 		g_mirror_free_device(sc);
3099 		return (NULL);
3100 	}
3101 
3102 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
3103 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
3104 
3105 	sc->sc_rootmount = root_mount_hold("GMIRROR");
3106 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
3107 	/*
3108 	 * Run timeout.
3109 	 */
3110 	timeout = g_mirror_timeout * hz;
3111 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
3112 	return (sc->sc_geom);
3113 }
3114 
3115 int
3116 g_mirror_destroy(struct g_mirror_softc *sc, int how)
3117 {
3118 	struct g_mirror_disk *disk;
3119 
3120 	g_topology_assert_not();
3121 	sx_assert(&sc->sc_lock, SX_XLOCKED);
3122 
3123 	if (sc->sc_provider_open != 0) {
3124 		switch (how) {
3125 		case G_MIRROR_DESTROY_SOFT:
3126 			G_MIRROR_DEBUG(1,
3127 			    "Device %s is still open (%d).", sc->sc_name,
3128 			    sc->sc_provider_open);
3129 			return (EBUSY);
3130 		case G_MIRROR_DESTROY_DELAYED:
3131 			G_MIRROR_DEBUG(1,
3132 			    "Device %s will be destroyed on last close.",
3133 			    sc->sc_name);
3134 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
3135 				if (disk->d_state ==
3136 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3137 					g_mirror_sync_stop(disk, 1);
3138 				}
3139 			}
3140 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_CLOSEWAIT;
3141 			return (EBUSY);
3142 		case G_MIRROR_DESTROY_HARD:
3143 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
3144 			    "can't be definitely removed.", sc->sc_name);
3145 		}
3146 	}
3147 
3148 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3149 		sx_xunlock(&sc->sc_lock);
3150 		return (0);
3151 	}
3152 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
3153 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DRAIN;
3154 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
3155 	sx_xunlock(&sc->sc_lock);
3156 	mtx_lock(&sc->sc_queue_mtx);
3157 	wakeup(sc);
3158 	mtx_unlock(&sc->sc_queue_mtx);
3159 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
3160 	while (sc->sc_worker != NULL)
3161 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
3162 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
3163 	sx_xlock(&sc->sc_lock);
3164 	g_mirror_destroy_device(sc);
3165 	return (0);
3166 }
3167 
3168 static void
3169 g_mirror_taste_orphan(struct g_consumer *cp)
3170 {
3171 
3172 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
3173 	    cp->provider->name));
3174 }
3175 
3176 static struct g_geom *
3177 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
3178 {
3179 	struct g_mirror_metadata md;
3180 	struct g_mirror_softc *sc;
3181 	struct g_consumer *cp;
3182 	struct g_geom *gp;
3183 	int error;
3184 
3185 	g_topology_assert();
3186 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3187 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3188 
3189 	gp = g_new_geomf(mp, "mirror:taste");
3190 	/*
3191 	 * This orphan function should be never called.
3192 	 */
3193 	gp->orphan = g_mirror_taste_orphan;
3194 	cp = g_new_consumer(gp);
3195 	g_attach(cp, pp);
3196 	error = g_mirror_read_metadata(cp, &md);
3197 	g_detach(cp);
3198 	g_destroy_consumer(cp);
3199 	g_destroy_geom(gp);
3200 	if (error != 0)
3201 		return (NULL);
3202 	gp = NULL;
3203 
3204 	if (md.md_provider[0] != '\0' &&
3205 	    !g_compare_names(md.md_provider, pp->name))
3206 		return (NULL);
3207 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3208 		return (NULL);
3209 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3210 		G_MIRROR_DEBUG(0,
3211 		    "Device %s: provider %s marked as inactive, skipping.",
3212 		    md.md_name, pp->name);
3213 		return (NULL);
3214 	}
3215 	if (g_mirror_debug >= 2)
3216 		mirror_metadata_dump(&md);
3217 
3218 	/*
3219 	 * Let's check if device already exists.
3220 	 */
3221 	sc = NULL;
3222 	LIST_FOREACH(gp, &mp->geom, geom) {
3223 		sc = gp->softc;
3224 		if (sc == NULL)
3225 			continue;
3226 		if (sc->sc_type != G_MIRROR_TYPE_AUTOMATIC)
3227 			continue;
3228 		if (sc->sc_sync.ds_geom == gp)
3229 			continue;
3230 		if (strcmp(md.md_name, sc->sc_name) != 0)
3231 			continue;
3232 		if (md.md_mid != sc->sc_id) {
3233 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3234 			    sc->sc_name);
3235 			return (NULL);
3236 		}
3237 		break;
3238 	}
3239 	if (gp == NULL) {
3240 		gp = g_mirror_create(mp, &md, G_MIRROR_TYPE_AUTOMATIC);
3241 		if (gp == NULL) {
3242 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3243 			    md.md_name);
3244 			return (NULL);
3245 		}
3246 		sc = gp->softc;
3247 	}
3248 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3249 	g_topology_unlock();
3250 	sx_xlock(&sc->sc_lock);
3251 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3252 	error = g_mirror_add_disk(sc, pp, &md);
3253 	if (error != 0) {
3254 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3255 		    pp->name, gp->name, error);
3256 		if (LIST_EMPTY(&sc->sc_disks)) {
3257 			g_cancel_event(sc);
3258 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3259 			g_topology_lock();
3260 			return (NULL);
3261 		}
3262 		gp = NULL;
3263 	}
3264 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3265 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3266 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3267 		g_topology_lock();
3268 		return (NULL);
3269 	}
3270 	sx_xunlock(&sc->sc_lock);
3271 	g_topology_lock();
3272 	return (gp);
3273 }
3274 
3275 static void
3276 g_mirror_resize(struct g_consumer *cp)
3277 {
3278 	struct g_mirror_disk *disk;
3279 
3280 	g_topology_assert();
3281 	g_trace(G_T_TOPOLOGY, "%s(%s)", __func__, cp->provider->name);
3282 
3283 	disk = cp->private;
3284 	if (disk == NULL)
3285 		return;
3286 	g_topology_unlock();
3287 	g_mirror_update_metadata(disk);
3288 	g_topology_lock();
3289 }
3290 
3291 static int
3292 g_mirror_destroy_geom(struct gctl_req *req __unused,
3293     struct g_class *mp __unused, struct g_geom *gp)
3294 {
3295 	struct g_mirror_softc *sc;
3296 	int error;
3297 
3298 	g_topology_unlock();
3299 	sc = gp->softc;
3300 	sx_xlock(&sc->sc_lock);
3301 	g_cancel_event(sc);
3302 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3303 	if (error != 0)
3304 		sx_xunlock(&sc->sc_lock);
3305 	g_topology_lock();
3306 	return (error);
3307 }
3308 
3309 static void
3310 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3311     struct g_consumer *cp, struct g_provider *pp)
3312 {
3313 	struct g_mirror_softc *sc;
3314 
3315 	g_topology_assert();
3316 
3317 	sc = gp->softc;
3318 	if (sc == NULL)
3319 		return;
3320 	/* Skip synchronization geom. */
3321 	if (gp == sc->sc_sync.ds_geom)
3322 		return;
3323 	if (pp != NULL) {
3324 		/* Nothing here. */
3325 	} else if (cp != NULL) {
3326 		struct g_mirror_disk *disk;
3327 
3328 		disk = cp->private;
3329 		if (disk == NULL)
3330 			return;
3331 		g_topology_unlock();
3332 		sx_xlock(&sc->sc_lock);
3333 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3334 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3335 			sbuf_printf(sb, "%s<Synchronized>", indent);
3336 			if (disk->d_sync.ds_offset == 0)
3337 				sbuf_printf(sb, "0%%");
3338 			else {
3339 				sbuf_printf(sb, "%u%%",
3340 				    (u_int)((disk->d_sync.ds_offset * 100) /
3341 				    sc->sc_provider->mediasize));
3342 			}
3343 			sbuf_printf(sb, "</Synchronized>\n");
3344 			if (disk->d_sync.ds_offset > 0) {
3345 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3346 				    "</BytesSynced>\n", indent,
3347 				    (intmax_t)disk->d_sync.ds_offset);
3348 			}
3349 		}
3350 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3351 		    disk->d_sync.ds_syncid);
3352 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3353 		    disk->d_genid);
3354 		sbuf_printf(sb, "%s<Flags>", indent);
3355 		if (disk->d_flags == 0)
3356 			sbuf_printf(sb, "NONE");
3357 		else {
3358 			int first = 1;
3359 
3360 #define	ADD_FLAG(flag, name)	do {					\
3361 	if ((disk->d_flags & (flag)) != 0) {				\
3362 		if (!first)						\
3363 			sbuf_printf(sb, ", ");				\
3364 		else							\
3365 			first = 0;					\
3366 		sbuf_printf(sb, name);					\
3367 	}								\
3368 } while (0)
3369 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3370 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3371 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3372 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3373 			    "SYNCHRONIZING");
3374 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3375 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3376 #undef	ADD_FLAG
3377 		}
3378 		sbuf_printf(sb, "</Flags>\n");
3379 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3380 		    disk->d_priority);
3381 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3382 		    g_mirror_disk_state2str(disk->d_state));
3383 		sx_xunlock(&sc->sc_lock);
3384 		g_topology_lock();
3385 	} else {
3386 		g_topology_unlock();
3387 		sx_xlock(&sc->sc_lock);
3388 		sbuf_printf(sb, "%s<Type>", indent);
3389 		switch (sc->sc_type) {
3390 		case G_MIRROR_TYPE_AUTOMATIC:
3391 			sbuf_printf(sb, "AUTOMATIC");
3392 			break;
3393 		case G_MIRROR_TYPE_MANUAL:
3394 			sbuf_printf(sb, "MANUAL");
3395 			break;
3396 		default:
3397 			sbuf_printf(sb, "UNKNOWN");
3398 			break;
3399 		}
3400 		sbuf_printf(sb, "</Type>\n");
3401 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3402 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3403 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3404 		sbuf_printf(sb, "%s<Flags>", indent);
3405 		if (sc->sc_flags == 0)
3406 			sbuf_printf(sb, "NONE");
3407 		else {
3408 			int first = 1;
3409 
3410 #define	ADD_FLAG(flag, name)	do {					\
3411 	if ((sc->sc_flags & (flag)) != 0) {				\
3412 		if (!first)						\
3413 			sbuf_printf(sb, ", ");				\
3414 		else							\
3415 			first = 0;					\
3416 		sbuf_printf(sb, name);					\
3417 	}								\
3418 } while (0)
3419 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3420 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3421 #undef	ADD_FLAG
3422 		}
3423 		sbuf_printf(sb, "</Flags>\n");
3424 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3425 		    (u_int)sc->sc_slice);
3426 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3427 		    balance_name(sc->sc_balance));
3428 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3429 		    sc->sc_ndisks);
3430 		sbuf_printf(sb, "%s<State>", indent);
3431 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3432 			sbuf_printf(sb, "%s", "STARTING");
3433 		else if (sc->sc_ndisks ==
3434 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3435 			sbuf_printf(sb, "%s", "COMPLETE");
3436 		else
3437 			sbuf_printf(sb, "%s", "DEGRADED");
3438 		sbuf_printf(sb, "</State>\n");
3439 		sx_xunlock(&sc->sc_lock);
3440 		g_topology_lock();
3441 	}
3442 }
3443 
3444 static void
3445 g_mirror_shutdown_post_sync(void *arg, int howto)
3446 {
3447 	struct g_class *mp;
3448 	struct g_geom *gp, *gp2;
3449 	struct g_mirror_softc *sc;
3450 	int error;
3451 
3452 	if (panicstr != NULL)
3453 		return;
3454 
3455 	mp = arg;
3456 	g_topology_lock();
3457 	g_mirror_shutdown = 1;
3458 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3459 		if ((sc = gp->softc) == NULL)
3460 			continue;
3461 		/* Skip synchronization geom. */
3462 		if (gp == sc->sc_sync.ds_geom)
3463 			continue;
3464 		g_topology_unlock();
3465 		sx_xlock(&sc->sc_lock);
3466 		g_mirror_idle(sc, -1);
3467 		g_cancel_event(sc);
3468 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3469 		if (error != 0)
3470 			sx_xunlock(&sc->sc_lock);
3471 		g_topology_lock();
3472 	}
3473 	g_topology_unlock();
3474 }
3475 
3476 static void
3477 g_mirror_init(struct g_class *mp)
3478 {
3479 
3480 	g_mirror_post_sync = EVENTHANDLER_REGISTER(shutdown_post_sync,
3481 	    g_mirror_shutdown_post_sync, mp, SHUTDOWN_PRI_FIRST);
3482 	if (g_mirror_post_sync == NULL)
3483 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3484 }
3485 
3486 static void
3487 g_mirror_fini(struct g_class *mp)
3488 {
3489 
3490 	if (g_mirror_post_sync != NULL)
3491 		EVENTHANDLER_DEREGISTER(shutdown_post_sync, g_mirror_post_sync);
3492 }
3493 
/*
 * Declare g_mirror_class to GEOM under the name g_mirror.
 * NOTE(review): the macro presumably also generates the kernel module
 * glue - confirm against its definition in the GEOM headers.
 */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3495