1 /*-
2  * Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/module.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sbuf.h>
39 #include <sys/sysctl.h>
40 #include <sys/malloc.h>
41 #include <sys/eventhandler.h>
42 #include <vm/uma.h>
43 #include <geom/geom.h>
44 #include <sys/proc.h>
45 #include <sys/kthread.h>
46 #include <sys/sched.h>
47 #include <geom/mirror/g_mirror.h>
48 
49 FEATURE(geom_mirror, "GEOM mirroring support");
50 
51 static MALLOC_DEFINE(M_MIRROR, "mirror_data", "GEOM_MIRROR Data");
52 
53 SYSCTL_DECL(_kern_geom);
54 static SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0,
55     "GEOM_MIRROR stuff");
56 u_int g_mirror_debug = 0;
57 TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
58 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
59     "Debug level");
60 static u_int g_mirror_timeout = 4;
61 TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
62 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
63     0, "Time to wait on all mirror components");
64 static u_int g_mirror_idletime = 5;
65 TUNABLE_INT("kern.geom.mirror.idletime", &g_mirror_idletime);
66 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, idletime, CTLFLAG_RW,
67     &g_mirror_idletime, 0, "Mark components as clean when idling");
68 static u_int g_mirror_disconnect_on_failure = 1;
69 TUNABLE_INT("kern.geom.mirror.disconnect_on_failure",
70     &g_mirror_disconnect_on_failure);
71 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, disconnect_on_failure, CTLFLAG_RW,
72     &g_mirror_disconnect_on_failure, 0, "Disconnect component on I/O failure.");
73 static u_int g_mirror_syncreqs = 2;
74 TUNABLE_INT("kern.geom.mirror.sync_requests", &g_mirror_syncreqs);
75 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_requests, CTLFLAG_RDTUN,
76     &g_mirror_syncreqs, 0, "Parallel synchronization I/O requests.");
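
/*
 * For example, these knobs can be preset from loader.conf(5):
 *
 *	kern.geom.mirror.debug=2
 *	kern.geom.mirror.sync_requests=4
 *
 * All of them except sync_requests (which is CTLFLAG_RDTUN) can also be
 * changed at runtime via sysctl(8).
 */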
77 
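/*
 * A msleep(9) wrapper which logs, at debug level 4, when a thread goes to
 * sleep on and wakes up from the given wait channel.
 */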
78 #define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
79 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
80 	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
81 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
82 } while (0)
83 
84 static eventhandler_tag g_mirror_pre_sync = NULL;
85 
86 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
87     struct g_geom *gp);
88 static g_taste_t g_mirror_taste;
89 static void g_mirror_init(struct g_class *mp);
90 static void g_mirror_fini(struct g_class *mp);
91 
92 struct g_class g_mirror_class = {
93 	.name = G_MIRROR_CLASS_NAME,
94 	.version = G_VERSION,
95 	.ctlreq = g_mirror_config,
96 	.taste = g_mirror_taste,
97 	.destroy_geom = g_mirror_destroy_geom,
98 	.init = g_mirror_init,
99 	.fini = g_mirror_fini
100 };
101 
102 
103 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
104 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
105 static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
106 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
107     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
108 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
109 static void g_mirror_register_request(struct bio *bp);
110 static void g_mirror_sync_release(struct g_mirror_softc *sc);
111 
112 
113 static const char *
114 g_mirror_disk_state2str(int state)
115 {
116 
117 	switch (state) {
118 	case G_MIRROR_DISK_STATE_NONE:
119 		return ("NONE");
120 	case G_MIRROR_DISK_STATE_NEW:
121 		return ("NEW");
122 	case G_MIRROR_DISK_STATE_ACTIVE:
123 		return ("ACTIVE");
124 	case G_MIRROR_DISK_STATE_STALE:
125 		return ("STALE");
126 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
127 		return ("SYNCHRONIZING");
128 	case G_MIRROR_DISK_STATE_DISCONNECTED:
129 		return ("DISCONNECTED");
130 	case G_MIRROR_DISK_STATE_DESTROY:
131 		return ("DESTROY");
132 	default:
133 		return ("INVALID");
134 	}
135 }
136 
137 static const char *
138 g_mirror_device_state2str(int state)
139 {
140 
141 	switch (state) {
142 	case G_MIRROR_DEVICE_STATE_STARTING:
143 		return ("STARTING");
144 	case G_MIRROR_DEVICE_STATE_RUNNING:
145 		return ("RUNNING");
146 	default:
147 		return ("INVALID");
148 	}
149 }
150 
151 static const char *
152 g_mirror_get_diskname(struct g_mirror_disk *disk)
153 {
154 
155 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
156 		return ("[unknown]");
157 	return (disk->d_name);
158 }
159 
160 /*
161  * --- Event handling functions ---
162  * Events in geom_mirror are used to handle disk and device state
163  * changes from a single thread, which simplifies locking.
164  */
165 static void
166 g_mirror_event_free(struct g_mirror_event *ep)
167 {
168 
169 	free(ep, M_MIRROR);
170 }
171 
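/*
 * Queue an event for the worker thread and wake the thread up.  Unless
 * G_MIRROR_EVENT_DONTWAIT is set, drop sc_lock and sleep until the worker
 * marks the event as done, then return the event's error status.
 */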
172 int
173 g_mirror_event_send(void *arg, int state, int flags)
174 {
175 	struct g_mirror_softc *sc;
176 	struct g_mirror_disk *disk;
177 	struct g_mirror_event *ep;
178 	int error;
179 
180 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
181 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
182 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
183 		disk = NULL;
184 		sc = arg;
185 	} else {
186 		disk = arg;
187 		sc = disk->d_softc;
188 	}
189 	ep->e_disk = disk;
190 	ep->e_state = state;
191 	ep->e_flags = flags;
192 	ep->e_error = 0;
193 	mtx_lock(&sc->sc_events_mtx);
194 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
195 	mtx_unlock(&sc->sc_events_mtx);
196 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
197 	mtx_lock(&sc->sc_queue_mtx);
198 	wakeup(sc);
199 	mtx_unlock(&sc->sc_queue_mtx);
200 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
201 		return (0);
202 	sx_assert(&sc->sc_lock, SX_XLOCKED);
203 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
204 	sx_xunlock(&sc->sc_lock);
205 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
206 		mtx_lock(&sc->sc_events_mtx);
207 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
208 		    hz * 5);
209 	}
210 	error = ep->e_error;
211 	g_mirror_event_free(ep);
212 	sx_xlock(&sc->sc_lock);
213 	return (error);
214 }
215 
216 static struct g_mirror_event *
217 g_mirror_event_get(struct g_mirror_softc *sc)
218 {
219 	struct g_mirror_event *ep;
220 
221 	mtx_lock(&sc->sc_events_mtx);
222 	ep = TAILQ_FIRST(&sc->sc_events);
223 	mtx_unlock(&sc->sc_events_mtx);
224 	return (ep);
225 }
226 
227 static void
228 g_mirror_event_remove(struct g_mirror_softc *sc, struct g_mirror_event *ep)
229 {
230 
231 	mtx_lock(&sc->sc_events_mtx);
232 	TAILQ_REMOVE(&sc->sc_events, ep, e_next);
233 	mtx_unlock(&sc->sc_events_mtx);
234 }
235 
236 static void
237 g_mirror_event_cancel(struct g_mirror_disk *disk)
238 {
239 	struct g_mirror_softc *sc;
240 	struct g_mirror_event *ep, *tmpep;
241 
242 	sc = disk->d_softc;
243 	sx_assert(&sc->sc_lock, SX_XLOCKED);
244 
245 	mtx_lock(&sc->sc_events_mtx);
246 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
247 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
248 			continue;
249 		if (ep->e_disk != disk)
250 			continue;
251 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
252 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
253 			g_mirror_event_free(ep);
254 		else {
255 			ep->e_error = ECANCELED;
256 			wakeup(ep);
257 		}
258 	}
259 	mtx_unlock(&sc->sc_events_mtx);
260 }
261 
262 /*
263  * Return the number of disks in the given state.
264  * If state is equal to -1, count all connected disks.
265  */
266 u_int
267 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
268 {
269 	struct g_mirror_disk *disk;
270 	u_int n = 0;
271 
272 	sx_assert(&sc->sc_lock, SX_LOCKED);
273 
274 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
275 		if (state == -1 || disk->d_state == state)
276 			n++;
277 	}
278 	return (n);
279 }
280 
281 /*
282  * Find a disk in the mirror by its disk ID.
283  */
284 static struct g_mirror_disk *
285 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
286 {
287 	struct g_mirror_disk *disk;
288 
289 	sx_assert(&sc->sc_lock, SX_XLOCKED);
290 
291 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
292 		if (disk->d_id == id)
293 			return (disk);
294 	}
295 	return (NULL);
296 }
297 
298 static u_int
299 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
300 {
301 	struct bio *bp;
302 	u_int nreqs = 0;
303 
304 	mtx_lock(&sc->sc_queue_mtx);
305 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
306 		if (bp->bio_from == cp)
307 			nreqs++;
308 	}
309 	mtx_unlock(&sc->sc_queue_mtx);
310 	return (nreqs);
311 }
312 
313 static int
314 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
315 {
316 
317 	if (cp->index > 0) {
318 		G_MIRROR_DEBUG(2,
319 		    "I/O requests for %s exist, can't destroy it now.",
320 		    cp->provider->name);
321 		return (1);
322 	}
323 	if (g_mirror_nrequests(sc, cp) > 0) {
324 		G_MIRROR_DEBUG(2,
325 		    "I/O requests for %s in queue, can't destroy it now.",
326 		    cp->provider->name);
327 		return (1);
328 	}
329 	return (0);
330 }
331 
332 static void
333 g_mirror_destroy_consumer(void *arg, int flags __unused)
334 {
335 	struct g_consumer *cp;
336 
337 	g_topology_assert();
338 
339 	cp = arg;
340 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", cp->provider->name);
341 	g_detach(cp);
342 	g_destroy_consumer(cp);
343 }
344 
345 static void
346 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
347 {
348 	struct g_provider *pp;
349 	int retaste_wait;
350 
351 	g_topology_assert();
352 
353 	cp->private = NULL;
354 	if (g_mirror_is_busy(sc, cp))
355 		return;
356 	pp = cp->provider;
357 	retaste_wait = 0;
358 	if (cp->acw == 1) {
359 		if ((pp->geom->flags & G_GEOM_WITHER) == 0)
360 			retaste_wait = 1;
361 	}
362 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", pp->name, -cp->acr,
363 	    -cp->acw, -cp->ace, 0);
364 	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
365 		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
366 	if (retaste_wait) {
367 		/*
368 		 * After the retaste event has been sent (inside g_access()),
369 		 * we can post an event to detach and destroy the consumer.
370 		 * A class which has a consumer attached to the given provider
371 		 * will not receive a retaste event for that provider.
372 		 * This is how we ignore retaste events when closing consumers
373 		 * opened for write: we detach and destroy the consumer only
374 		 * after the retaste event has been sent.
375 		 */
376 		g_post_event(g_mirror_destroy_consumer, cp, M_WAITOK, NULL);
377 		return;
378 	}
379 	G_MIRROR_DEBUG(1, "Consumer %s destroyed.", pp->name);
380 	g_detach(cp);
381 	g_destroy_consumer(cp);
382 }
383 
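/*
 * Create a consumer for the disk, attach it to the given provider and open
 * the provider with r1w1e1 access.
 */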
384 static int
385 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
386 {
387 	struct g_consumer *cp;
388 	int error;
389 
390 	g_topology_assert_not();
391 	KASSERT(disk->d_consumer == NULL,
392 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
393 
394 	g_topology_lock();
395 	cp = g_new_consumer(disk->d_softc->sc_geom);
396 	error = g_attach(cp, pp);
397 	if (error != 0) {
398 		g_destroy_consumer(cp);
399 		g_topology_unlock();
400 		return (error);
401 	}
402 	error = g_access(cp, 1, 1, 1);
403 	if (error != 0) {
404 		g_detach(cp);
405 		g_destroy_consumer(cp);
406 		g_topology_unlock();
407 		G_MIRROR_DEBUG(0, "Cannot open consumer %s (error=%d).",
408 		    pp->name, error);
409 		return (error);
410 	}
411 	g_topology_unlock();
412 	disk->d_consumer = cp;
413 	disk->d_consumer->private = disk;
414 	disk->d_consumer->index = 0;
415 
416 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
417 	return (0);
418 }
419 
420 static void
421 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
422 {
423 
424 	g_topology_assert();
425 
426 	if (cp == NULL)
427 		return;
428 	if (cp->provider != NULL)
429 		g_mirror_kill_consumer(sc, cp);
430 	else
431 		g_destroy_consumer(cp);
432 }
433 
434 /*
435  * Initialize a disk: allocate memory, create a consumer, attach it
436  * to the provider and open access (r1w1e1) to it.
437  */
438 static struct g_mirror_disk *
439 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
440     struct g_mirror_metadata *md, int *errorp)
441 {
442 	struct g_mirror_disk *disk;
443 	int i, error;
444 
445 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
446 	if (disk == NULL) {
447 		error = ENOMEM;
448 		goto fail;
449 	}
450 	disk->d_softc = sc;
451 	error = g_mirror_connect_disk(disk, pp);
452 	if (error != 0)
453 		goto fail;
454 	disk->d_id = md->md_did;
455 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
456 	disk->d_priority = md->md_priority;
457 	disk->d_flags = md->md_dflags;
458 	error = g_getattr("GEOM::candelete", disk->d_consumer, &i);
459 	if (error != 0)
460 		goto fail;
461 	if (i)
462 		disk->d_flags |= G_MIRROR_DISK_FLAG_CANDELETE;
463 	if (md->md_provider[0] != '\0')
464 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
465 	disk->d_sync.ds_consumer = NULL;
466 	disk->d_sync.ds_offset = md->md_sync_offset;
467 	disk->d_sync.ds_offset_done = md->md_sync_offset;
468 	disk->d_genid = md->md_genid;
469 	disk->d_sync.ds_syncid = md->md_syncid;
470 	if (errorp != NULL)
471 		*errorp = 0;
472 	return (disk);
473 fail:
474 	if (errorp != NULL)
475 		*errorp = error;
476 	if (disk != NULL)
477 		free(disk, M_MIRROR);
478 	return (NULL);
479 }
480 
481 static void
482 g_mirror_destroy_disk(struct g_mirror_disk *disk)
483 {
484 	struct g_mirror_softc *sc;
485 
486 	g_topology_assert_not();
487 	sc = disk->d_softc;
488 	sx_assert(&sc->sc_lock, SX_XLOCKED);
489 
490 	LIST_REMOVE(disk, d_next);
491 	g_mirror_event_cancel(disk);
492 	if (sc->sc_hint == disk)
493 		sc->sc_hint = NULL;
494 	switch (disk->d_state) {
495 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
496 		g_mirror_sync_stop(disk, 1);
497 		/* FALLTHROUGH */
498 	case G_MIRROR_DISK_STATE_NEW:
499 	case G_MIRROR_DISK_STATE_STALE:
500 	case G_MIRROR_DISK_STATE_ACTIVE:
501 		g_topology_lock();
502 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
503 		g_topology_unlock();
504 		free(disk, M_MIRROR);
505 		break;
506 	default:
507 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
508 		    g_mirror_get_diskname(disk),
509 		    g_mirror_disk_state2str(disk->d_state)));
510 	}
511 }
512 
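/*
 * Tear the device down: destroy the provider and all disks, cancel pending
 * events, wither both the main and the synchronization geom and free the
 * locks.
 */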
513 static void
514 g_mirror_destroy_device(struct g_mirror_softc *sc)
515 {
516 	struct g_mirror_disk *disk;
517 	struct g_mirror_event *ep;
518 	struct g_geom *gp;
519 	struct g_consumer *cp, *tmpcp;
520 
521 	g_topology_assert_not();
522 	sx_assert(&sc->sc_lock, SX_XLOCKED);
523 
524 	gp = sc->sc_geom;
525 	if (sc->sc_provider != NULL)
526 		g_mirror_destroy_provider(sc);
527 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
528 	    disk = LIST_FIRST(&sc->sc_disks)) {
529 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
530 		g_mirror_update_metadata(disk);
531 		g_mirror_destroy_disk(disk);
532 	}
533 	while ((ep = g_mirror_event_get(sc)) != NULL) {
534 		g_mirror_event_remove(sc, ep);
535 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
536 			g_mirror_event_free(ep);
537 		else {
538 			ep->e_error = ECANCELED;
539 			ep->e_flags |= G_MIRROR_EVENT_DONE;
540 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
541 			mtx_lock(&sc->sc_events_mtx);
542 			wakeup(ep);
543 			mtx_unlock(&sc->sc_events_mtx);
544 		}
545 	}
546 	callout_drain(&sc->sc_callout);
547 
548 	g_topology_lock();
549 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
550 		g_mirror_disconnect_consumer(sc, cp);
551 	}
552 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
553 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
554 	g_wither_geom(gp, ENXIO);
555 	g_topology_unlock();
556 	mtx_destroy(&sc->sc_queue_mtx);
557 	mtx_destroy(&sc->sc_events_mtx);
558 	sx_xunlock(&sc->sc_lock);
559 	sx_destroy(&sc->sc_lock);
560 }
561 
562 static void
563 g_mirror_orphan(struct g_consumer *cp)
564 {
565 	struct g_mirror_disk *disk;
566 
567 	g_topology_assert();
568 
569 	disk = cp->private;
570 	if (disk == NULL)
571 		return;
572 	disk->d_softc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
573 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
574 	    G_MIRROR_EVENT_DONTWAIT);
575 }
576 
577 /*
578  * Return the next active disk on the list.
579  * It is possible that it will be the same disk as the given one.
580  * If there are no active disks on the list, NULL is returned.
581  */
582 static __inline struct g_mirror_disk *
583 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
584 {
585 	struct g_mirror_disk *dp;
586 
587 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
588 	    dp = LIST_NEXT(dp, d_next)) {
589 		if (dp == NULL)
590 			dp = LIST_FIRST(&sc->sc_disks);
591 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
592 			break;
593 	}
594 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
595 		return (NULL);
596 	return (dp);
597 }
598 
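/*
 * Round-robin selection: return the hinted disk (if it is still active) and
 * advance the hint to the next active disk.
 */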
599 static struct g_mirror_disk *
600 g_mirror_get_disk(struct g_mirror_softc *sc)
601 {
602 	struct g_mirror_disk *disk;
603 
604 	if (sc->sc_hint == NULL) {
605 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
606 		if (sc->sc_hint == NULL)
607 			return (NULL);
608 	}
609 	disk = sc->sc_hint;
610 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
611 		disk = g_mirror_find_next(sc, disk);
612 		if (disk == NULL)
613 			return (NULL);
614 	}
615 	sc->sc_hint = g_mirror_find_next(sc, disk);
616 	return (disk);
617 }
618 
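/*
 * Write the metadata (or clear it, if md is NULL) into the last sector of
 * the component.  On failure the disk may be disconnected, depending on the
 * disconnect_on_failure knob.
 */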
619 static int
620 g_mirror_write_metadata(struct g_mirror_disk *disk,
621     struct g_mirror_metadata *md)
622 {
623 	struct g_mirror_softc *sc;
624 	struct g_consumer *cp;
625 	off_t offset, length;
626 	u_char *sector;
627 	int error = 0;
628 
629 	g_topology_assert_not();
630 	sc = disk->d_softc;
631 	sx_assert(&sc->sc_lock, SX_LOCKED);
632 
633 	cp = disk->d_consumer;
634 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
635 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
636 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
637 	    ("Consumer %s closed? (r%dw%de%d).", cp->provider->name, cp->acr,
638 	    cp->acw, cp->ace));
639 	length = cp->provider->sectorsize;
640 	offset = cp->provider->mediasize - length;
641 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
642 	if (md != NULL)
643 		mirror_metadata_encode(md, sector);
644 	error = g_write_data(cp, offset, sector, length);
645 	free(sector, M_MIRROR);
646 	if (error != 0) {
647 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
648 			disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
649 			G_MIRROR_DEBUG(0, "Cannot write metadata on %s "
650 			    "(device=%s, error=%d).",
651 			    g_mirror_get_diskname(disk), sc->sc_name, error);
652 		} else {
653 			G_MIRROR_DEBUG(1, "Cannot write metadata on %s "
654 			    "(device=%s, error=%d).",
655 			    g_mirror_get_diskname(disk), sc->sc_name, error);
656 		}
657 		if (g_mirror_disconnect_on_failure &&
658 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1) {
659 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
660 			g_mirror_event_send(disk,
661 			    G_MIRROR_DISK_STATE_DISCONNECTED,
662 			    G_MIRROR_EVENT_DONTWAIT);
663 		}
664 	}
665 	return (error);
666 }
667 
668 static int
669 g_mirror_clear_metadata(struct g_mirror_disk *disk)
670 {
671 	int error;
672 
673 	g_topology_assert_not();
674 	sx_assert(&disk->d_softc->sc_lock, SX_LOCKED);
675 
676 	error = g_mirror_write_metadata(disk, NULL);
677 	if (error == 0) {
678 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
679 		    g_mirror_get_diskname(disk));
680 	} else {
681 		G_MIRROR_DEBUG(0,
682 		    "Cannot clear metadata on disk %s (error=%d).",
683 		    g_mirror_get_diskname(disk), error);
684 	}
685 	return (error);
686 }
687 
688 void
689 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
690     struct g_mirror_metadata *md)
691 {
692 
693 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
694 	md->md_version = G_MIRROR_VERSION;
695 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
696 	md->md_mid = sc->sc_id;
697 	md->md_all = sc->sc_ndisks;
698 	md->md_slice = sc->sc_slice;
699 	md->md_balance = sc->sc_balance;
700 	md->md_genid = sc->sc_genid;
701 	md->md_mediasize = sc->sc_mediasize;
702 	md->md_sectorsize = sc->sc_sectorsize;
703 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
704 	bzero(md->md_provider, sizeof(md->md_provider));
705 	if (disk == NULL) {
706 		md->md_did = arc4random();
707 		md->md_priority = 0;
708 		md->md_syncid = 0;
709 		md->md_dflags = 0;
710 		md->md_sync_offset = 0;
711 		md->md_provsize = 0;
712 	} else {
713 		md->md_did = disk->d_id;
714 		md->md_priority = disk->d_priority;
715 		md->md_syncid = disk->d_sync.ds_syncid;
716 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
717 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
718 			md->md_sync_offset = disk->d_sync.ds_offset_done;
719 		else
720 			md->md_sync_offset = 0;
721 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
722 			strlcpy(md->md_provider,
723 			    disk->d_consumer->provider->name,
724 			    sizeof(md->md_provider));
725 		}
726 		md->md_provsize = disk->d_consumer->provider->mediasize;
727 	}
728 }
729 
730 void
731 g_mirror_update_metadata(struct g_mirror_disk *disk)
732 {
733 	struct g_mirror_softc *sc;
734 	struct g_mirror_metadata md;
735 	int error;
736 
737 	g_topology_assert_not();
738 	sc = disk->d_softc;
739 	sx_assert(&sc->sc_lock, SX_LOCKED);
740 
741 	g_mirror_fill_metadata(sc, disk, &md);
742 	error = g_mirror_write_metadata(disk, &md);
743 	if (error == 0) {
744 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
745 		    g_mirror_get_diskname(disk));
746 	} else {
747 		G_MIRROR_DEBUG(0,
748 		    "Cannot update metadata on disk %s (error=%d).",
749 		    g_mirror_get_diskname(disk), error);
750 	}
751 }
752 
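/*
 * Bump the synchronization ID and store it in the metadata of every ACTIVE
 * and SYNCHRONIZING disk.
 */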
753 static void
754 g_mirror_bump_syncid(struct g_mirror_softc *sc)
755 {
756 	struct g_mirror_disk *disk;
757 
758 	g_topology_assert_not();
759 	sx_assert(&sc->sc_lock, SX_XLOCKED);
760 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
761 	    ("%s called with no active disks (device=%s).", __func__,
762 	    sc->sc_name));
763 
764 	sc->sc_syncid++;
765 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
766 	    sc->sc_syncid);
767 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
768 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
769 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
770 			disk->d_sync.ds_syncid = sc->sc_syncid;
771 			g_mirror_update_metadata(disk);
772 		}
773 	}
774 }
775 
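/*
 * Bump the generation ID and store it in the metadata of every ACTIVE and
 * SYNCHRONIZING disk.
 */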
776 static void
777 g_mirror_bump_genid(struct g_mirror_softc *sc)
778 {
779 	struct g_mirror_disk *disk;
780 
781 	g_topology_assert_not();
782 	sx_assert(&sc->sc_lock, SX_XLOCKED);
783 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
784 	    ("%s called with no active disks (device=%s).", __func__,
785 	    sc->sc_name));
786 
787 	sc->sc_genid++;
788 	G_MIRROR_DEBUG(1, "Device %s: genid bumped to %u.", sc->sc_name,
789 	    sc->sc_genid);
790 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
791 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
792 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
793 			disk->d_genid = sc->sc_genid;
794 			g_mirror_update_metadata(disk);
795 		}
796 	}
797 }
798 
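/*
 * Mark all active components as clean once the mirror has been idle long
 * enough.  Return the number of seconds left until the idle timeout expires,
 * or 0 when there is nothing (more) to do.
 */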
799 static int
800 g_mirror_idle(struct g_mirror_softc *sc, int acw)
801 {
802 	struct g_mirror_disk *disk;
803 	int timeout;
804 
805 	g_topology_assert_not();
806 	sx_assert(&sc->sc_lock, SX_XLOCKED);
807 
808 	if (sc->sc_provider == NULL)
809 		return (0);
810 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
811 		return (0);
812 	if (sc->sc_idle)
813 		return (0);
814 	if (sc->sc_writes > 0)
815 		return (0);
816 	if (acw > 0 || (acw == -1 && sc->sc_provider->acw > 0)) {
817 		timeout = g_mirror_idletime - (time_uptime - sc->sc_last_write);
818 		if (timeout > 0)
819 			return (timeout);
820 	}
821 	sc->sc_idle = 1;
822 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
823 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
824 			continue;
825 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
826 		    g_mirror_get_diskname(disk), sc->sc_name);
827 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
828 		g_mirror_update_metadata(disk);
829 	}
830 	return (0);
831 }
832 
833 static void
834 g_mirror_unidle(struct g_mirror_softc *sc)
835 {
836 	struct g_mirror_disk *disk;
837 
838 	g_topology_assert_not();
839 	sx_assert(&sc->sc_lock, SX_XLOCKED);
840 
841 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
842 		return;
843 	sc->sc_idle = 0;
844 	sc->sc_last_write = time_uptime;
845 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
846 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
847 			continue;
848 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
849 		    g_mirror_get_diskname(disk), sc->sc_name);
850 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
851 		g_mirror_update_metadata(disk);
852 	}
853 }
854 
855 static void
856 g_mirror_done(struct bio *bp)
857 {
858 	struct g_mirror_softc *sc;
859 
860 	sc = bp->bio_from->geom->softc;
861 	bp->bio_cflags = G_MIRROR_BIO_FLAG_REGULAR;
862 	mtx_lock(&sc->sc_queue_mtx);
863 	bioq_disksort(&sc->sc_queue, bp);
864 	mtx_unlock(&sc->sc_queue_mtx);
865 	wakeup(sc);
866 }
867 
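/*
 * Handle completion of a request cloned to a component.  A failed READ is
 * requeued so it can be retried on another active component; a WRITE or
 * DELETE succeeds as long as at least one component completed it.
 */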
868 static void
869 g_mirror_regular_request(struct bio *bp)
870 {
871 	struct g_mirror_softc *sc;
872 	struct g_mirror_disk *disk;
873 	struct bio *pbp;
874 
875 	g_topology_assert_not();
876 
877 	pbp = bp->bio_parent;
878 	sc = pbp->bio_to->geom->softc;
879 	bp->bio_from->index--;
880 	if (bp->bio_cmd == BIO_WRITE)
881 		sc->sc_writes--;
882 	disk = bp->bio_from->private;
883 	if (disk == NULL) {
884 		g_topology_lock();
885 		g_mirror_kill_consumer(sc, bp->bio_from);
886 		g_topology_unlock();
887 	}
888 
889 	pbp->bio_inbed++;
890 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
891 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
892 	    pbp->bio_children));
893 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
894 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
895 		g_destroy_bio(bp);
896 		if (pbp->bio_children == pbp->bio_inbed) {
897 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
898 			pbp->bio_completed = pbp->bio_length;
899 			if (pbp->bio_cmd == BIO_WRITE ||
900 			    pbp->bio_cmd == BIO_DELETE) {
901 				bioq_remove(&sc->sc_inflight, pbp);
902 				/* Release delayed sync requests if possible. */
903 				g_mirror_sync_release(sc);
904 			}
905 			g_io_deliver(pbp, pbp->bio_error);
906 		}
907 		return;
908 	} else if (bp->bio_error != 0) {
909 		if (pbp->bio_error == 0)
910 			pbp->bio_error = bp->bio_error;
911 		if (disk != NULL) {
912 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_BROKEN) == 0) {
913 				disk->d_flags |= G_MIRROR_DISK_FLAG_BROKEN;
914 				G_MIRROR_LOGREQ(0, bp,
915 				    "Request failed (error=%d).",
916 				    bp->bio_error);
917 			} else {
918 				G_MIRROR_LOGREQ(1, bp,
919 				    "Request failed (error=%d).",
920 				    bp->bio_error);
921 			}
922 			if (g_mirror_disconnect_on_failure &&
923 			    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 1)
924 			{
925 				sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
926 				g_mirror_event_send(disk,
927 				    G_MIRROR_DISK_STATE_DISCONNECTED,
928 				    G_MIRROR_EVENT_DONTWAIT);
929 			}
930 		}
931 		switch (pbp->bio_cmd) {
932 		case BIO_DELETE:
933 		case BIO_WRITE:
934 			pbp->bio_inbed--;
935 			pbp->bio_children--;
936 			break;
937 		}
938 	}
939 	g_destroy_bio(bp);
940 
941 	switch (pbp->bio_cmd) {
942 	case BIO_READ:
943 		if (pbp->bio_inbed < pbp->bio_children)
944 			break;
945 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 1)
946 			g_io_deliver(pbp, pbp->bio_error);
947 		else {
948 			pbp->bio_error = 0;
949 			mtx_lock(&sc->sc_queue_mtx);
950 			bioq_disksort(&sc->sc_queue, pbp);
951 			mtx_unlock(&sc->sc_queue_mtx);
952 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
953 			wakeup(sc);
954 		}
955 		break;
956 	case BIO_DELETE:
957 	case BIO_WRITE:
958 		if (pbp->bio_children == 0) {
959 			/*
960 			 * All requests failed.
961 			 */
962 		} else if (pbp->bio_inbed < pbp->bio_children) {
963 			/* Do nothing. */
964 			break;
965 		} else if (pbp->bio_children == pbp->bio_inbed) {
966 			/* Some requests succeeded. */
967 			pbp->bio_error = 0;
968 			pbp->bio_completed = pbp->bio_length;
969 		}
970 		bioq_remove(&sc->sc_inflight, pbp);
971 		/* Release delayed sync requests if possible. */
972 		g_mirror_sync_release(sc);
973 		g_io_deliver(pbp, pbp->bio_error);
974 		break;
975 	default:
976 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
977 		break;
978 	}
979 }
980 
981 static void
982 g_mirror_sync_done(struct bio *bp)
983 {
984 	struct g_mirror_softc *sc;
985 
986 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
987 	sc = bp->bio_from->geom->softc;
988 	bp->bio_cflags = G_MIRROR_BIO_FLAG_SYNC;
989 	mtx_lock(&sc->sc_queue_mtx);
990 	bioq_disksort(&sc->sc_queue, bp);
991 	mtx_unlock(&sc->sc_queue_mtx);
992 	wakeup(sc);
993 }
994 
995 static void
996 g_mirror_kernel_dump(struct bio *bp)
997 {
998 	struct g_mirror_softc *sc;
999 	struct g_mirror_disk *disk;
1000 	struct bio *cbp;
1001 	struct g_kerneldump *gkd;
1002 
1003 	/*
1004 	 * We configure dumping to the first component, because this component
1005 	 * will be used for reading with the 'prefer' balance algorithm.
1006 	 * If the component with the highest priority is currently disconnected
1007 	 * we will not be able to read the dump after the reboot if it is
1008 	 * connected and synchronized later. Can we do something better?
1009 	 */
1010 	sc = bp->bio_to->geom->softc;
1011 	disk = LIST_FIRST(&sc->sc_disks);
1012 
1013 	gkd = (struct g_kerneldump *)bp->bio_data;
1014 	if (gkd->length > bp->bio_to->mediasize)
1015 		gkd->length = bp->bio_to->mediasize;
1016 	cbp = g_clone_bio(bp);
1017 	if (cbp == NULL) {
1018 		g_io_deliver(bp, ENOMEM);
1019 		return;
1020 	}
1021 	cbp->bio_done = g_std_done;
1022 	g_io_request(cbp, disk->d_consumer);
1023 	G_MIRROR_DEBUG(1, "Kernel dump will go to %s.",
1024 	    g_mirror_get_diskname(disk));
1025 }
1026 
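/*
 * Mirror a BIO_FLUSH request: clone it to every active component and let
 * g_std_done() collect the results.
 */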
1027 static void
1028 g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp)
1029 {
1030 	struct bio_queue_head queue;
1031 	struct g_mirror_disk *disk;
1032 	struct g_consumer *cp;
1033 	struct bio *cbp;
1034 
1035 	bioq_init(&queue);
1036 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1037 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1038 			continue;
1039 		cbp = g_clone_bio(bp);
1040 		if (cbp == NULL) {
1041 			for (cbp = bioq_first(&queue); cbp != NULL;
1042 			    cbp = bioq_first(&queue)) {
1043 				bioq_remove(&queue, cbp);
1044 				g_destroy_bio(cbp);
1045 			}
1046 			if (bp->bio_error == 0)
1047 				bp->bio_error = ENOMEM;
1048 			g_io_deliver(bp, bp->bio_error);
1049 			return;
1050 		}
1051 		bioq_insert_tail(&queue, cbp);
1052 		cbp->bio_done = g_std_done;
1053 		cbp->bio_caller1 = disk;
1054 		cbp->bio_to = disk->d_consumer->provider;
1055 	}
1056 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1057 		bioq_remove(&queue, cbp);
1058 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1059 		disk = cbp->bio_caller1;
1060 		cbp->bio_caller1 = NULL;
1061 		cp = disk->d_consumer;
1062 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1063 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1064 		    cp->acr, cp->acw, cp->ace));
1065 		g_io_request(cbp, disk->d_consumer);
1066 	}
1067 }
1068 
1069 static void
1070 g_mirror_start(struct bio *bp)
1071 {
1072 	struct g_mirror_softc *sc;
1073 
1074 	sc = bp->bio_to->geom->softc;
1075 	/*
1076 	 * If sc == NULL or there are no valid disks, the provider's error
1077 	 * should be set and g_mirror_start() should not be called at all.
1078 	 */
1079 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1080 	    ("Provider's error should be set (error=%d)(mirror=%s).",
1081 	    bp->bio_to->error, bp->bio_to->name));
1082 	G_MIRROR_LOGREQ(3, bp, "Request received.");
1083 
1084 	switch (bp->bio_cmd) {
1085 	case BIO_READ:
1086 	case BIO_WRITE:
1087 	case BIO_DELETE:
1088 		break;
1089 	case BIO_FLUSH:
1090 		g_mirror_flush(sc, bp);
1091 		return;
1092 	case BIO_GETATTR:
1093 		if (g_handleattr_int(bp, "GEOM::candelete", 1))
1094 			return;
1095 		else if (strcmp("GEOM::kerneldump", bp->bio_attribute) == 0) {
1096 			g_mirror_kernel_dump(bp);
1097 			return;
1098 		}
1099 		/* FALLTHROUGH */
1100 	default:
1101 		g_io_deliver(bp, EOPNOTSUPP);
1102 		return;
1103 	}
1104 	mtx_lock(&sc->sc_queue_mtx);
1105 	bioq_disksort(&sc->sc_queue, bp);
1106 	mtx_unlock(&sc->sc_queue_mtx);
1107 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
1108 	wakeup(sc);
1109 }
1110 
1111 /*
1112  * Return TRUE if the given request is colliding with an in-progress
1113  * synchronization request.
1114  */
1115 static int
1116 g_mirror_sync_collision(struct g_mirror_softc *sc, struct bio *bp)
1117 {
1118 	struct g_mirror_disk *disk;
1119 	struct bio *sbp;
1120 	off_t rstart, rend, sstart, send;
1121 	int i;
1122 
1123 	if (sc->sc_sync.ds_ndisks == 0)
1124 		return (0);
1125 	rstart = bp->bio_offset;
1126 	rend = bp->bio_offset + bp->bio_length;
1127 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1128 		if (disk->d_state != G_MIRROR_DISK_STATE_SYNCHRONIZING)
1129 			continue;
1130 		for (i = 0; i < g_mirror_syncreqs; i++) {
1131 			sbp = disk->d_sync.ds_bios[i];
1132 			if (sbp == NULL)
1133 				continue;
1134 			sstart = sbp->bio_offset;
1135 			send = sbp->bio_offset + sbp->bio_length;
1136 			if (rend > sstart && rstart < send)
1137 				return (1);
1138 		}
1139 	}
1140 	return (0);
1141 }
1142 
1143 /*
1144  * Return TRUE if the given sync request is colliding with an in-progress
1145  * regular request.
1146  */
1147 static int
1148 g_mirror_regular_collision(struct g_mirror_softc *sc, struct bio *sbp)
1149 {
1150 	off_t rstart, rend, sstart, send;
1151 	struct bio *bp;
1152 
1153 	if (sc->sc_sync.ds_ndisks == 0)
1154 		return (0);
1155 	sstart = sbp->bio_offset;
1156 	send = sbp->bio_offset + sbp->bio_length;
1157 	TAILQ_FOREACH(bp, &sc->sc_inflight.queue, bio_queue) {
1158 		rstart = bp->bio_offset;
1159 		rend = bp->bio_offset + bp->bio_length;
1160 		if (rend > sstart && rstart < send)
1161 			return (1);
1162 	}
1163 	return (0);
1164 }
1165 
1166 /*
1167  * Put the request onto the delayed queue.
1168  */
1169 static void
1170 g_mirror_regular_delay(struct g_mirror_softc *sc, struct bio *bp)
1171 {
1172 
1173 	G_MIRROR_LOGREQ(2, bp, "Delaying request.");
1174 	bioq_insert_head(&sc->sc_regular_delayed, bp);
1175 }
1176 
1177 /*
1178  * Put the synchronization request onto the delayed queue.
1179  */
1180 static void
1181 g_mirror_sync_delay(struct g_mirror_softc *sc, struct bio *bp)
1182 {
1183 
1184 	G_MIRROR_LOGREQ(2, bp, "Delaying synchronization request.");
1185 	bioq_insert_tail(&sc->sc_sync_delayed, bp);
1186 }
1187 
1188 /*
1189  * Release delayed regular requests which no longer collide with
1190  * synchronization requests.
1191  */
1192 static void
1193 g_mirror_regular_release(struct g_mirror_softc *sc)
1194 {
1195 	struct bio *bp, *bp2;
1196 
1197 	TAILQ_FOREACH_SAFE(bp, &sc->sc_regular_delayed.queue, bio_queue, bp2) {
1198 		if (g_mirror_sync_collision(sc, bp))
1199 			continue;
1200 		bioq_remove(&sc->sc_regular_delayed, bp);
1201 		G_MIRROR_LOGREQ(2, bp, "Releasing delayed request (%p).", bp);
1202 		mtx_lock(&sc->sc_queue_mtx);
1203 		bioq_insert_head(&sc->sc_queue, bp);
1204 #if 0
1205 		/*
1206 		 * wakeup() is not needed, because this function is called from
1207 		 * the worker thread.
1208 		 */
1209 		wakeup(&sc->sc_queue);
1210 #endif
1211 		mtx_unlock(&sc->sc_queue_mtx);
1212 	}
1213 }
1214 
1215 /*
1216  * Release delayed synchronization requests which no longer collide with
1217  * regular requests.
1218  */
1219 static void
1220 g_mirror_sync_release(struct g_mirror_softc *sc)
1221 {
1222 	struct bio *bp, *bp2;
1223 
1224 	TAILQ_FOREACH_SAFE(bp, &sc->sc_sync_delayed.queue, bio_queue, bp2) {
1225 		if (g_mirror_regular_collision(sc, bp))
1226 			continue;
1227 		bioq_remove(&sc->sc_sync_delayed, bp);
1228 		G_MIRROR_LOGREQ(2, bp,
1229 		    "Releasing delayed synchronization request.");
1230 		g_io_request(bp, bp->bio_from);
1231 	}
1232 }
1233 
1234 /*
1235  * Handle synchronization requests.
1236  * Every synchronization request is a two-step process: first, a READ
1237  * request is sent to the active provider, then a WRITE request (carrying
1238  * the data just read) is sent to the provider being synchronized.  When
1239  * the WRITE is finished, a new synchronization request is sent.
1240  */
1241 static void
1242 g_mirror_sync_request(struct bio *bp)
1243 {
1244 	struct g_mirror_softc *sc;
1245 	struct g_mirror_disk *disk;
1246 
1247 	bp->bio_from->index--;
1248 	sc = bp->bio_from->geom->softc;
1249 	disk = bp->bio_from->private;
1250 	if (disk == NULL) {
1251 		sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
1252 		g_topology_lock();
1253 		g_mirror_kill_consumer(sc, bp->bio_from);
1254 		g_topology_unlock();
1255 		free(bp->bio_data, M_MIRROR);
1256 		g_destroy_bio(bp);
1257 		sx_xlock(&sc->sc_lock);
1258 		return;
1259 	}
1260 
1261 	/*
1262 	 * Synchronization request.
1263 	 */
1264 	switch (bp->bio_cmd) {
1265 	case BIO_READ:
1266 	    {
1267 		struct g_consumer *cp;
1268 
1269 		if (bp->bio_error != 0) {
1270 			G_MIRROR_LOGREQ(0, bp,
1271 			    "Synchronization request failed (error=%d).",
1272 			    bp->bio_error);
1273 			g_destroy_bio(bp);
1274 			return;
1275 		}
1276 		G_MIRROR_LOGREQ(3, bp,
1277 		    "Synchronization request half-finished.");
1278 		bp->bio_cmd = BIO_WRITE;
1279 		bp->bio_cflags = 0;
1280 		cp = disk->d_consumer;
1281 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1282 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1283 		    cp->acr, cp->acw, cp->ace));
1284 		cp->index++;
1285 		g_io_request(bp, cp);
1286 		return;
1287 	    }
1288 	case BIO_WRITE:
1289 	    {
1290 		struct g_mirror_disk_sync *sync;
1291 		off_t offset;
1292 		void *data;
1293 		int i;
1294 
1295 		if (bp->bio_error != 0) {
1296 			G_MIRROR_LOGREQ(0, bp,
1297 			    "Synchronization request failed (error=%d).",
1298 			    bp->bio_error);
1299 			g_destroy_bio(bp);
1300 			sc->sc_bump_id |= G_MIRROR_BUMP_GENID;
1301 			g_mirror_event_send(disk,
1302 			    G_MIRROR_DISK_STATE_DISCONNECTED,
1303 			    G_MIRROR_EVENT_DONTWAIT);
1304 			return;
1305 		}
1306 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
1307 		sync = &disk->d_sync;
1308 		if (sync->ds_offset == sc->sc_mediasize ||
1309 		    sync->ds_consumer == NULL ||
1310 		    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1311 			/* Don't send more synchronization requests. */
1312 			sync->ds_inflight--;
1313 			if (sync->ds_bios != NULL) {
1314 				i = (int)(uintptr_t)bp->bio_caller1;
1315 				sync->ds_bios[i] = NULL;
1316 			}
1317 			free(bp->bio_data, M_MIRROR);
1318 			g_destroy_bio(bp);
1319 			if (sync->ds_inflight > 0)
1320 				return;
1321 			if (sync->ds_consumer == NULL ||
1322 			    (sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1323 				return;
1324 			}
1325 			/* Disk up-to-date, activate it. */
1326 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1327 			    G_MIRROR_EVENT_DONTWAIT);
1328 			return;
1329 		}
1330 
1331 		/* Send next synchronization request. */
1332 		data = bp->bio_data;
1333 		bzero(bp, sizeof(*bp));
1334 		bp->bio_cmd = BIO_READ;
1335 		bp->bio_offset = sync->ds_offset;
1336 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1337 		sync->ds_offset += bp->bio_length;
1338 		bp->bio_done = g_mirror_sync_done;
1339 		bp->bio_data = data;
1340 		bp->bio_from = sync->ds_consumer;
1341 		bp->bio_to = sc->sc_provider;
1342 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1343 		sync->ds_consumer->index++;
1344 		/*
1345 		 * Delay the request if it is colliding with a regular request.
1346 		 */
1347 		if (g_mirror_regular_collision(sc, bp))
1348 			g_mirror_sync_delay(sc, bp);
1349 		else
1350 			g_io_request(bp, sync->ds_consumer);
1351 
1352 		/* Release delayed requests if possible. */
1353 		g_mirror_regular_release(sc);
1354 
1355 		/* Find the smallest offset among in-flight sync requests. */
1356 		offset = sc->sc_mediasize;
1357 		for (i = 0; i < g_mirror_syncreqs; i++) {
1358 			bp = sync->ds_bios[i];
1359 			if (bp->bio_offset < offset)
1360 				offset = bp->bio_offset;
1361 		}
1362 		if (sync->ds_offset_done + (MAXPHYS * 100) < offset) {
1363 			/* Update offset_done on every 100 blocks. */
1364 			sync->ds_offset_done = offset;
1365 			g_mirror_update_metadata(disk);
1366 		}
1367 		return;
1368 	    }
1369 	default:
1370 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1371 		    bp->bio_cmd, sc->sc_name));
1372 		break;
1373 	}
1374 }
1375 
1376 static void
1377 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
1378 {
1379 	struct g_mirror_disk *disk;
1380 	struct g_consumer *cp;
1381 	struct bio *cbp;
1382 
1383 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1384 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1385 			break;
1386 	}
1387 	if (disk == NULL) {
1388 		if (bp->bio_error == 0)
1389 			bp->bio_error = ENXIO;
1390 		g_io_deliver(bp, bp->bio_error);
1391 		return;
1392 	}
1393 	cbp = g_clone_bio(bp);
1394 	if (cbp == NULL) {
1395 		if (bp->bio_error == 0)
1396 			bp->bio_error = ENOMEM;
1397 		g_io_deliver(bp, bp->bio_error);
1398 		return;
1399 	}
1400 	/*
1401 	 * Fill in the component buf structure.
1402 	 */
1403 	cp = disk->d_consumer;
1404 	cbp->bio_done = g_mirror_done;
1405 	cbp->bio_to = cp->provider;
1406 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1407 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1408 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1409 	    cp->acw, cp->ace));
1410 	cp->index++;
1411 	g_io_request(cbp, cp);
1412 }
1413 
1414 static void
1415 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1416 {
1417 	struct g_mirror_disk *disk;
1418 	struct g_consumer *cp;
1419 	struct bio *cbp;
1420 
1421 	disk = g_mirror_get_disk(sc);
1422 	if (disk == NULL) {
1423 		if (bp->bio_error == 0)
1424 			bp->bio_error = ENXIO;
1425 		g_io_deliver(bp, bp->bio_error);
1426 		return;
1427 	}
1428 	cbp = g_clone_bio(bp);
1429 	if (cbp == NULL) {
1430 		if (bp->bio_error == 0)
1431 			bp->bio_error = ENOMEM;
1432 		g_io_deliver(bp, bp->bio_error);
1433 		return;
1434 	}
1435 	/*
1436 	 * Fill in the component buf structure.
1437 	 */
1438 	cp = disk->d_consumer;
1439 	cbp->bio_done = g_mirror_done;
1440 	cbp->bio_to = cp->provider;
1441 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1442 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1443 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1444 	    cp->acw, cp->ace));
1445 	cp->index++;
1446 	g_io_request(cbp, cp);
1447 }
1448 
1449 #define TRACK_SIZE  (1 * 1024 * 1024)
1450 #define LOAD_SCALE	256
1451 #define ABS(x)		(((x) >= 0) ? (x) : (-(x)))
1452 
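/*
 * LOAD balancing: choose the active component with the lowest load, strongly
 * preferring a disk whose head is already exactly at bio_offset and mildly
 * preferring one within TRACK_SIZE of it.  Each disk's load is an
 * exponentially weighted moving average of its number of in-flight requests.
 */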
1453 static void
1454 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1455 {
1456 	struct g_mirror_disk *disk, *dp;
1457 	struct g_consumer *cp;
1458 	struct bio *cbp;
1459 	int prio, best;
1460 
1461 	/* Find a disk with the smallest load. */
1462 	disk = NULL;
1463 	best = INT_MAX;
1464 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1465 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1466 			continue;
1467 		prio = dp->load;
1468 		/* If disk head is precisely in position - highly prefer it. */
1469 		if (dp->d_last_offset == bp->bio_offset)
1470 			prio -= 2 * LOAD_SCALE;
1471 		else
1472 		/* If disk head is close to position - prefer it. */
1473 		if (ABS(dp->d_last_offset - bp->bio_offset) < TRACK_SIZE)
1474 			prio -= 1 * LOAD_SCALE;
1475 		if (prio <= best) {
1476 			disk = dp;
1477 			best = prio;
1478 		}
1479 	}
1480 	KASSERT(disk != NULL, ("NULL disk for %s.", sc->sc_name));
1481 	cbp = g_clone_bio(bp);
1482 	if (cbp == NULL) {
1483 		if (bp->bio_error == 0)
1484 			bp->bio_error = ENOMEM;
1485 		g_io_deliver(bp, bp->bio_error);
1486 		return;
1487 	}
1488 	/*
1489 	 * Fill in the component buf structure.
1490 	 */
1491 	cp = disk->d_consumer;
1492 	cbp->bio_done = g_mirror_done;
1493 	cbp->bio_to = cp->provider;
1494 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1495 	KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1496 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1497 	    cp->acw, cp->ace));
1498 	cp->index++;
1499 	/* Remember last head position */
1500 	disk->d_last_offset = bp->bio_offset + bp->bio_length;
1501 	/* Update loads. */
1502 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1503 		dp->load = (dp->d_consumer->index * LOAD_SCALE +
1504 		    dp->load * 7) / 8;
1505 	}
1506 	g_io_request(cbp, cp);
1507 }
1508 
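/*
 * SPLIT balancing: divide a large read evenly (rounded up to a multiple of
 * the sector size) among all active components.  Requests not larger than
 * sc_slice are simply served round-robin.
 */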
1509 static void
1510 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1511 {
1512 	struct bio_queue_head queue;
1513 	struct g_mirror_disk *disk;
1514 	struct g_consumer *cp;
1515 	struct bio *cbp;
1516 	off_t left, mod, offset, slice;
1517 	u_char *data;
1518 	u_int ndisks;
1519 
1520 	if (bp->bio_length <= sc->sc_slice) {
1521 		g_mirror_request_round_robin(sc, bp);
1522 		return;
1523 	}
1524 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1525 	slice = bp->bio_length / ndisks;
1526 	mod = slice % sc->sc_provider->sectorsize;
1527 	if (mod != 0)
1528 		slice += sc->sc_provider->sectorsize - mod;
1529 	/*
1530 	 * Allocate all bios before sending any request, so we can
1531 	 * return ENOMEM in a nice and clean way.
1532 	 */
1533 	left = bp->bio_length;
1534 	offset = bp->bio_offset;
1535 	data = bp->bio_data;
1536 	bioq_init(&queue);
1537 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1538 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1539 			continue;
1540 		cbp = g_clone_bio(bp);
1541 		if (cbp == NULL) {
1542 			for (cbp = bioq_first(&queue); cbp != NULL;
1543 			    cbp = bioq_first(&queue)) {
1544 				bioq_remove(&queue, cbp);
1545 				g_destroy_bio(cbp);
1546 			}
1547 			if (bp->bio_error == 0)
1548 				bp->bio_error = ENOMEM;
1549 			g_io_deliver(bp, bp->bio_error);
1550 			return;
1551 		}
1552 		bioq_insert_tail(&queue, cbp);
1553 		cbp->bio_done = g_mirror_done;
1554 		cbp->bio_caller1 = disk;
1555 		cbp->bio_to = disk->d_consumer->provider;
1556 		cbp->bio_offset = offset;
1557 		cbp->bio_data = data;
1558 		cbp->bio_length = MIN(left, slice);
1559 		left -= cbp->bio_length;
1560 		if (left == 0)
1561 			break;
1562 		offset += cbp->bio_length;
1563 		data += cbp->bio_length;
1564 	}
1565 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1566 		bioq_remove(&queue, cbp);
1567 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1568 		disk = cbp->bio_caller1;
1569 		cbp->bio_caller1 = NULL;
1570 		cp = disk->d_consumer;
1571 		KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1572 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1573 		    cp->acr, cp->acw, cp->ace));
1574 		disk->d_consumer->index++;
1575 		g_io_request(cbp, disk->d_consumer);
1576 	}
1577 }
1578 
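/*
 * Dispatch a regular request: READs go to a single component picked by the
 * configured balance algorithm; WRITEs and DELETEs are cloned to every
 * component which is ACTIVE, or SYNCHRONIZING and already synchronized past
 * the request's offset.
 */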
1579 static void
1580 g_mirror_register_request(struct bio *bp)
1581 {
1582 	struct g_mirror_softc *sc;
1583 
1584 	sc = bp->bio_to->geom->softc;
1585 	switch (bp->bio_cmd) {
1586 	case BIO_READ:
1587 		switch (sc->sc_balance) {
1588 		case G_MIRROR_BALANCE_LOAD:
1589 			g_mirror_request_load(sc, bp);
1590 			break;
1591 		case G_MIRROR_BALANCE_PREFER:
1592 			g_mirror_request_prefer(sc, bp);
1593 			break;
1594 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1595 			g_mirror_request_round_robin(sc, bp);
1596 			break;
1597 		case G_MIRROR_BALANCE_SPLIT:
1598 			g_mirror_request_split(sc, bp);
1599 			break;
1600 		}
1601 		return;
1602 	case BIO_WRITE:
1603 	case BIO_DELETE:
1604 	    {
1605 		struct g_mirror_disk *disk;
1606 		struct g_mirror_disk_sync *sync;
1607 		struct bio_queue_head queue;
1608 		struct g_consumer *cp;
1609 		struct bio *cbp;
1610 
1611 		/*
1612 		 * Delay the request if it is colliding with a synchronization
1613 		 * request.
1614 		 */
1615 		if (g_mirror_sync_collision(sc, bp)) {
1616 			g_mirror_regular_delay(sc, bp);
1617 			return;
1618 		}
1619 
1620 		if (sc->sc_idle)
1621 			g_mirror_unidle(sc);
1622 		else
1623 			sc->sc_last_write = time_uptime;
1624 
1625 		/*
1626 		 * Allocate all bios before sending any request, so we can
1627 		 * return ENOMEM in a nice and clean way.
1628 		 */
1629 		bioq_init(&queue);
1630 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1631 			sync = &disk->d_sync;
1632 			switch (disk->d_state) {
1633 			case G_MIRROR_DISK_STATE_ACTIVE:
1634 				break;
1635 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1636 				if (bp->bio_offset >= sync->ds_offset)
1637 					continue;
1638 				break;
1639 			default:
1640 				continue;
1641 			}
1642 			if (bp->bio_cmd == BIO_DELETE &&
1643 			    (disk->d_flags & G_MIRROR_DISK_FLAG_CANDELETE) == 0)
1644 				continue;
1645 			cbp = g_clone_bio(bp);
1646 			if (cbp == NULL) {
1647 				for (cbp = bioq_first(&queue); cbp != NULL;
1648 				    cbp = bioq_first(&queue)) {
1649 					bioq_remove(&queue, cbp);
1650 					g_destroy_bio(cbp);
1651 				}
1652 				if (bp->bio_error == 0)
1653 					bp->bio_error = ENOMEM;
1654 				g_io_deliver(bp, bp->bio_error);
1655 				return;
1656 			}
1657 			bioq_insert_tail(&queue, cbp);
1658 			cbp->bio_done = g_mirror_done;
1659 			cp = disk->d_consumer;
1660 			cbp->bio_caller1 = cp;
1661 			cbp->bio_to = cp->provider;
1662 			KASSERT(cp->acr >= 1 && cp->acw >= 1 && cp->ace >= 1,
1663 			    ("Consumer %s not opened (r%dw%de%d).",
1664 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1665 		}
1666 		for (cbp = bioq_first(&queue); cbp != NULL;
1667 		    cbp = bioq_first(&queue)) {
1668 			bioq_remove(&queue, cbp);
1669 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1670 			cp = cbp->bio_caller1;
1671 			cbp->bio_caller1 = NULL;
1672 			cp->index++;
1673 			sc->sc_writes++;
1674 			g_io_request(cbp, cp);
1675 		}
1676 		/*
1677 		 * Put request onto inflight queue, so we can check if new
1678 		 * synchronization requests don't collide with it.
1679 		 */
1680 		bioq_insert_tail(&sc->sc_inflight, bp);
1681 		/*
1682 		 * Bump syncid on first write.
1683 		 */
1684 		if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0) {
1685 			sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
1686 			g_mirror_bump_syncid(sc);
1687 		}
1688 		return;
1689 	    }
1690 	default:
1691 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1692 		    bp->bio_cmd, sc->sc_name));
1693 		break;
1694 	}
1695 }
1696 
1697 static int
1698 g_mirror_can_destroy(struct g_mirror_softc *sc)
1699 {
1700 	struct g_geom *gp;
1701 	struct g_consumer *cp;
1702 
1703 	g_topology_assert();
1704 	gp = sc->sc_geom;
1705 	if (gp->softc == NULL)
1706 		return (1);
1707 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_TASTING) != 0)
1708 		return (0);
1709 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1710 		if (g_mirror_is_busy(sc, cp))
1711 			return (0);
1712 	}
1713 	gp = sc->sc_sync.ds_geom;
1714 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1715 		if (g_mirror_is_busy(sc, cp))
1716 			return (0);
1717 	}
1718 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1719 	    sc->sc_name);
1720 	return (1);
1721 }
1722 
1723 static int
1724 g_mirror_try_destroy(struct g_mirror_softc *sc)
1725 {
1726 
1727 	if (sc->sc_rootmount != NULL) {
1728 		G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
1729 		    sc->sc_rootmount);
1730 		root_mount_rel(sc->sc_rootmount);
1731 		sc->sc_rootmount = NULL;
1732 	}
1733 	g_topology_lock();
1734 	if (!g_mirror_can_destroy(sc)) {
1735 		g_topology_unlock();
1736 		return (0);
1737 	}
1738 	sc->sc_geom->softc = NULL;
1739 	sc->sc_sync.ds_geom->softc = NULL;
1740 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1741 		g_topology_unlock();
1742 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1743 		    &sc->sc_worker);
1744 		/* Unlock sc_lock here, as it can be destroyed after wakeup. */
1745 		sx_xunlock(&sc->sc_lock);
1746 		wakeup(&sc->sc_worker);
1747 		sc->sc_worker = NULL;
1748 	} else {
1749 		g_topology_unlock();
1750 		g_mirror_destroy_device(sc);
1751 		free(sc, M_MIRROR);
1752 	}
1753 	return (1);
1754 }
1755 
1756 /*
1757  * Worker thread.
1758  */
1759 static void
1760 g_mirror_worker(void *arg)
1761 {
1762 	struct g_mirror_softc *sc;
1763 	struct g_mirror_event *ep;
1764 	struct bio *bp;
1765 	int timeout;
1766 
1767 	sc = arg;
1768 	thread_lock(curthread);
1769 	sched_prio(curthread, PRIBIO);
1770 	thread_unlock(curthread);
1771 
1772 	sx_xlock(&sc->sc_lock);
1773 	for (;;) {
1774 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1775 		/*
1776 		 * First take a look at events.
1777 		 * This is important to handle events before any I/O requests.
1778 		 */
1779 		ep = g_mirror_event_get(sc);
1780 		if (ep != NULL) {
1781 			g_mirror_event_remove(sc, ep);
1782 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1783 				/* Update only device status. */
1784 				G_MIRROR_DEBUG(3,
1785 				    "Running event for device %s.",
1786 				    sc->sc_name);
1787 				ep->e_error = 0;
1788 				g_mirror_update_device(sc, 1);
1789 			} else {
1790 				/* Update disk status. */
1791 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1792 				     g_mirror_get_diskname(ep->e_disk));
1793 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1794 				    ep->e_state);
1795 				if (ep->e_error == 0)
1796 					g_mirror_update_device(sc, 0);
1797 			}
1798 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1799 				KASSERT(ep->e_error == 0,
1800 				    ("Error cannot be handled."));
1801 				g_mirror_event_free(ep);
1802 			} else {
1803 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1804 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1805 				    ep);
1806 				mtx_lock(&sc->sc_events_mtx);
1807 				wakeup(ep);
1808 				mtx_unlock(&sc->sc_events_mtx);
1809 			}
1810 			if ((sc->sc_flags &
1811 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1812 				if (g_mirror_try_destroy(sc)) {
1813 					curthread->td_pflags &= ~TDP_GEOM;
1814 					G_MIRROR_DEBUG(1, "Thread exiting.");
1815 					kproc_exit(0);
1816 				}
1817 			}
1818 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1819 			continue;
1820 		}
1821 		/*
1822 		 * Check if we can mark the array as CLEAN and, if we cannot,
1823 		 * for how many seconds we should wait.
1824 		 */
1825 		timeout = g_mirror_idle(sc, -1);
1826 		/*
1827 		 * Now I/O requests.
1828 		 */
1829 		/* Get first request from the queue. */
1830 		mtx_lock(&sc->sc_queue_mtx);
1831 		bp = bioq_first(&sc->sc_queue);
1832 		if (bp == NULL) {
1833 			if ((sc->sc_flags &
1834 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1835 				mtx_unlock(&sc->sc_queue_mtx);
1836 				if (g_mirror_try_destroy(sc)) {
1837 					curthread->td_pflags &= ~TDP_GEOM;
1838 					G_MIRROR_DEBUG(1, "Thread exiting.");
1839 					kproc_exit(0);
1840 				}
1841 				mtx_lock(&sc->sc_queue_mtx);
1842 			}
1843 			sx_xunlock(&sc->sc_lock);
1844 			/*
1845 			 * XXX: We can miss an event here, because an event
1846 			 *      can be added without sx-device-lock and without
1847 			 *      mtx-queue-lock. Maybe we should just stop using
1848 			 *      a dedicated mutex for event synchronization and
1849 			 *      stick with the queue lock?
1850 			 *      The event will hang here until the next I/O
1851 			 *      request or the next event is received.
1852 			 */
1853 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1",
1854 			    timeout * hz);
1855 			sx_xlock(&sc->sc_lock);
1856 			G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1857 			continue;
1858 		}
1859 		bioq_remove(&sc->sc_queue, bp);
1860 		mtx_unlock(&sc->sc_queue_mtx);
1861 
1862 		if (bp->bio_from->geom == sc->sc_sync.ds_geom &&
1863 		    (bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
1864 			g_mirror_sync_request(bp);	/* READ */
1865 		} else if (bp->bio_to != sc->sc_provider) {
1866 			if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0)
1867 				g_mirror_regular_request(bp);
1868 			else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0)
1869 				g_mirror_sync_request(bp);	/* WRITE */
1870 			else {
1871 				KASSERT(0,
1872 				    ("Invalid request cflags=0x%hhx to=%s.",
1873 				    bp->bio_cflags, bp->bio_to->name));
1874 			}
1875 		} else {
1876 			g_mirror_register_request(bp);
1877 		}
1878 		G_MIRROR_DEBUG(5, "%s: I'm here 9.", __func__);
1879 	}
1880 }
1881 
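/*
 * Editorial note: a minimal sketch (not part of this driver) of the sleep
 * idiom the worker uses above.  The queue mutex is held across the
 * emptiness check, and PDROP makes msleep(9) release that mutex atomically
 * with going to sleep, so a wakeup() issued between bioq_first() and
 * msleep() cannot be lost.  All example_* names below are hypothetical.
 */
#if 0
struct example_softc {
	struct mtx		es_queue_mtx;
	struct bio_queue_head	es_queue;
};

static void
example_worker_wait(struct example_softc *sc, int timo)
{

	mtx_lock(&sc->es_queue_mtx);
	if (bioq_first(&sc->es_queue) == NULL) {
		/* PDROP: es_queue_mtx is released once we are asleep. */
		msleep(sc, &sc->es_queue_mtx, PRIBIO | PDROP, "ex:w1", timo);
		return;
	}
	mtx_unlock(&sc->es_queue_mtx);
}
#endif
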
1882 static void
1883 g_mirror_update_idle(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
1884 {
1885 
1886 	sx_assert(&sc->sc_lock, SX_LOCKED);
1887 
1888 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) != 0)
1889 		return;
1890 	if (!sc->sc_idle && (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1891 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1892 		    g_mirror_get_diskname(disk), sc->sc_name);
1893 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1894 	} else if (sc->sc_idle &&
1895 	    (disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1896 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1897 		    g_mirror_get_diskname(disk), sc->sc_name);
1898 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1899 	}
1900 }
1901 
1902 static void
1903 g_mirror_sync_start(struct g_mirror_disk *disk)
1904 {
1905 	struct g_mirror_softc *sc;
1906 	struct g_consumer *cp;
1907 	struct bio *bp;
1908 	int error, i;
1909 
1910 	g_topology_assert_not();
1911 	sc = disk->d_softc;
1912 	sx_assert(&sc->sc_lock, SX_LOCKED);
1913 
1914 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1915 	    ("Disk %s is not marked for synchronization.",
1916 	    g_mirror_get_diskname(disk)));
1917 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1918 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1919 	    sc->sc_state));
1920 
1921 	sx_xunlock(&sc->sc_lock);
1922 	g_topology_lock();
1923 	cp = g_new_consumer(sc->sc_sync.ds_geom);
1924 	error = g_attach(cp, sc->sc_provider);
1925 	KASSERT(error == 0,
1926 	    ("Cannot attach to %s (error=%d).", sc->sc_name, error));
1927 	error = g_access(cp, 1, 0, 0);
1928 	KASSERT(error == 0, ("Cannot open %s (error=%d).", sc->sc_name, error));
1929 	g_topology_unlock();
1930 	sx_xlock(&sc->sc_lock);
1931 
1932 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1933 	    g_mirror_get_diskname(disk));
1934 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOFAILSYNC) == 0)
1935 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1936 	KASSERT(disk->d_sync.ds_consumer == NULL,
1937 	    ("Sync consumer already exists (device=%s, disk=%s).",
1938 	    sc->sc_name, g_mirror_get_diskname(disk)));
1939 
1940 	disk->d_sync.ds_consumer = cp;
1941 	disk->d_sync.ds_consumer->private = disk;
1942 	disk->d_sync.ds_consumer->index = 0;
1943 
1944 	/*
1945 	 * Allocate memory for synchronization bios and initialize them.
1946 	 */
1947 	disk->d_sync.ds_bios = malloc(sizeof(struct bio *) * g_mirror_syncreqs,
1948 	    M_MIRROR, M_WAITOK);
1949 	for (i = 0; i < g_mirror_syncreqs; i++) {
1950 		bp = g_alloc_bio();
1951 		disk->d_sync.ds_bios[i] = bp;
1952 		bp->bio_parent = NULL;
1953 		bp->bio_cmd = BIO_READ;
1954 		bp->bio_data = malloc(MAXPHYS, M_MIRROR, M_WAITOK);
1955 		bp->bio_cflags = 0;
1956 		bp->bio_offset = disk->d_sync.ds_offset;
1957 		bp->bio_length = MIN(MAXPHYS, sc->sc_mediasize - bp->bio_offset);
1958 		disk->d_sync.ds_offset += bp->bio_length;
1959 		bp->bio_done = g_mirror_sync_done;
1960 		bp->bio_from = disk->d_sync.ds_consumer;
1961 		bp->bio_to = sc->sc_provider;
1962 		bp->bio_caller1 = (void *)(uintptr_t)i;
1963 	}
1964 
1965 	/* Increase the number of disks in SYNCHRONIZING state. */
1966 	sc->sc_sync.ds_ndisks++;
1967 	/* Set the number of in-flight synchronization requests. */
1968 	disk->d_sync.ds_inflight = g_mirror_syncreqs;
1969 
1970 	/*
1971 	 * Fire off the first synchronization requests.
1972 	 */
1973 	for (i = 0; i < g_mirror_syncreqs; i++) {
1974 		bp = disk->d_sync.ds_bios[i];
1975 		G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
1976 		disk->d_sync.ds_consumer->index++;
1977 		/*
1978 		 * Delay the request if it is colliding with a regular request.
1979 		 */
1980 		if (g_mirror_regular_collision(sc, bp))
1981 			g_mirror_sync_delay(sc, bp);
1982 		else
1983 			g_io_request(bp, disk->d_sync.ds_consumer);
1984 	}
1985 }
1986 
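/*
 * Editorial note: a self-contained restatement of the request-length math
 * used when initializing the synchronization bios above.  Each request
 * reads MIN(MAXPHYS, mediasize - offset) bytes, so the final request is
 * simply shortened to end exactly at the media size.  The function below
 * is an illustrative sketch, not driver code.
 */
#if 0
static u_int
example_count_sync_requests(off_t mediasize)
{
	off_t offset, length;
	u_int nreqs;

	nreqs = 0;
	for (offset = 0; offset < mediasize; offset += length) {
		length = MIN(MAXPHYS, mediasize - offset);
		nreqs++;
	}
	/* E.g. mediasize = 1 GiB with MAXPHYS = 128 KiB -> 8192 requests. */
	return (nreqs);
}
#endif
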
1987 /*
1988  * Stop the synchronization process.
1989  * type: 0 - synchronization finished
1990  *       1 - synchronization stopped
1991  */
1992 static void
1993 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1994 {
1995 	struct g_mirror_softc *sc;
1996 	struct g_consumer *cp;
1997 
1998 	g_topology_assert_not();
1999 	sc = disk->d_softc;
2000 	sx_assert(&sc->sc_lock, SX_LOCKED);
2001 
2002 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2003 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2004 	    g_mirror_disk_state2str(disk->d_state)));
2005 	if (disk->d_sync.ds_consumer == NULL)
2006 		return;
2007 
2008 	if (type == 0) {
2009 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
2010 		    sc->sc_name, g_mirror_get_diskname(disk));
2011 	} else /* if (type == 1) */ {
2012 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
2013 		    sc->sc_name, g_mirror_get_diskname(disk));
2014 	}
2015 	free(disk->d_sync.ds_bios, M_MIRROR);
2016 	disk->d_sync.ds_bios = NULL;
2017 	cp = disk->d_sync.ds_consumer;
2018 	disk->d_sync.ds_consumer = NULL;
2019 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2020 	sc->sc_sync.ds_ndisks--;
2021 	sx_xunlock(&sc->sc_lock); /* Avoid recursion on sc_lock. */
2022 	g_topology_lock();
2023 	g_mirror_kill_consumer(sc, cp);
2024 	g_topology_unlock();
2025 	sx_xlock(&sc->sc_lock);
2026 }
2027 
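/*
 * Editorial note on the unlock/lock sequence at the end of
 * g_mirror_sync_stop() above: sc_lock is dropped before taking the
 * topology lock and killing the consumer (as the in-line comment says,
 * to avoid recursing on sc_lock), then re-acquired afterwards.  The same
 * drop/topology-lock/re-acquire pattern is used by g_mirror_sync_start()
 * when creating the consumer.
 */
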
2028 static void
2029 g_mirror_launch_provider(struct g_mirror_softc *sc)
2030 {
2031 	struct g_mirror_disk *disk;
2032 	struct g_provider *pp;
2033 
2034 	sx_assert(&sc->sc_lock, SX_LOCKED);
2035 
2036 	g_topology_lock();
2037 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
2038 	pp->mediasize = sc->sc_mediasize;
2039 	pp->sectorsize = sc->sc_sectorsize;
2040 	pp->stripesize = 0;
2041 	pp->stripeoffset = 0;
2042 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2043 		if (disk->d_consumer && disk->d_consumer->provider &&
2044 		    disk->d_consumer->provider->stripesize > pp->stripesize) {
2045 			pp->stripesize = disk->d_consumer->provider->stripesize;
2046 			pp->stripeoffset = disk->d_consumer->provider->stripeoffset;
2047 		}
2048 	}
2049 	sc->sc_provider = pp;
2050 	g_error_provider(pp, 0);
2051 	g_topology_unlock();
2052 	G_MIRROR_DEBUG(0, "Device %s launched (%u/%u).", pp->name,
2053 	    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE), sc->sc_ndisks);
2054 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2055 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2056 			g_mirror_sync_start(disk);
2057 	}
2058 }
2059 
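/*
 * Editorial note: the loop above advertises on the mirror provider the
 * largest stripesize found among the components (together with the
 * matching stripeoffset), presumably so that upper layers align I/O for
 * the most demanding member of the mirror.
 */
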
2060 static void
2061 g_mirror_destroy_provider(struct g_mirror_softc *sc)
2062 {
2063 	struct g_mirror_disk *disk;
2064 	struct bio *bp;
2065 
2066 	g_topology_assert_not();
2067 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
2068 	    sc->sc_name));
2069 
2070 	g_topology_lock();
2071 	g_error_provider(sc->sc_provider, ENXIO);
2072 	mtx_lock(&sc->sc_queue_mtx);
2073 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
2074 		bioq_remove(&sc->sc_queue, bp);
2075 		g_io_deliver(bp, ENXIO);
2076 	}
2077 	mtx_unlock(&sc->sc_queue_mtx);
2078 	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
2079 	    sc->sc_provider->name);
2080 	sc->sc_provider->flags |= G_PF_WITHER;
2081 	g_orphan_provider(sc->sc_provider, ENXIO);
2082 	g_topology_unlock();
2083 	sc->sc_provider = NULL;
2084 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2085 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
2086 			g_mirror_sync_stop(disk, 1);
2087 	}
2088 }
2089 
2090 static void
2091 g_mirror_go(void *arg)
2092 {
2093 	struct g_mirror_softc *sc;
2094 
2095 	sc = arg;
2096 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
2097 	g_mirror_event_send(sc, 0,
2098 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
2099 }
2100 
2101 static u_int
2102 g_mirror_determine_state(struct g_mirror_disk *disk)
2103 {
2104 	struct g_mirror_softc *sc;
2105 	u_int state;
2106 
2107 	sc = disk->d_softc;
2108 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
2109 		if ((disk->d_flags &
2110 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2111 			/* Disk does not need synchronization. */
2112 			state = G_MIRROR_DISK_STATE_ACTIVE;
2113 		} else {
2114 			if ((sc->sc_flags &
2115 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2116 			    (disk->d_flags &
2117 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2118 				/*
2119 				 * We can start synchronization from
2120 				 * the stored offset.
2121 				 */
2122 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2123 			} else {
2124 				state = G_MIRROR_DISK_STATE_STALE;
2125 			}
2126 		}
2127 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
2128 		/*
2129 		 * Reset all synchronization data for this disk,
2130 		 * because even if it was synchronized, it was
2131 		 * synchronized against disks with a different syncid.
2132 		 */
2133 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2134 		disk->d_sync.ds_offset = 0;
2135 		disk->d_sync.ds_offset_done = 0;
2136 		disk->d_sync.ds_syncid = sc->sc_syncid;
2137 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
2138 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2139 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
2140 		} else {
2141 			state = G_MIRROR_DISK_STATE_STALE;
2142 		}
2143 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
2144 		/*
2145 		 * Not good, NOT GOOD!
2146 		 * It means that the mirror was started on stale disks
2147 		 * and a fresher disk has just arrived.
2148 		 * If there were writes, the mirror is broken.
2149 		 * The best choice here is to leave this disk
2150 		 * untouched and inform the user loudly.
2151 		 */
2152 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
2153 		    "disk (%s) arrived! It will not be connected to the "
2154 		    "running device.", sc->sc_name,
2155 		    g_mirror_get_diskname(disk));
2156 		g_mirror_destroy_disk(disk);
2157 		state = G_MIRROR_DISK_STATE_NONE;
2158 		/* Return immediately, because disk was destroyed. */
2159 		/* Return immediately, because the disk was destroyed. */
2160 	}
2161 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
2162 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
2163 	return (state);
2164 }
2165 
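/*
 * Editorial summary of g_mirror_determine_state() above ("autosync
 * allowed" abbreviates: NOAUTOSYNC is not set on the device, or
 * FORCE_SYNC is set on the disk):
 *
 *	disk syncid == device syncid, not SYNCHRONIZING -> ACTIVE
 *	disk syncid == device syncid, SYNCHRONIZING,
 *	    autosync allowed                            -> SYNCHRONIZING
 *	    autosync denied                             -> STALE
 *	disk syncid <  device syncid (sync data reset),
 *	    autosync allowed                            -> SYNCHRONIZING
 *	    autosync denied                             -> STALE
 *	disk syncid >  device syncid                    -> disk destroyed,
 *	                                                   NONE returned
 */
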
2166 /*
2167  * Update device state.
2168  */
2169 static void
2170 g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
2171 {
2172 	struct g_mirror_disk *disk;
2173 	u_int state;
2174 
2175 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2176 
2177 	switch (sc->sc_state) {
2178 	case G_MIRROR_DEVICE_STATE_STARTING:
2179 	    {
2180 		struct g_mirror_disk *pdisk, *tdisk;
2181 		u_int dirty, ndisks, genid, syncid;
2182 
2183 		KASSERT(sc->sc_provider == NULL,
2184 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
2185 		/*
2186 		 * Are we ready? We are if all disks are connected, or
2187 		 * if we have any disks and 'force' is true.
2188 		 */
2189 		ndisks = g_mirror_ndisks(sc, -1);
2190 		if (sc->sc_ndisks == ndisks || (force && ndisks > 0)) {
2191 			;
2192 		} else if (ndisks == 0) {
2193 			/*
2194 			 * Disks went down in the starting phase, so
2195 			 * destroy the device.
2196 			 */
2197 			callout_drain(&sc->sc_callout);
2198 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2199 			G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p", __LINE__,
2200 			    sc->sc_rootmount);
2201 			root_mount_rel(sc->sc_rootmount);
2202 			sc->sc_rootmount = NULL;
2203 			return;
2204 		} else {
2205 			return;
2206 		}
2207 
2208 		/*
2209 		 * Activate all disks with the biggest syncid.
2210 		 */
2211 		if (force) {
2212 			/*
2213 			 * If 'force' is true, we have been called due to
2214 			 * timeout, so don't bother canceling the timeout.
2215 			 */
2216 			ndisks = 0;
2217 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2218 				if ((disk->d_flags &
2219 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
2220 					ndisks++;
2221 				}
2222 			}
2223 			if (ndisks == 0) {
2224 				/* No valid disks found, destroy device. */
2225 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2226 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2227 				    __LINE__, sc->sc_rootmount);
2228 				root_mount_rel(sc->sc_rootmount);
2229 				sc->sc_rootmount = NULL;
2230 				return;
2231 			}
2232 		} else {
2233 			/* Cancel timeout. */
2234 			callout_drain(&sc->sc_callout);
2235 		}
2236 
2237 		/*
2238 		 * Find the biggest genid.
2239 		 */
2240 		genid = 0;
2241 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2242 			if (disk->d_genid > genid)
2243 				genid = disk->d_genid;
2244 		}
2245 		sc->sc_genid = genid;
2246 		/*
2247 		 * Remove all disks without the biggest genid.
2248 		 */
2249 		LIST_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
2250 			if (disk->d_genid < genid) {
2251 				G_MIRROR_DEBUG(0,
2252 				    "Component %s (device %s) broken, skipping.",
2253 				    g_mirror_get_diskname(disk), sc->sc_name);
2254 				g_mirror_destroy_disk(disk);
2255 			}
2256 		}
2257 
2258 		/*
2259 		 * Find the biggest syncid.
2260 		 */
2261 		syncid = 0;
2262 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2263 			if (disk->d_sync.ds_syncid > syncid)
2264 				syncid = disk->d_sync.ds_syncid;
2265 		}
2266 
2267 		/*
2268 		 * Here we need to look for dirty disks and, if all disks
2269 		 * with the biggest syncid are dirty, we have to choose the
2270 		 * one with the biggest priority and rebuild the rest.
2271 		 */
2272 		/*
2273 		 * Find the number of dirty disks with the biggest syncid.
2274 		 * Find the number of disks with the biggest syncid.
2275 		 * While here, find a disk with the biggest priority.
2276 		 */
2277 		dirty = ndisks = 0;
2278 		pdisk = NULL;
2279 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2280 			if (disk->d_sync.ds_syncid != syncid)
2281 				continue;
2282 			if ((disk->d_flags &
2283 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2284 				continue;
2285 			}
2286 			ndisks++;
2287 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2288 				dirty++;
2289 				if (pdisk == NULL ||
2290 				    pdisk->d_priority < disk->d_priority) {
2291 					pdisk = disk;
2292 				}
2293 			}
2294 		}
2295 		if (dirty == 0) {
2296 			/* No dirty disks at all, great. */
2297 		} else if (dirty == ndisks) {
2298 			/*
2299 			 * Force synchronization for all dirty disks except
2300 			 * the one with the biggest priority.
2301 			 */
2302 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
2303 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
2304 			    "master disk for synchronization.",
2305 			    g_mirror_get_diskname(pdisk), sc->sc_name);
2306 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2307 				if (disk->d_sync.ds_syncid != syncid)
2308 					continue;
2309 				if ((disk->d_flags &
2310 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2311 					continue;
2312 				}
2313 				KASSERT((disk->d_flags &
2314 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
2315 				    ("Disk %s isn't marked as dirty.",
2316 				    g_mirror_get_diskname(disk)));
2317 				/* Skip the disk with the biggest priority. */
2318 				if (disk == pdisk)
2319 					continue;
2320 				disk->d_sync.ds_syncid = 0;
2321 			}
2322 		} else if (dirty < ndisks) {
2323 			/*
2324 			 * Force synchronization for all dirty disks.
2325 			 * We have some non-dirty disks.
2326 			 */
2327 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2328 				if (disk->d_sync.ds_syncid != syncid)
2329 					continue;
2330 				if ((disk->d_flags &
2331 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2332 					continue;
2333 				}
2334 				if ((disk->d_flags &
2335 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
2336 					continue;
2337 				}
2338 				disk->d_sync.ds_syncid = 0;
2339 			}
2340 		}
2341 
2342 		/* Reset hint. */
2343 		sc->sc_hint = NULL;
2344 		sc->sc_syncid = syncid;
2345 		if (force) {
2346 			/* Remember to bump syncid on first write. */
2347 			sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2348 		}
2349 		state = G_MIRROR_DEVICE_STATE_RUNNING;
2350 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
2351 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
2352 		    g_mirror_device_state2str(state));
2353 		sc->sc_state = state;
2354 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2355 			state = g_mirror_determine_state(disk);
2356 			g_mirror_event_send(disk, state,
2357 			    G_MIRROR_EVENT_DONTWAIT);
2358 			if (state == G_MIRROR_DISK_STATE_STALE)
2359 				sc->sc_bump_id |= G_MIRROR_BUMP_SYNCID;
2360 		}
2361 		break;
2362 	    }
2363 	case G_MIRROR_DEVICE_STATE_RUNNING:
2364 		if (g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
2365 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2366 			/*
2367 			 * No active disks or no disks at all,
2368 			 * so destroy the device.
2369 			 */
2370 			if (sc->sc_provider != NULL)
2371 				g_mirror_destroy_provider(sc);
2372 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2373 			break;
2374 		} else if (g_mirror_ndisks(sc,
2375 		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
2376 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
2377 			/*
2378 			 * We have active disks, launch provider if it doesn't
2379 			 * exist.
2380 			 */
2381 			if (sc->sc_provider == NULL)
2382 				g_mirror_launch_provider(sc);
2383 			if (sc->sc_rootmount != NULL) {
2384 				G_MIRROR_DEBUG(1, "root_mount_rel[%u] %p",
2385 				    __LINE__, sc->sc_rootmount);
2386 				root_mount_rel(sc->sc_rootmount);
2387 				sc->sc_rootmount = NULL;
2388 			}
2389 		}
2390 		/*
2391 		 * Genid should be bumped immediately, so do it here.
2392 		 */
2393 		if ((sc->sc_bump_id & G_MIRROR_BUMP_GENID) != 0) {
2394 			sc->sc_bump_id &= ~G_MIRROR_BUMP_GENID;
2395 			g_mirror_bump_genid(sc);
2396 		}
2397 		break;
2398 	default:
2399 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
2400 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
2401 		break;
2402 	}
2403 }
2404 
2405 /*
2406  * Update disk state and device state if needed.
2407  */
2408 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
2409 	"Disk %s state changed from %s to %s (device %s).",		\
2410 	g_mirror_get_diskname(disk),					\
2411 	g_mirror_disk_state2str(disk->d_state),				\
2412 	g_mirror_disk_state2str(state), sc->sc_name)
2413 static int
2414 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
2415 {
2416 	struct g_mirror_softc *sc;
2417 
2418 	sc = disk->d_softc;
2419 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2420 
2421 again:
2422 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
2423 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
2424 	    g_mirror_disk_state2str(state));
2425 	switch (state) {
2426 	case G_MIRROR_DISK_STATE_NEW:
2427 		/*
2428 		 * Possible scenarios:
2429 		 * 1. A new disk arrives.
2430 		 */
2431 		/* Previous state should be NONE. */
2432 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
2433 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2434 		    g_mirror_disk_state2str(disk->d_state)));
2435 		DISK_STATE_CHANGED();
2436 
2437 		disk->d_state = state;
2438 		if (LIST_EMPTY(&sc->sc_disks))
2439 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
2440 		else {
2441 			struct g_mirror_disk *dp;
2442 
2443 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
2444 				if (disk->d_priority >= dp->d_priority) {
2445 					LIST_INSERT_BEFORE(dp, disk, d_next);
2446 					dp = NULL;
2447 					break;
2448 				}
2449 				if (LIST_NEXT(dp, d_next) == NULL)
2450 					break;
2451 			}
2452 			if (dp != NULL)
2453 				LIST_INSERT_AFTER(dp, disk, d_next);
2454 		}
2455 		G_MIRROR_DEBUG(1, "Device %s: provider %s detected.",
2456 		    sc->sc_name, g_mirror_get_diskname(disk));
2457 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2458 			break;
2459 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2460 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2461 		    g_mirror_device_state2str(sc->sc_state),
2462 		    g_mirror_get_diskname(disk),
2463 		    g_mirror_disk_state2str(disk->d_state)));
2464 		state = g_mirror_determine_state(disk);
2465 		if (state != G_MIRROR_DISK_STATE_NONE)
2466 			goto again;
2467 		break;
2468 	case G_MIRROR_DISK_STATE_ACTIVE:
2469 		/*
2470 		 * Possible scenarios:
2471 		 * 1. New disk does not need synchronization.
2472 		 * 2. Synchronization process finished successfully.
2473 		 */
2474 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2475 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2476 		    g_mirror_device_state2str(sc->sc_state),
2477 		    g_mirror_get_diskname(disk),
2478 		    g_mirror_disk_state2str(disk->d_state)));
2479 		/* Previous state should be NEW or SYNCHRONIZING. */
2480 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2481 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2482 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2483 		    g_mirror_disk_state2str(disk->d_state)));
2484 		DISK_STATE_CHANGED();
2485 
2486 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2487 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2488 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2489 			g_mirror_sync_stop(disk, 0);
2490 		}
2491 		disk->d_state = state;
2492 		disk->d_sync.ds_offset = 0;
2493 		disk->d_sync.ds_offset_done = 0;
2494 		g_mirror_update_idle(sc, disk);
2495 		g_mirror_update_metadata(disk);
2496 		G_MIRROR_DEBUG(1, "Device %s: provider %s activated.",
2497 		    sc->sc_name, g_mirror_get_diskname(disk));
2498 		break;
2499 	case G_MIRROR_DISK_STATE_STALE:
2500 		/*
2501 		 * Possible scenarios:
2502 		 * 1. A stale disk was connected.
2503 		 */
2504 		/* Previous state should be NEW. */
2505 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2506 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2507 		    g_mirror_disk_state2str(disk->d_state)));
2508 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2509 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2510 		    g_mirror_device_state2str(sc->sc_state),
2511 		    g_mirror_get_diskname(disk),
2512 		    g_mirror_disk_state2str(disk->d_state)));
2513 		/*
2514 		 * STALE state is only possible if the device is marked
2515 		 * NOAUTOSYNC.
2516 		 */
2517 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2518 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2519 		    g_mirror_device_state2str(sc->sc_state),
2520 		    g_mirror_get_diskname(disk),
2521 		    g_mirror_disk_state2str(disk->d_state)));
2522 		DISK_STATE_CHANGED();
2523 
2524 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2525 		disk->d_state = state;
2526 		g_mirror_update_metadata(disk);
2527 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2528 		    sc->sc_name, g_mirror_get_diskname(disk));
2529 		break;
2530 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2531 		/*
2532 		 * Possible scenarios:
2533 		 * 1. A disk which needs synchronization was connected.
2534 		 */
2535 		/* Previous state should be NEW. */
2536 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2537 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2538 		    g_mirror_disk_state2str(disk->d_state)));
2539 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2540 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2541 		    g_mirror_device_state2str(sc->sc_state),
2542 		    g_mirror_get_diskname(disk),
2543 		    g_mirror_disk_state2str(disk->d_state)));
2544 		DISK_STATE_CHANGED();
2545 
2546 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2547 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2548 		disk->d_state = state;
2549 		if (sc->sc_provider != NULL) {
2550 			g_mirror_sync_start(disk);
2551 			g_mirror_update_metadata(disk);
2552 		}
2553 		break;
2554 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2555 		/*
2556 		 * Possible scenarios:
2557 		 * 1. The device wasn't running yet, but a disk disappeared.
2558 		 * 2. A disk was active and disappeared.
2559 		 * 3. A disk disappeared during the synchronization process.
2560 		 */
2561 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2562 			/*
2563 			 * Previous state should be ACTIVE, STALE or
2564 			 * SYNCHRONIZING.
2565 			 */
2566 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2567 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2568 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2569 			    ("Wrong disk state (%s, %s).",
2570 			    g_mirror_get_diskname(disk),
2571 			    g_mirror_disk_state2str(disk->d_state)));
2572 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2573 			/* Previous state should be NEW. */
2574 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2575 			    ("Wrong disk state (%s, %s).",
2576 			    g_mirror_get_diskname(disk),
2577 			    g_mirror_disk_state2str(disk->d_state)));
2578 			/*
2579 			 * Reset syncid bumping if a disk disappeared in the
2580 			 * STARTING state.
2581 			 */
2582 			if ((sc->sc_bump_id & G_MIRROR_BUMP_SYNCID) != 0)
2583 				sc->sc_bump_id &= ~G_MIRROR_BUMP_SYNCID;
2584 #ifdef	INVARIANTS
2585 		} else {
2586 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2587 			    sc->sc_name,
2588 			    g_mirror_device_state2str(sc->sc_state),
2589 			    g_mirror_get_diskname(disk),
2590 			    g_mirror_disk_state2str(disk->d_state)));
2591 #endif
2592 		}
2593 		DISK_STATE_CHANGED();
2594 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2595 		    sc->sc_name, g_mirror_get_diskname(disk));
2596 
2597 		g_mirror_destroy_disk(disk);
2598 		break;
2599 	case G_MIRROR_DISK_STATE_DESTROY:
2600 	    {
2601 		int error;
2602 
2603 		error = g_mirror_clear_metadata(disk);
2604 		if (error != 0)
2605 			return (error);
2606 		DISK_STATE_CHANGED();
2607 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2608 		    sc->sc_name, g_mirror_get_diskname(disk));
2609 
2610 		g_mirror_destroy_disk(disk);
2611 		sc->sc_ndisks--;
2612 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2613 			g_mirror_update_metadata(disk);
2614 		}
2615 		break;
2616 	    }
2617 	default:
2618 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2619 		break;
2620 	}
2621 	return (0);
2622 }
2623 #undef	DISK_STATE_CHANGED
2624 
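/*
 * Editorial note: DISK_STATE_CHANGED expands to references to the local
 * variables 'disk', 'sc' and 'state', so it is only meaningful inside
 * g_mirror_update_disk(); it is #undef'ed right above to keep that
 * dependency from leaking into the rest of the file.
 */
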
2625 int
2626 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2627 {
2628 	struct g_provider *pp;
2629 	u_char *buf;
2630 	int error;
2631 
2632 	g_topology_assert();
2633 
2634 	error = g_access(cp, 1, 0, 0);
2635 	if (error != 0)
2636 		return (error);
2637 	pp = cp->provider;
2638 	g_topology_unlock();
2639 	/* Metadata is stored in the last sector. */
2640 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2641 	    &error);
2642 	g_topology_lock();
2643 	g_access(cp, -1, 0, 0);
2644 	if (buf == NULL) {
2645 		G_MIRROR_DEBUG(1, "Cannot read metadata from %s (error=%d).",
2646 		    cp->provider->name, error);
2647 		return (error);
2648 	}
2649 
2650 	/* Decode metadata. */
2651 	error = mirror_metadata_decode(buf, md);
2652 	g_free(buf);
2653 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2654 		return (EINVAL);
2655 	if (md->md_version > G_MIRROR_VERSION) {
2656 		G_MIRROR_DEBUG(0,
2657 		    "Kernel module is too old to handle metadata from %s.",
2658 		    cp->provider->name);
2659 		return (EINVAL);
2660 	}
2661 	if (error != 0) {
2662 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2663 		    cp->provider->name);
2664 		return (error);
2665 	}
2666 
2667 	return (0);
2668 }
2669 
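/*
 * Editorial note: a minimal sketch of the "metadata in the last sector"
 * convention used by g_mirror_read_metadata() above.  g_read_data(9)
 * returns a buffer that the caller releases with g_free(); the offset of
 * the final sector is simply mediasize - sectorsize.  Illustrative only.
 */
#if 0
static void *
example_read_last_sector(struct g_consumer *cp, int *errorp)
{
	struct g_provider *pp;

	pp = cp->provider;
	/* The last sector starts sectorsize bytes before the media end. */
	return (g_read_data(cp, pp->mediasize - pp->sectorsize,
	    pp->sectorsize, errorp));
}
#endif
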
2670 static int
2671 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2672     struct g_mirror_metadata *md)
2673 {
2674 
2675 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2676 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2677 		    pp->name, md->md_did);
2678 		return (EEXIST);
2679 	}
2680 	if (md->md_all != sc->sc_ndisks) {
2681 		G_MIRROR_DEBUG(1,
2682 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2683 		    "md_all", pp->name, sc->sc_name);
2684 		return (EINVAL);
2685 	}
2686 	if (md->md_slice != sc->sc_slice) {
2687 		G_MIRROR_DEBUG(1,
2688 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2689 		    "md_slice", pp->name, sc->sc_name);
2690 		return (EINVAL);
2691 	}
2692 	if (md->md_balance != sc->sc_balance) {
2693 		G_MIRROR_DEBUG(1,
2694 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2695 		    "md_balance", pp->name, sc->sc_name);
2696 		return (EINVAL);
2697 	}
2698 	if (md->md_mediasize != sc->sc_mediasize) {
2699 		G_MIRROR_DEBUG(1,
2700 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2701 		    "md_mediasize", pp->name, sc->sc_name);
2702 		return (EINVAL);
2703 	}
2704 	if (sc->sc_mediasize > pp->mediasize) {
2705 		G_MIRROR_DEBUG(1,
2706 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2707 		    sc->sc_name);
2708 		return (EINVAL);
2709 	}
2710 	if (md->md_sectorsize != sc->sc_sectorsize) {
2711 		G_MIRROR_DEBUG(1,
2712 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2713 		    "md_sectorsize", pp->name, sc->sc_name);
2714 		return (EINVAL);
2715 	}
2716 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2717 		G_MIRROR_DEBUG(1,
2718 		    "Invalid sector size of disk %s (device %s), skipping.",
2719 		    pp->name, sc->sc_name);
2720 		return (EINVAL);
2721 	}
2722 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2723 		G_MIRROR_DEBUG(1,
2724 		    "Invalid device flags on disk %s (device %s), skipping.",
2725 		    pp->name, sc->sc_name);
2726 		return (EINVAL);
2727 	}
2728 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2729 		G_MIRROR_DEBUG(1,
2730 		    "Invalid disk flags on disk %s (device %s), skipping.",
2731 		    pp->name, sc->sc_name);
2732 		return (EINVAL);
2733 	}
2734 	return (0);
2735 }
2736 
2737 int
2738 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2739     struct g_mirror_metadata *md)
2740 {
2741 	struct g_mirror_disk *disk;
2742 	int error;
2743 
2744 	g_topology_assert_not();
2745 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2746 
2747 	error = g_mirror_check_metadata(sc, pp, md);
2748 	if (error != 0)
2749 		return (error);
2750 	if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING &&
2751 	    md->md_genid < sc->sc_genid) {
2752 		G_MIRROR_DEBUG(0, "Component %s (device %s) broken, skipping.",
2753 		    pp->name, sc->sc_name);
2754 		return (EINVAL);
2755 	}
2756 	disk = g_mirror_init_disk(sc, pp, md, &error);
2757 	if (disk == NULL)
2758 		return (error);
2759 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2760 	    G_MIRROR_EVENT_WAIT);
2761 	if (error != 0)
2762 		return (error);
2763 	if (md->md_version < G_MIRROR_VERSION) {
2764 		G_MIRROR_DEBUG(0, "Upgrading metadata on %s (v%d->v%d).",
2765 		    pp->name, md->md_version, G_MIRROR_VERSION);
2766 		g_mirror_update_metadata(disk);
2767 	}
2768 	return (0);
2769 }
2770 
2771 static void
2772 g_mirror_destroy_delayed(void *arg, int flag)
2773 {
2774 	struct g_mirror_softc *sc;
2775 	int error;
2776 
2777 	if (flag == EV_CANCEL) {
2778 		G_MIRROR_DEBUG(1, "Destroying canceled.");
2779 		return;
2780 	}
2781 	sc = arg;
2782 	g_topology_unlock();
2783 	sx_xlock(&sc->sc_lock);
2784 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) == 0,
2785 	    ("DESTROY flag set on %s.", sc->sc_name));
2786 	KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0,
2787 	    ("DESTROYING flag not set on %s.", sc->sc_name));
2788 	G_MIRROR_DEBUG(1, "Destroying %s (delayed).", sc->sc_name);
2789 	error = g_mirror_destroy(sc, G_MIRROR_DESTROY_SOFT);
2790 	if (error != 0) {
2791 		G_MIRROR_DEBUG(0, "Cannot destroy %s.", sc->sc_name);
2792 		sx_xunlock(&sc->sc_lock);
2793 	}
2794 	g_topology_lock();
2795 }
2796 
2797 static int
2798 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2799 {
2800 	struct g_mirror_softc *sc;
2801 	int dcr, dcw, dce, error = 0;
2802 
2803 	g_topology_assert();
2804 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2805 	    acw, ace);
2806 
2807 	sc = pp->geom->softc;
2808 	if (sc == NULL && acr <= 0 && acw <= 0 && ace <= 0)
2809 		return (0);
2810 	KASSERT(sc != NULL, ("NULL softc (provider=%s).", pp->name));
2811 
2812 	dcr = pp->acr + acr;
2813 	dcw = pp->acw + acw;
2814 	dce = pp->ace + ace;
2815 
2816 	g_topology_unlock();
2817 	sx_xlock(&sc->sc_lock);
2818 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0 ||
2819 	    LIST_EMPTY(&sc->sc_disks)) {
2820 		if (acr > 0 || acw > 0 || ace > 0)
2821 			error = ENXIO;
2822 		goto end;
2823 	}
2824 	if (dcw == 0 && !sc->sc_idle)
2825 		g_mirror_idle(sc, dcw);
2826 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROYING) != 0) {
2827 		if (acr > 0 || acw > 0 || ace > 0) {
2828 			error = ENXIO;
2829 			goto end;
2830 		}
2831 		if (dcr == 0 && dcw == 0 && dce == 0) {
2832 			g_post_event(g_mirror_destroy_delayed, sc, M_WAITOK,
2833 			    sc, NULL);
2834 		}
2835 	}
2836 end:
2837 	sx_xunlock(&sc->sc_lock);
2838 	g_topology_lock();
2839 	return (error);
2840 }
2841 
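/*
 * Editorial note on the arithmetic above: acr/acw/ace are deltas, while
 * dcr/dcw/dce are the provider's access counts as they will look once the
 * request is applied.  For example, a provider open r1w1e1 that receives
 * a close request of (-1, -1, -1) yields dcr == dcw == dce == 0, which is
 * the "last close" condition that schedules the delayed destroy.
 */
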
2842 static struct g_geom *
2843 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2844 {
2845 	struct g_mirror_softc *sc;
2846 	struct g_geom *gp;
2847 	int error, timeout;
2848 
2849 	g_topology_assert();
2850 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2851 	    md->md_mid);
2852 
2853 	/* One disk is the minimum. */
2854 	if (md->md_all < 1)
2855 		return (NULL);
2856 	/*
2857 	 * Action geom.
2858 	 */
2859 	gp = g_new_geomf(mp, "%s", md->md_name);
2860 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2861 	gp->start = g_mirror_start;
2862 	gp->orphan = g_mirror_orphan;
2863 	gp->access = g_mirror_access;
2864 	gp->dumpconf = g_mirror_dumpconf;
2865 
2866 	sc->sc_id = md->md_mid;
2867 	sc->sc_slice = md->md_slice;
2868 	sc->sc_balance = md->md_balance;
2869 	sc->sc_mediasize = md->md_mediasize;
2870 	sc->sc_sectorsize = md->md_sectorsize;
2871 	sc->sc_ndisks = md->md_all;
2872 	sc->sc_flags = md->md_mflags;
2873 	sc->sc_bump_id = 0;
2874 	sc->sc_idle = 1;
2875 	sc->sc_last_write = time_uptime;
2876 	sc->sc_writes = 0;
2877 	sx_init(&sc->sc_lock, "gmirror:lock");
2878 	bioq_init(&sc->sc_queue);
2879 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2880 	bioq_init(&sc->sc_regular_delayed);
2881 	bioq_init(&sc->sc_inflight);
2882 	bioq_init(&sc->sc_sync_delayed);
2883 	LIST_INIT(&sc->sc_disks);
2884 	TAILQ_INIT(&sc->sc_events);
2885 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2886 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2887 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2888 	gp->softc = sc;
2889 	sc->sc_geom = gp;
2890 	sc->sc_provider = NULL;
2891 	/*
2892 	 * Synchronization geom.
2893 	 */
2894 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2895 	gp->softc = sc;
2896 	gp->orphan = g_mirror_orphan;
2897 	sc->sc_sync.ds_geom = gp;
2898 	sc->sc_sync.ds_ndisks = 0;
2899 	error = kproc_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2900 	    "g_mirror %s", md->md_name);
2901 	if (error != 0) {
2902 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2903 		    sc->sc_name);
2904 		g_destroy_geom(sc->sc_sync.ds_geom);
2905 		mtx_destroy(&sc->sc_events_mtx);
2906 		mtx_destroy(&sc->sc_queue_mtx);
2907 		sx_destroy(&sc->sc_lock);
2908 		g_destroy_geom(sc->sc_geom);
2909 		free(sc, M_MIRROR);
2910 		return (NULL);
2911 	}
2912 
2913 	G_MIRROR_DEBUG(1, "Device %s created (%u components, id=%u).",
2914 	    sc->sc_name, sc->sc_ndisks, sc->sc_id);
2915 
2916 	sc->sc_rootmount = root_mount_hold("GMIRROR");
2917 	G_MIRROR_DEBUG(1, "root_mount_hold %p", sc->sc_rootmount);
2918 	/*
2919 	 * Arm the startup timeout.
2920 	 */
2921 	timeout = g_mirror_timeout * hz;
2922 	callout_reset(&sc->sc_callout, timeout, g_mirror_go, sc);
2923 	return (sc->sc_geom);
2924 }
2925 
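/*
 * Editorial note: the startup timeout above is expressed in ticks, i.e.
 * g_mirror_timeout * hz.  With kern.geom.mirror.timeout set to 4 and
 * hz = 1000, callout_reset() fires g_mirror_go() after 4000 ticks, that
 * is after four seconds, forcing the device to start even if not all
 * components have been tasted yet.
 */
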
2926 int
2927 g_mirror_destroy(struct g_mirror_softc *sc, int how)
2928 {
2929 	struct g_mirror_disk *disk;
2930 	struct g_provider *pp;
2931 
2932 	g_topology_assert_not();
2933 	if (sc == NULL)
2934 		return (ENXIO);
2935 	sx_assert(&sc->sc_lock, SX_XLOCKED);
2936 
2937 	pp = sc->sc_provider;
2938 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2939 		switch (how) {
2940 		case G_MIRROR_DESTROY_SOFT:
2941 			G_MIRROR_DEBUG(1,
2942 			    "Device %s is still open (r%dw%de%d).", pp->name,
2943 			    pp->acr, pp->acw, pp->ace);
2944 			return (EBUSY);
2945 		case G_MIRROR_DESTROY_DELAYED:
2946 			G_MIRROR_DEBUG(1,
2947 			    "Device %s will be destroyed on last close.",
2948 			    pp->name);
2949 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2950 				if (disk->d_state ==
2951 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2952 					g_mirror_sync_stop(disk, 1);
2953 				}
2954 			}
2955 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROYING;
2956 			return (EBUSY);
2957 		case G_MIRROR_DESTROY_HARD:
2958 			G_MIRROR_DEBUG(1, "Device %s is still open, so it "
2959 			    "can't be definitely removed.", pp->name);
2960 		}
2961 	}
2962 
2963 	g_topology_lock();
2964 	if (sc->sc_geom->softc == NULL) {
2965 		g_topology_unlock();
2966 		return (0);
2967 	}
2968 	sc->sc_geom->softc = NULL;
2969 	sc->sc_sync.ds_geom->softc = NULL;
2970 	g_topology_unlock();
2971 
2972 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2973 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2974 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2975 	sx_xunlock(&sc->sc_lock);
2976 	mtx_lock(&sc->sc_queue_mtx);
2977 	wakeup(sc);
2978 	mtx_unlock(&sc->sc_queue_mtx);
2979 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2980 	while (sc->sc_worker != NULL)
2981 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2982 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2983 	sx_xlock(&sc->sc_lock);
2984 	g_mirror_destroy_device(sc);
2985 	free(sc, M_MIRROR);
2986 	return (0);
2987 }
2988 
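/*
 * Editorial note: g_mirror_destroy() above pairs with
 * g_mirror_try_destroy() on the worker side: it sets DESTROY|WAIT, wakes
 * the worker on the queue channel, then polls sc_worker until the worker
 * has cleared it and issued a wakeup on &sc->sc_worker.  A minimal,
 * hypothetical sketch of this wait-for-thread-exit idiom:
 */
#if 0
static void
example_wait_for_worker(void *wakechan, struct proc **workerp)
{

	wakeup(wakechan);		/* Tell the worker to exit. */
	while (*workerp != NULL)
		tsleep(workerp, PRIBIO, "ex:dst", hz / 5);
}
#endif
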
2989 static void
2990 g_mirror_taste_orphan(struct g_consumer *cp)
2991 {
2992 
2993 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2994 	    cp->provider->name));
2995 }
2996 
2997 static struct g_geom *
2998 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2999 {
3000 	struct g_mirror_metadata md;
3001 	struct g_mirror_softc *sc;
3002 	struct g_consumer *cp;
3003 	struct g_geom *gp;
3004 	int error;
3005 
3006 	g_topology_assert();
3007 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
3008 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
3009 
3010 	gp = g_new_geomf(mp, "mirror:taste");
3011 	/*
3012 	 * This orphan function should never be called.
3013 	 */
3014 	gp->orphan = g_mirror_taste_orphan;
3015 	cp = g_new_consumer(gp);
3016 	g_attach(cp, pp);
3017 	error = g_mirror_read_metadata(cp, &md);
3018 	g_detach(cp);
3019 	g_destroy_consumer(cp);
3020 	g_destroy_geom(gp);
3021 	if (error != 0)
3022 		return (NULL);
3023 	gp = NULL;
3024 
3025 	if (md.md_provider[0] != '\0' &&
3026 	    !g_compare_names(md.md_provider, pp->name))
3027 		return (NULL);
3028 	if (md.md_provsize != 0 && md.md_provsize != pp->mediasize)
3029 		return (NULL);
3030 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
3031 		G_MIRROR_DEBUG(0,
3032 		    "Device %s: provider %s marked as inactive, skipping.",
3033 		    md.md_name, pp->name);
3034 		return (NULL);
3035 	}
3036 	if (g_mirror_debug >= 2)
3037 		mirror_metadata_dump(&md);
3038 
3039 	/*
3040 	 * Let's check if the device already exists.
3041 	 */
3042 	sc = NULL;
3043 	LIST_FOREACH(gp, &mp->geom, geom) {
3044 		sc = gp->softc;
3045 		if (sc == NULL)
3046 			continue;
3047 		if (sc->sc_sync.ds_geom == gp)
3048 			continue;
3049 		if (strcmp(md.md_name, sc->sc_name) != 0)
3050 			continue;
3051 		if (md.md_mid != sc->sc_id) {
3052 			G_MIRROR_DEBUG(0, "Device %s already configured.",
3053 			    sc->sc_name);
3054 			return (NULL);
3055 		}
3056 		break;
3057 	}
3058 	if (gp == NULL) {
3059 		gp = g_mirror_create(mp, &md);
3060 		if (gp == NULL) {
3061 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
3062 			    md.md_name);
3063 			return (NULL);
3064 		}
3065 		sc = gp->softc;
3066 	}
3067 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
3068 	g_topology_unlock();
3069 	sx_xlock(&sc->sc_lock);
3070 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_TASTING;
3071 	error = g_mirror_add_disk(sc, pp, &md);
3072 	if (error != 0) {
3073 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
3074 		    pp->name, gp->name, error);
3075 		if (LIST_EMPTY(&sc->sc_disks)) {
3076 			g_cancel_event(sc);
3077 			g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3078 			g_topology_lock();
3079 			return (NULL);
3080 		}
3081 		gp = NULL;
3082 	}
3083 	sc->sc_flags &= ~G_MIRROR_DEVICE_FLAG_TASTING;
3084 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
3085 		g_mirror_destroy(sc, G_MIRROR_DESTROY_HARD);
3086 		g_topology_lock();
3087 		return (NULL);
3088 	}
3089 	sx_xunlock(&sc->sc_lock);
3090 	g_topology_lock();
3091 	return (gp);
3092 }
3093 
3094 static int
3095 g_mirror_destroy_geom(struct gctl_req *req __unused,
3096     struct g_class *mp __unused, struct g_geom *gp)
3097 {
3098 	struct g_mirror_softc *sc;
3099 	int error;
3100 
3101 	g_topology_unlock();
3102 	sc = gp->softc;
3103 	sx_xlock(&sc->sc_lock);
3104 	g_cancel_event(sc);
3105 	error = g_mirror_destroy(gp->softc, G_MIRROR_DESTROY_SOFT);
3106 	if (error != 0)
3107 		sx_xunlock(&sc->sc_lock);
3108 	g_topology_lock();
3109 	return (error);
3110 }
3111 
3112 static void
3113 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
3114     struct g_consumer *cp, struct g_provider *pp)
3115 {
3116 	struct g_mirror_softc *sc;
3117 
3118 	g_topology_assert();
3119 
3120 	sc = gp->softc;
3121 	if (sc == NULL)
3122 		return;
3123 	/* Skip synchronization geom. */
3124 	if (gp == sc->sc_sync.ds_geom)
3125 		return;
3126 	if (pp != NULL) {
3127 		/* Nothing here. */
3128 	} else if (cp != NULL) {
3129 		struct g_mirror_disk *disk;
3130 
3131 		disk = cp->private;
3132 		if (disk == NULL)
3133 			return;
3134 		g_topology_unlock();
3135 		sx_xlock(&sc->sc_lock);
3136 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
3137 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
3138 			sbuf_printf(sb, "%s<Synchronized>", indent);
3139 			if (disk->d_sync.ds_offset == 0)
3140 				sbuf_printf(sb, "0%%");
3141 			else {
3142 				sbuf_printf(sb, "%u%%",
3143 				    (u_int)((disk->d_sync.ds_offset * 100) /
3144 				    sc->sc_provider->mediasize));
3145 			}
3146 			sbuf_printf(sb, "</Synchronized>\n");
3147 			if (disk->d_sync.ds_offset > 0) {
3148 				sbuf_printf(sb, "%s<BytesSynced>%jd"
3149 				    "</BytesSynced>\n", indent,
3150 				    (intmax_t)disk->d_sync.ds_offset);
3151 			}
3152 		}
3153 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
3154 		    disk->d_sync.ds_syncid);
3155 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent,
3156 		    disk->d_genid);
3157 		sbuf_printf(sb, "%s<Flags>", indent);
3158 		if (disk->d_flags == 0)
3159 			sbuf_printf(sb, "NONE");
3160 		else {
3161 			int first = 1;
3162 
3163 #define	ADD_FLAG(flag, name)	do {					\
3164 	if ((disk->d_flags & (flag)) != 0) {				\
3165 		if (!first)						\
3166 			sbuf_printf(sb, ", ");				\
3167 		else							\
3168 			first = 0;					\
3169 		sbuf_printf(sb, name);					\
3170 	}								\
3171 } while (0)
3172 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
3173 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
3174 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
3175 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
3176 			    "SYNCHRONIZING");
3177 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
3178 			ADD_FLAG(G_MIRROR_DISK_FLAG_BROKEN, "BROKEN");
3179 #undef	ADD_FLAG
3180 		}
3181 		sbuf_printf(sb, "</Flags>\n");
3182 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
3183 		    disk->d_priority);
3184 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
3185 		    g_mirror_disk_state2str(disk->d_state));
3186 		sx_xunlock(&sc->sc_lock);
3187 		g_topology_lock();
3188 	} else {
3189 		g_topology_unlock();
3190 		sx_xlock(&sc->sc_lock);
3191 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
3192 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
3193 		sbuf_printf(sb, "%s<GenID>%u</GenID>\n", indent, sc->sc_genid);
3194 		sbuf_printf(sb, "%s<Flags>", indent);
3195 		if (sc->sc_flags == 0)
3196 			sbuf_printf(sb, "NONE");
3197 		else {
3198 			int first = 1;
3199 
3200 #define	ADD_FLAG(flag, name)	do {					\
3201 	if ((sc->sc_flags & (flag)) != 0) {				\
3202 		if (!first)						\
3203 			sbuf_printf(sb, ", ");				\
3204 		else							\
3205 			first = 0;					\
3206 		sbuf_printf(sb, name);					\
3207 	}								\
3208 } while (0)
3209 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOFAILSYNC, "NOFAILSYNC");
3210 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
3211 #undef	ADD_FLAG
3212 		}
3213 		sbuf_printf(sb, "</Flags>\n");
3214 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
3215 		    (u_int)sc->sc_slice);
3216 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
3217 		    balance_name(sc->sc_balance));
3218 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
3219 		    sc->sc_ndisks);
3220 		sbuf_printf(sb, "%s<State>", indent);
3221 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
3222 			sbuf_printf(sb, "%s", "STARTING");
3223 		else if (sc->sc_ndisks ==
3224 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
3225 			sbuf_printf(sb, "%s", "COMPLETE");
3226 		else
3227 			sbuf_printf(sb, "%s", "DEGRADED");
3228 		sbuf_printf(sb, "</State>\n");
3229 		sx_xunlock(&sc->sc_lock);
3230 		g_topology_lock();
3231 	}
3232 }
3233 
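/*
 * Editorial note: a minimal sketch of the do { } while (0) idiom used by
 * the two local ADD_FLAG macros above.  Wrapping a multi-statement macro
 * this way makes it behave as a single statement, so it composes safely
 * with if/else bodies that lack braces.  EXAMPLE_ADD_FLAG and its
 * arguments are hypothetical.
 */
#if 0
#define	EXAMPLE_ADD_FLAG(sb, flags, flag, name) do {			\
	if (((flags) & (flag)) != 0)					\
		sbuf_printf((sb), "%s ", (name));			\
} while (0)

/*
 * Safe even without braces:
 *
 *	if (verbose)
 *		EXAMPLE_ADD_FLAG(sb, flags, 0x1, "DIRTY");
 *	else
 *		sbuf_printf(sb, "NONE");
 */
#endif
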
3234 static void
3235 g_mirror_shutdown_pre_sync(void *arg, int howto)
3236 {
3237 	struct g_class *mp;
3238 	struct g_geom *gp, *gp2;
3239 	struct g_mirror_softc *sc;
3240 	int error;
3241 
3242 	mp = arg;
3243 	DROP_GIANT();
3244 	g_topology_lock();
3245 	LIST_FOREACH_SAFE(gp, &mp->geom, geom, gp2) {
3246 		if ((sc = gp->softc) == NULL)
3247 			continue;
3248 		/* Skip synchronization geom. */
3249 		if (gp == sc->sc_sync.ds_geom)
3250 			continue;
3251 		g_topology_unlock();
3252 		sx_xlock(&sc->sc_lock);
3253 		g_cancel_event(sc);
3254 		error = g_mirror_destroy(sc, G_MIRROR_DESTROY_DELAYED);
3255 		if (error != 0)
3256 			sx_xunlock(&sc->sc_lock);
3257 		g_topology_lock();
3258 	}
3259 	g_topology_unlock();
3260 	PICKUP_GIANT();
3261 }
3262 
3263 static void
3264 g_mirror_init(struct g_class *mp)
3265 {
3266 
3267 	g_mirror_pre_sync = EVENTHANDLER_REGISTER(shutdown_pre_sync,
3268 	    g_mirror_shutdown_pre_sync, mp, SHUTDOWN_PRI_FIRST);
3269 	if (g_mirror_pre_sync == NULL)
3270 		G_MIRROR_DEBUG(0, "Warning! Cannot register shutdown event.");
3271 }
3272 
3273 static void
3274 g_mirror_fini(struct g_class *mp)
3275 {
3276 
3277 	if (g_mirror_pre_sync != NULL)
3278 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync, g_mirror_pre_sync);
3279 }
3280 
3281 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
3282