xref: /freebsd/sys/geom/mirror/g_mirror.c (revision 6b3455a7665208c366849f0b2b3bc916fb97516e)
1 /*-
2  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/module.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sysctl.h>
39 #include <sys/malloc.h>
40 #include <sys/bitstring.h>
41 #include <vm/uma.h>
42 #include <machine/atomic.h>
43 #include <geom/geom.h>
44 #include <sys/proc.h>
45 #include <sys/kthread.h>
46 #include <geom/mirror/g_mirror.h>
47 
48 
/* Malloc type tag passed to the malloc()/free() calls in this file. */
static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");

/*
 * Run-time tunables, registered under the "mirror" node that hangs off
 * the kern.geom sysctl tree.  All of them are read-write.
 */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
/* Debug level (default 0); not static, so visible outside this file. */
u_int g_mirror_debug = 0;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
    "Debug level");
/* Synchronization block size (default 131072). */
static u_int g_mirror_sync_block_size = 131072;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, sync_block_size, CTLFLAG_RW,
    &g_mirror_sync_block_size, 0, "Synchronization block size");
/* Time to wait for all mirror components (default 8; presumably seconds). */
static u_int g_mirror_timeout = 8;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
    0, "Time to wait on all mirror components");
/* Number of regular I/O requests per synchronization request (default 5). */
static u_int g_mirror_reqs_per_sync = 5;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
    &g_mirror_reqs_per_sync, 0,
    "Number of regular I/O requests per synchronization request");
/* Number of synchronization requests per second (default 100). */
static u_int g_mirror_syncs_per_sec = 100;
SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
    &g_mirror_syncs_per_sec, 0,
    "Number of synchronizations requests per second");
70 
/*
 * Wrapper around msleep() that emits a level-4 debug message before going
 * to sleep on 'ident' and another one after waking up.  The arguments are
 * passed to msleep() unchanged; its return value is discarded.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)	do {		\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
76 
77 
/* Prototypes for the class methods referenced by g_mirror_class below. */
static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_taste_t g_mirror_taste;

/*
 * GEOM class descriptor for the mirror class: its name, the control
 * request handler, the taste method and the geom destruction method.
 */
struct g_class g_mirror_class = {
	.name = G_MIRROR_CLASS_NAME,
	.ctlreq = g_mirror_config,
	.taste = g_mirror_taste,
	.destroy_geom = g_mirror_destroy_geom
};


/* Forward declarations of static helpers that are used before they are defined. */
static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
96 
97 
98 static const char *
99 g_mirror_disk_state2str(int state)
100 {
101 
102 	switch (state) {
103 	case G_MIRROR_DISK_STATE_NONE:
104 		return ("NONE");
105 	case G_MIRROR_DISK_STATE_NEW:
106 		return ("NEW");
107 	case G_MIRROR_DISK_STATE_ACTIVE:
108 		return ("ACTIVE");
109 	case G_MIRROR_DISK_STATE_STALE:
110 		return ("STALE");
111 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
112 		return ("SYNCHRONIZING");
113 	case G_MIRROR_DISK_STATE_DISCONNECTED:
114 		return ("DISCONNECTED");
115 	case G_MIRROR_DISK_STATE_DESTROY:
116 		return ("DESTROY");
117 	default:
118 		return ("INVALID");
119 	}
120 }
121 
122 static const char *
123 g_mirror_device_state2str(int state)
124 {
125 
126 	switch (state) {
127 	case G_MIRROR_DEVICE_STATE_STARTING:
128 		return ("STARTING");
129 	case G_MIRROR_DEVICE_STATE_RUNNING:
130 		return ("RUNNING");
131 	default:
132 		return ("INVALID");
133 	}
134 }
135 
136 static const char *
137 g_mirror_get_diskname(struct g_mirror_disk *disk)
138 {
139 
140 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
141 		return ("[unknown]");
142 	return (disk->d_name);
143 }
144 
/*
 * --- Event handling functions ---
 * Events in geom_mirror are used to maintain disk and device status
 * from a single thread, which simplifies locking.
 */
/*
 * Release the memory of an event structure that was allocated with the
 * M_MIRROR malloc type.
 */
static void
g_mirror_event_free(struct g_mirror_event *ep)
{

	free(ep, M_MIRROR);
}
156 
/*
 * Queue an event on the device's event list and wake up whoever sleeps on
 * the softc.
 *
 * 'arg' is a softc pointer when G_MIRROR_EVENT_DEVICE is set in 'flags',
 * otherwise it is a disk pointer.  'state' is stored in the event as the
 * requested state.
 *
 * With G_MIRROR_EVENT_DONTWAIT the function returns 0 right after queueing.
 * Otherwise the caller must hold the topology lock; it is dropped while
 * waiting for the event to be marked G_MIRROR_EVENT_DONE and re-acquired
 * before returning the event's e_error value.
 */
int
g_mirror_event_send(void *arg, int state, int flags)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	int error;

	/* M_WAITOK: the allocation may sleep. */
	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
		disk = NULL;
		sc = arg;
	} else {
		disk = arg;
		sc = disk->d_softc;
	}
	ep->e_disk = disk;
	ep->e_state = state;
	ep->e_flags = flags;
	ep->e_error = 0;
	mtx_lock(&sc->sc_events_mtx);
	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
	mtx_unlock(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
	/* The wakeup on the softc is issued with the queue mutex held. */
	mtx_lock(&sc->sc_queue_mtx);
	wakeup(sc);
	mtx_unlock(&sc->sc_queue_mtx);
	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
		return (0);
	g_topology_assert();
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
	g_topology_unlock();
	/*
	 * The DONE flag is tested without the mutex held; each sleep is
	 * bounded by a timeout of hz * 5 ticks, after which the flag is
	 * tested again.  PDROP is passed so that msleep() does not hand
	 * the mutex back, hence the lock at the top of every iteration.
	 */
	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
		mtx_lock(&sc->sc_events_mtx);
		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
		    hz * 5);
	}
	/* Don't even try to use 'sc' here, because it could be already dead. */
	g_topology_lock();
	error = ep->e_error;
	g_mirror_event_free(ep);
	return (error);
}
201 
202 static struct g_mirror_event *
203 g_mirror_event_get(struct g_mirror_softc *sc)
204 {
205 	struct g_mirror_event *ep;
206 
207 	mtx_lock(&sc->sc_events_mtx);
208 	ep = TAILQ_FIRST(&sc->sc_events);
209 	if (ep != NULL)
210 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
211 	mtx_unlock(&sc->sc_events_mtx);
212 	return (ep);
213 }
214 
215 static void
216 g_mirror_event_cancel(struct g_mirror_disk *disk)
217 {
218 	struct g_mirror_softc *sc;
219 	struct g_mirror_event *ep, *tmpep;
220 
221 	g_topology_assert();
222 
223 	sc = disk->d_softc;
224 	mtx_lock(&sc->sc_events_mtx);
225 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
226 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
227 			continue;
228 		if (ep->e_disk != disk)
229 			continue;
230 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
231 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
232 			g_mirror_event_free(ep);
233 		else {
234 			ep->e_error = ECANCELED;
235 			wakeup(ep);
236 		}
237 	}
238 	mtx_unlock(&sc->sc_events_mtx);
239 }
240 
241 /*
242  * Return the number of disks in given state.
243  * If state is equal to -1, count all connected disks.
244  */
245 u_int
246 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
247 {
248 	struct g_mirror_disk *disk;
249 	u_int n = 0;
250 
251 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
252 		if (state == -1 || disk->d_state == state)
253 			n++;
254 	}
255 	return (n);
256 }
257 
258 /*
259  * Find a disk in mirror by its disk ID.
260  */
261 static struct g_mirror_disk *
262 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
263 {
264 	struct g_mirror_disk *disk;
265 
266 	g_topology_assert();
267 
268 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
269 		if (disk->d_id == id)
270 			return (disk);
271 	}
272 	return (NULL);
273 }
274 
275 static u_int
276 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
277 {
278 	struct bio *bp;
279 	u_int nreqs = 0;
280 
281 	mtx_lock(&sc->sc_queue_mtx);
282 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
283 		if (bp->bio_from == cp)
284 			nreqs++;
285 	}
286 	mtx_unlock(&sc->sc_queue_mtx);
287 	return (nreqs);
288 }
289 
290 static void
291 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
292 {
293 
294 	g_topology_assert();
295 
296 	cp->private = NULL;
297 	if (cp->nstart != cp->nend) {
298 		G_MIRROR_DEBUG(2,
299 		    "I/O requests for %s exist, can't destroy it now.",
300 		    cp->provider->name);
301 		return;
302 	}
303 	if (g_mirror_nrequests(sc, cp) > 0) {
304 		G_MIRROR_DEBUG(2,
305 		    "I/O requests for %s in queue, can't destroy it now.",
306 		    cp->provider->name);
307 		return;
308 	}
309 	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
310 	g_detach(cp);
311 	g_destroy_consumer(cp);
312 }
313 
314 static int
315 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
316 {
317 	int error;
318 
319 	g_topology_assert();
320 	KASSERT(disk->d_consumer == NULL,
321 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
322 
323 	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
324 	disk->d_consumer->private = disk;
325 	error = g_attach(disk->d_consumer, pp);
326 	if (error != 0)
327 		return (error);
328 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
329 	return (0);
330 }
331 
332 static void
333 g_mirror_disconnect_disk(struct g_mirror_disk *disk)
334 {
335 	struct g_consumer *cp;
336 
337 	g_topology_assert();
338 
339 	cp = disk->d_consumer;
340 	if (cp == NULL)
341 		return;
342 	if (cp->provider != NULL) {
343 		G_MIRROR_DEBUG(2, "Disk %s disconnected.",
344 		    g_mirror_get_diskname(disk));
345 		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
346 			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
347 			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
348 			    0);
349 			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
350 		}
351 		g_mirror_kill_consumer(disk->d_softc, cp);
352 	} else {
353 		g_destroy_consumer(cp);
354 	}
355 }
356 
357 /*
358  * Initialize disk. This means allocate memory, create consumer, attach it
359  * to the provider and open access (r1w1e1) to it.
360  */
361 static struct g_mirror_disk *
362 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
363     struct g_mirror_metadata *md, int *errorp)
364 {
365 	struct g_mirror_disk *disk;
366 	int error;
367 
368 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
369 	if (disk == NULL) {
370 		error = ENOMEM;
371 		goto fail;
372 	}
373 	disk->d_softc = sc;
374 	error = g_mirror_connect_disk(disk, pp);
375 	if (error != 0)
376 		goto fail;
377 	disk->d_id = md->md_did;
378 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
379 	disk->d_priority = md->md_priority;
380 	disk->d_delay.sec = 0;
381 	disk->d_delay.frac = 0;
382 	binuptime(&disk->d_last_used);
383 	disk->d_flags = md->md_dflags;
384 	disk->d_sync.ds_consumer = NULL;
385 	disk->d_sync.ds_offset = md->md_sync_offset;
386 	disk->d_sync.ds_offset_done = md->md_sync_offset;
387 	disk->d_sync.ds_syncid = md->md_syncid;
388 	if (errorp != NULL)
389 		*errorp = 0;
390 	return (disk);
391 fail:
392 	if (errorp != NULL)
393 		*errorp = error;
394 	if (disk != NULL) {
395 		g_mirror_disconnect_disk(disk);
396 		free(disk, M_MIRROR);
397 	}
398 	return (NULL);
399 }
400 
/*
 * Free the disk: dispose of its consumer via g_mirror_disconnect_disk()
 * and release the disk structure itself.
 * The topology lock must be held.
 */
static void
g_mirror_free_disk(struct g_mirror_disk *disk)
{

	g_topology_assert();

	g_mirror_disconnect_disk(disk);
	free(disk, M_MIRROR);
}
413 
414 static void
415 g_mirror_destroy_disk(struct g_mirror_disk *disk)
416 {
417 	struct g_mirror_softc *sc;
418 
419 	g_topology_assert();
420 
421 	LIST_REMOVE(disk, d_next);
422 	g_mirror_event_cancel(disk);
423 	sc = disk->d_softc;
424 	if (sc->sc_hint == disk)
425 		sc->sc_hint = NULL;
426 	switch (disk->d_state) {
427 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
428 		g_mirror_sync_stop(disk, 1);
429 		/* FALLTHROUGH */
430 	case G_MIRROR_DISK_STATE_NEW:
431 	case G_MIRROR_DISK_STATE_STALE:
432 	case G_MIRROR_DISK_STATE_ACTIVE:
433 		g_mirror_free_disk(disk);
434 		break;
435 	default:
436 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
437 		    g_mirror_get_diskname(disk),
438 		    g_mirror_disk_state2str(disk->d_state)));
439 	}
440 }
441 
/*
 * Tear down the whole mirror device: its provider, every disk, all
 * pending events, the callout, the synchronization zone and geom, the
 * two mutexes, and finally the mirror geom itself (withered with ENXIO).
 * The topology lock must be held.
 */
static void
g_mirror_destroy_device(struct g_mirror_softc *sc)
{
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct g_geom *gp;
	struct g_consumer *cp;

	g_topology_assert();

	gp = sc->sc_geom;
	if (sc->sc_provider != NULL)
		g_mirror_destroy_provider(sc);
	/*
	 * g_mirror_destroy_disk() unlinks the disk from the list, so keep
	 * taking the head until the list is empty.
	 */
	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
	    disk = LIST_FIRST(&sc->sc_disks)) {
		g_mirror_destroy_disk(disk);
	}
	/*
	 * Drain the event queue.  Events nobody waits for are freed here;
	 * the others are marked cancelled and done, and their waiter is
	 * woken up (the waiter frees the event itself).
	 */
	while ((ep = g_mirror_event_get(sc)) != NULL) {
		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
			g_mirror_event_free(ep);
		else {
			ep->e_error = ECANCELED;
			ep->e_flags |= G_MIRROR_EVENT_DONE;
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
			mtx_lock(&sc->sc_events_mtx);
			wakeup(ep);
			mtx_unlock(&sc->sc_events_mtx);
		}
	}
	callout_drain(&sc->sc_callout);
	gp->softc = NULL;
	uma_zdestroy(sc->sc_sync.ds_zone);
	/*
	 * Release every consumer of the synchronization geom: drop any
	 * positive access counts and detach it if it is attached, then
	 * destroy it.
	 */
	while ((cp = LIST_FIRST(&sc->sc_sync.ds_geom->consumer)) != NULL) {
		if (cp->provider != NULL) {
			if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
				g_access(cp, -cp->acr, -cp->acw, -cp->ace);
			g_detach(cp);
		}
		g_destroy_consumer(cp);
	}
	sc->sc_sync.ds_geom->softc = NULL;
	g_destroy_geom(sc->sc_sync.ds_geom);
	mtx_destroy(&sc->sc_queue_mtx);
	mtx_destroy(&sc->sc_events_mtx);
	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
}
489 
490 static void
491 g_mirror_orphan(struct g_consumer *cp)
492 {
493 	struct g_mirror_disk *disk;
494 
495 	g_topology_assert();
496 
497 	disk = cp->private;
498 	if (disk == NULL)
499 		return;
500 	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
501 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
502 	    G_MIRROR_EVENT_DONTWAIT);
503 }
504 
505 static void
506 g_mirror_spoiled(struct g_consumer *cp)
507 {
508 	struct g_mirror_disk *disk;
509 
510 	g_topology_assert();
511 
512 	disk = cp->private;
513 	if (disk == NULL)
514 		return;
515 	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
516 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
517 	    G_MIRROR_EVENT_DONTWAIT);
518 }
519 
520 /*
521  * Function should return the next active disk on the list.
522  * It is possible that it will be the same disk as given.
523  * If there are no active disks on list, NULL is returned.
524  */
525 static __inline struct g_mirror_disk *
526 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
527 {
528 	struct g_mirror_disk *dp;
529 
530 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
531 	    dp = LIST_NEXT(dp, d_next)) {
532 		if (dp == NULL)
533 			dp = LIST_FIRST(&sc->sc_disks);
534 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
535 			break;
536 	}
537 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
538 		return (NULL);
539 	return (dp);
540 }
541 
542 static struct g_mirror_disk *
543 g_mirror_get_disk(struct g_mirror_softc *sc)
544 {
545 	struct g_mirror_disk *disk;
546 
547 	if (sc->sc_hint == NULL) {
548 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
549 		if (sc->sc_hint == NULL)
550 			return (NULL);
551 	}
552 	disk = sc->sc_hint;
553 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
554 		disk = g_mirror_find_next(sc, disk);
555 		if (disk == NULL)
556 			return (NULL);
557 	}
558 	sc->sc_hint = g_mirror_find_next(sc, disk);
559 	return (disk);
560 }
561 
/*
 * Overwrite the disk's metadata sector (the last sector of its provider)
 * with zeros.
 *
 * Returns 0 on success.  On failure the error is returned, an immediate
 * syncid bump is requested and a non-blocking request to disconnect the
 * disk is posted.  The topology lock must be held on entry; it is
 * dropped around the write and held again on return.
 */
static int
g_mirror_clear_metadata(struct g_mirror_disk *disk)
{
	struct g_mirror_softc *sc;
	struct g_consumer *cp;
	off_t offset, length;
	u_char *sector;
	int close = 0, error = 0;

	g_topology_assert();

	sc = disk->d_softc;
	cp = disk->d_consumer;
	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
	/* The metadata occupies the last sector of the provider. */
	length = cp->provider->sectorsize;
	offset = cp->provider->mediasize - length;
	/* M_ZERO: the zero-filled buffer is what gets written. */
	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
	/*
	 * Open consumer if it wasn't opened and remember to close it.
	 */
	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
		error = g_access(cp, 0, 1, 1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, 1, 1, error);
		if (error == 0)
			close = 1;
#ifdef	INVARIANTS
	} else {
		/* A disk flagged DIRTY is expected to be open for writing. */
		KASSERT(cp->acw > 0 && cp->ace > 0,
		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
		    cp->acr, cp->acw, cp->ace));
#endif
	}
	if (error == 0) {
		/* The write is issued with the topology lock released. */
		g_topology_unlock();
		error = g_write_data(cp, offset, sector, length);
		g_topology_lock();
	}
	free(sector, M_MIRROR);
	if (close) {
		/* Undo the access that was acquired above. */
		g_access(cp, 0, -1, -1);
		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
		    cp->provider->name, 0, -1, -1, 0);
	}
	if (error != 0) {
		G_MIRROR_DEBUG(0, "Cannot clear metadata on disk %s.",
		    g_mirror_get_diskname(disk));
		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
		    G_MIRROR_EVENT_DONTWAIT);
		return (error);
	}
	G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
	    g_mirror_get_diskname(disk));
	return (0);
}
619 
620 void
621 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
622     struct g_mirror_metadata *md)
623 {
624 
625 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
626 	md->md_version = G_MIRROR_VERSION;
627 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
628 	md->md_mid = sc->sc_id;
629 	md->md_all = sc->sc_ndisks;
630 	md->md_slice = sc->sc_slice;
631 	md->md_balance = sc->sc_balance;
632 	md->md_mediasize = sc->sc_mediasize;
633 	md->md_sectorsize = sc->sc_sectorsize;
634 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
635 	if (disk == NULL) {
636 		md->md_did = arc4random();
637 		md->md_priority = 0;
638 		md->md_syncid = 0;
639 		md->md_dflags = 0;
640 		md->md_sync_offset = 0;
641 	} else {
642 		md->md_did = disk->d_id;
643 		md->md_priority = disk->d_priority;
644 		md->md_syncid = disk->d_sync.ds_syncid;
645 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
646 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
647 			md->md_sync_offset = disk->d_sync.ds_offset_done;
648 		else
649 			md->md_sync_offset = 0;
650 	}
651 }
652 
653 void
654 g_mirror_update_metadata(struct g_mirror_disk *disk)
655 {
656 	struct g_mirror_softc *sc;
657 	struct g_mirror_metadata md;
658 	struct g_consumer *cp;
659 	off_t offset, length;
660 	u_char *sector;
661 	int close = 0, error = 0;
662 
663 	g_topology_assert();
664 
665 	sc = disk->d_softc;
666 	cp = disk->d_consumer;
667 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
668 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
669 	length = cp->provider->sectorsize;
670 	offset = cp->provider->mediasize - length;
671 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK);
672 	/*
673 	 * Open consumer if it wasn't opened and remember to close it.
674 	 */
675 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
676 		error = g_access(cp, 0, 1, 1);
677 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
678 		    cp->provider->name, 0, 1, 1, error);
679 		if (error == 0)
680 			close = 1;
681 #ifdef	INVARIANTS
682 	} else {
683 		KASSERT(cp->acw > 0 && cp->ace > 0,
684 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
685 		    cp->acr, cp->acw, cp->ace));
686 #endif
687 	}
688 	if (error == 0) {
689 		g_mirror_fill_metadata(sc, disk, &md);
690 		mirror_metadata_encode(&md, sector);
691 		g_topology_unlock();
692 		error = g_write_data(cp, offset, sector, length);
693 		g_topology_lock();
694 	}
695 	free(sector, M_MIRROR);
696 	if (close) {
697 		g_access(cp, 0, -1, -1);
698 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
699 		    cp->provider->name, 0, -1, -1, 0);
700 	}
701 	if (error != 0) {
702 		G_MIRROR_DEBUG(0,
703 		    "Cannot update metadata on disk %s (error=%d).",
704 		    g_mirror_get_diskname(disk), error);
705 		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
706 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
707 		    G_MIRROR_EVENT_DONTWAIT);
708 		return;
709 	}
710 	G_MIRROR_DEBUG(2, "Metadata on %s updated.",
711 	    g_mirror_get_diskname(disk));
712 }
713 
714 static void
715 g_mirror_bump_syncid(struct g_mirror_softc *sc)
716 {
717 	struct g_mirror_disk *disk;
718 
719 	g_topology_assert();
720 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
721 	    ("%s called with no active disks (device=%s).", __func__,
722 	    sc->sc_name));
723 
724 	sc->sc_syncid++;
725 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
726 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
727 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
728 			disk->d_sync.ds_syncid = sc->sc_syncid;
729 			g_mirror_update_metadata(disk);
730 		}
731 	}
732 }
733 
734 static __inline int
735 bintime_cmp(struct bintime *bt1, struct bintime *bt2)
736 {
737 
738 	if (bt1->sec < bt2->sec)
739 		return (-1);
740 	else if (bt1->sec > bt2->sec)
741 		return (1);
742 	if (bt1->frac < bt2->frac)
743 		return (-1);
744 	else if (bt1->frac > bt2->frac)
745 		return (1);
746 	return (0);
747 }
748 
749 static void
750 g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
751 {
752 
753 	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
754 		return;
755 	binuptime(&disk->d_delay);
756 	bintime_sub(&disk->d_delay, &bp->bio_t0);
757 }
758 
759 static void
760 g_mirror_done(struct bio *bp)
761 {
762 	struct g_mirror_softc *sc;
763 
764 	sc = bp->bio_from->geom->softc;
765 	bp->bio_flags = BIO_FLAG1;
766 	mtx_lock(&sc->sc_queue_mtx);
767 	bioq_disksort(&sc->sc_queue, bp);
768 	wakeup(sc);
769 	mtx_unlock(&sc->sc_queue_mtx);
770 }
771 
/*
 * Handle a finished component request 'bp' that was cloned from a parent
 * request, and deliver the parent once all of its children are accounted
 * for.
 *
 * A failed child records its error in the parent (if none is recorded
 * yet) and triggers disconnection of the disk it was sent to.  For
 * writes and deletes a failed child is then dropped from the parent's
 * accounting, so the parent succeeds as long as one child succeeds.  A
 * read whose children have all come back, at least one with an error, is
 * put back on the device queue with its error cleared.
 *
 * Must be called without the topology lock held.
 */
static void
g_mirror_regular_request(struct bio *bp)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct bio *pbp;

	g_topology_assert_not();

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	disk = bp->bio_from->private;
	if (disk == NULL) {
		/*
		 * The consumer no longer belongs to a disk; try to get
		 * rid of it now that this request has come back.
		 */
		g_topology_lock();
		g_mirror_kill_consumer(sc, bp->bio_from);
		g_topology_unlock();
	} else {
		g_mirror_update_delay(disk, bp);
	}

	pbp->bio_inbed++;
	KASSERT(pbp->bio_inbed <= pbp->bio_children,
	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
	    pbp->bio_children));
	if (bp->bio_error == 0 && pbp->bio_error == 0) {
		/* Neither this child nor any earlier one has failed. */
		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
		g_destroy_bio(bp);
		if (pbp->bio_children == pbp->bio_inbed) {
			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
			pbp->bio_completed = pbp->bio_length;
			g_io_deliver(pbp, pbp->bio_error);
		}
		return;
	} else if (bp->bio_error != 0) {
		/* Keep only the first error seen by the parent. */
		if (pbp->bio_error == 0)
			pbp->bio_error = bp->bio_error;
		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
		    bp->bio_error);
		if (disk != NULL) {
			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
			g_mirror_event_send(disk,
			    G_MIRROR_DISK_STATE_DISCONNECTED,
			    G_MIRROR_EVENT_DONTWAIT);
		}
		switch (pbp->bio_cmd) {
		case BIO_DELETE:
		case BIO_WRITE:
			/* Forget the failed child entirely. */
			pbp->bio_inbed--;
			pbp->bio_children--;
			break;
		}
	}
	g_destroy_bio(bp);

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (pbp->bio_children == pbp->bio_inbed) {
			/*
			 * Clear the error and put the parent back on the
			 * device queue.
			 */
			pbp->bio_error = 0;
			mtx_lock(&sc->sc_queue_mtx);
			bioq_disksort(&sc->sc_queue, pbp);
			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
			wakeup(sc);
			mtx_unlock(&sc->sc_queue_mtx);
		}
		break;
	case BIO_DELETE:
	case BIO_WRITE:
		if (pbp->bio_children == 0) {
			/*
			 * All requests failed.
			 */
		} else if (pbp->bio_inbed < pbp->bio_children) {
			/* Do nothing. */
			break;
		} else if (pbp->bio_children == pbp->bio_inbed) {
			/* Some requests succeeded. */
			pbp->bio_error = 0;
			pbp->bio_completed = pbp->bio_length;
		}
		g_io_deliver(pbp, pbp->bio_error);
		break;
	default:
		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
		break;
	}
}
858 
859 static void
860 g_mirror_sync_done(struct bio *bp)
861 {
862 	struct g_mirror_softc *sc;
863 
864 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
865 	sc = bp->bio_from->geom->softc;
866 	bp->bio_flags = BIO_FLAG2;
867 	mtx_lock(&sc->sc_queue_mtx);
868 	bioq_disksort(&sc->sc_queue, bp);
869 	wakeup(sc);
870 	mtx_unlock(&sc->sc_queue_mtx);
871 }
872 
873 static void
874 g_mirror_start(struct bio *bp)
875 {
876 	struct g_mirror_softc *sc;
877 
878 	sc = bp->bio_to->geom->softc;
879 	/*
880 	 * If sc == NULL or there are no valid disks, provider's error
881 	 * should be set and g_mirror_start() should not be called at all.
882 	 */
883 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
884 	    ("Provider's error should be set (error=%d)(mirror=%s).",
885 	    bp->bio_to->error, bp->bio_to->name));
886 	G_MIRROR_LOGREQ(3, bp, "Request received.");
887 
888 	switch (bp->bio_cmd) {
889 	case BIO_READ:
890 	case BIO_WRITE:
891 	case BIO_DELETE:
892 		break;
893 	case BIO_GETATTR:
894 	default:
895 		g_io_deliver(bp, EOPNOTSUPP);
896 		return;
897 	}
898 	mtx_lock(&sc->sc_queue_mtx);
899 	bioq_disksort(&sc->sc_queue, bp);
900 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
901 	wakeup(sc);
902 	mtx_unlock(&sc->sc_queue_mtx);
903 }
904 
905 /*
906  * Send one synchronization request.
907  */
908 static void
909 g_mirror_sync_one(struct g_mirror_disk *disk)
910 {
911 	struct g_mirror_softc *sc;
912 	struct bio *bp;
913 
914 	sc = disk->d_softc;
915 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
916 	    ("Disk %s is not marked for synchronization.",
917 	    g_mirror_get_diskname(disk)));
918 
919 	bp = g_new_bio();
920 	if (bp == NULL)
921 		return;
922 	bp->bio_parent = NULL;
923 	bp->bio_cmd = BIO_READ;
924 	bp->bio_offset = disk->d_sync.ds_offset;
925 	bp->bio_length = MIN(sc->sc_sync.ds_block,
926 	    sc->sc_mediasize - bp->bio_offset);
927 	bp->bio_flags = 0;
928 	bp->bio_done = g_mirror_sync_done;
929 	bp->bio_data = uma_zalloc(sc->sc_sync.ds_zone, M_NOWAIT | M_ZERO);
930 	if (bp->bio_data == NULL) {
931 		g_destroy_bio(bp);
932 		return;
933 	}
934 	disk->d_sync.ds_offset += bp->bio_length;
935 	bp->bio_to = sc->sc_provider;
936 	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
937 	g_io_request(bp, disk->d_sync.ds_consumer);
938 }
939 
940 static void
941 g_mirror_sync_request(struct bio *bp)
942 {
943 	struct g_mirror_softc *sc;
944 	struct g_mirror_disk *disk;
945 
946 	sc = bp->bio_from->geom->softc;
947 	disk = bp->bio_from->private;
948 	if (disk == NULL) {
949 		g_topology_lock();
950 		g_mirror_kill_consumer(sc, bp->bio_from);
951 		g_topology_unlock();
952 		uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
953 		g_destroy_bio(bp);
954 		return;
955 	}
956 
957 	/*
958 	 * Synchronization request.
959 	 */
960 	switch (bp->bio_cmd) {
961 	case BIO_READ:
962 	    {
963 		struct g_consumer *cp;
964 
965 		if (bp->bio_error != 0) {
966 			G_MIRROR_LOGREQ(0, bp,
967 			    "Synchronization request failed (error=%d).",
968 			    bp->bio_error);
969 			uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
970 			g_destroy_bio(bp);
971 			return;
972 		}
973 		bp->bio_cmd = BIO_WRITE;
974 		bp->bio_flags = 0;
975 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
976 		cp = disk->d_consumer;
977 		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
978 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
979 		    cp->acr, cp->acw, cp->ace));
980 		g_io_request(bp, cp);
981 		return;
982 	    }
983 	case BIO_WRITE:
984 		uma_zfree(sc->sc_sync.ds_zone, bp->bio_data);
985 		if (bp->bio_error != 0) {
986 			G_MIRROR_LOGREQ(0, bp,
987 			    "Synchronization request failed (error=%d).",
988 			    bp->bio_error);
989 			g_destroy_bio(bp);
990 			disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
991 			g_mirror_event_send(disk,
992 			    G_MIRROR_DISK_STATE_DISCONNECTED,
993 			    G_MIRROR_EVENT_DONTWAIT);
994 			return;
995 		}
996 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
997 		g_destroy_bio(bp);
998 		disk->d_sync.ds_offset_done = bp->bio_offset + bp->bio_length;
999 		if (bp->bio_offset + bp->bio_length ==
1000 		    sc->sc_provider->mediasize) {
1001 			/*
1002 			 * Disk up-to-date, activate it.
1003 			 */
1004 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
1005 			    G_MIRROR_EVENT_DONTWAIT);
1006 			return;
1007 		} else if ((disk->d_sync.ds_offset_done %
1008 		    (sc->sc_sync.ds_block * 100)) == 0) {
1009 			/*
1010 			 * Update offset_done on every 100 blocks.
1011 			 * XXX: This should be configurable.
1012 			 */
1013 			g_topology_lock();
1014 			g_mirror_update_metadata(disk);
1015 			g_topology_unlock();
1016 		}
1017 		return;
1018 	default:
1019 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1020 		    bp->bio_cmd, sc->sc_name));
1021 		break;
1022 	}
1023 }
1024 
1025 static void
1026 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1027 {
1028 	struct g_mirror_disk *disk;
1029 	struct g_consumer *cp;
1030 	struct bio *cbp;
1031 
1032 	disk = g_mirror_get_disk(sc);
1033 	if (disk == NULL) {
1034 		if (bp->bio_error == 0)
1035 			bp->bio_error = ENXIO;
1036 		g_io_deliver(bp, bp->bio_error);
1037 		return;
1038 	}
1039 	cbp = g_clone_bio(bp);
1040 	if (cbp == NULL) {
1041 		if (bp->bio_error == 0)
1042 			bp->bio_error = ENOMEM;
1043 		g_io_deliver(bp, bp->bio_error);
1044 		return;
1045 	}
1046 	/*
1047 	 * Fill in the component buf structure.
1048 	 */
1049 	cp = disk->d_consumer;
1050 	cbp->bio_done = g_mirror_done;
1051 	cbp->bio_to = cp->provider;
1052 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1053 	KASSERT(cp->acr > 0 && cp->ace > 0,
1054 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1055 	    cp->acw, cp->ace));
1056 	g_io_request(cbp, cp);
1057 }
1058 
1059 static void
1060 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1061 {
1062 	struct g_mirror_disk *disk, *dp;
1063 	struct g_consumer *cp;
1064 	struct bio *cbp;
1065 	struct bintime curtime;
1066 
1067 	binuptime(&curtime);
1068 	/*
1069 	 * Find a disk which the smallest load.
1070 	 */
1071 	disk = NULL;
1072 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1073 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1074 			continue;
1075 		/* If disk wasn't used for more than 2 sec, use it. */
1076 		if (curtime.sec - dp->d_last_used.sec >= 2) {
1077 			disk = dp;
1078 			break;
1079 		}
1080 		if (disk == NULL ||
1081 		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1082 			disk = dp;
1083 		}
1084 	}
1085 	cbp = g_clone_bio(bp);
1086 	if (cbp == NULL) {
1087 		if (bp->bio_error == 0)
1088 			bp->bio_error = ENOMEM;
1089 		g_io_deliver(bp, bp->bio_error);
1090 		return;
1091 	}
1092 	/*
1093 	 * Fill in the component buf structure.
1094 	 */
1095 	cp = disk->d_consumer;
1096 	cbp->bio_done = g_mirror_done;
1097 	cbp->bio_to = cp->provider;
1098 	binuptime(&disk->d_last_used);
1099 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1100 	KASSERT(cp->acr > 0 && cp->ace > 0,
1101 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1102 	    cp->acw, cp->ace));
1103 	g_io_request(cbp, cp);
1104 }
1105 
1106 static void
1107 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1108 {
1109 	struct bio_queue_head queue;
1110 	struct g_mirror_disk *disk;
1111 	struct g_consumer *cp;
1112 	struct bio *cbp;
1113 	off_t left, mod, offset, slice;
1114 	u_char *data;
1115 	u_int ndisks;
1116 
1117 	if (bp->bio_length <= sc->sc_slice) {
1118 		g_mirror_request_round_robin(sc, bp);
1119 		return;
1120 	}
1121 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1122 	slice = bp->bio_length / ndisks;
1123 	mod = slice % sc->sc_provider->sectorsize;
1124 	if (mod != 0)
1125 		slice += sc->sc_provider->sectorsize - mod;
1126 	/*
1127 	 * Allocate all bios before sending any request, so we can
1128 	 * return ENOMEM in nice and clean way.
1129 	 */
1130 	left = bp->bio_length;
1131 	offset = bp->bio_offset;
1132 	data = bp->bio_data;
1133 	bioq_init(&queue);
1134 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1135 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1136 			continue;
1137 		cbp = g_clone_bio(bp);
1138 		if (cbp == NULL) {
1139 			for (cbp = bioq_first(&queue); cbp != NULL;
1140 			    cbp = bioq_first(&queue)) {
1141 				bioq_remove(&queue, cbp);
1142 				g_destroy_bio(cbp);
1143 			}
1144 			if (bp->bio_error == 0)
1145 				bp->bio_error = ENOMEM;
1146 			g_io_deliver(bp, bp->bio_error);
1147 			return;
1148 		}
1149 		bioq_insert_tail(&queue, cbp);
1150 		cbp->bio_done = g_mirror_done;
1151 		cbp->bio_caller1 = disk;
1152 		cbp->bio_to = disk->d_consumer->provider;
1153 		cbp->bio_offset = offset;
1154 		cbp->bio_data = data;
1155 		cbp->bio_length = MIN(left, slice);
1156 		left -= cbp->bio_length;
1157 		if (left == 0)
1158 			break;
1159 		offset += cbp->bio_length;
1160 		data += cbp->bio_length;
1161 	}
1162 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1163 		bioq_remove(&queue, cbp);
1164 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1165 		disk = cbp->bio_caller1;
1166 		cbp->bio_caller1 = NULL;
1167 		cp = disk->d_consumer;
1168 		KASSERT(cp->acr > 0 && cp->ace > 0,
1169 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1170 		    cp->acr, cp->acw, cp->ace));
1171 		g_io_request(cbp, disk->d_consumer);
1172 	}
1173 }
1174 
1175 static void
1176 g_mirror_register_request(struct bio *bp)
1177 {
1178 	struct g_mirror_softc *sc;
1179 
1180 	sc = bp->bio_to->geom->softc;
1181 	switch (bp->bio_cmd) {
1182 	case BIO_READ:
1183 		switch (sc->sc_balance) {
1184 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1185 			g_mirror_request_round_robin(sc, bp);
1186 			break;
1187 		case G_MIRROR_BALANCE_LOAD:
1188 			g_mirror_request_load(sc, bp);
1189 			break;
1190 		case G_MIRROR_BALANCE_SPLIT:
1191 			g_mirror_request_split(sc, bp);
1192 			break;
1193 		}
1194 		return;
1195 	case BIO_WRITE:
1196 	case BIO_DELETE:
1197 	    {
1198 		struct g_mirror_disk *disk;
1199 		struct bio_queue_head queue;
1200 		struct g_consumer *cp;
1201 		struct bio *cbp;
1202 
1203 		/*
1204 		 * Allocate all bios before sending any request, so we can
1205 		 * return ENOMEM in nice and clean way.
1206 		 */
1207 		bioq_init(&queue);
1208 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1209 			switch (disk->d_state) {
1210 			case G_MIRROR_DISK_STATE_ACTIVE:
1211 				break;
1212 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1213 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1214 					continue;
1215 				break;
1216 			default:
1217 				continue;
1218 			}
1219 			cbp = g_clone_bio(bp);
1220 			if (cbp == NULL) {
1221 				for (cbp = bioq_first(&queue); cbp != NULL;
1222 				    cbp = bioq_first(&queue)) {
1223 					bioq_remove(&queue, cbp);
1224 					g_destroy_bio(cbp);
1225 				}
1226 				if (bp->bio_error == 0)
1227 					bp->bio_error = ENOMEM;
1228 				g_io_deliver(bp, bp->bio_error);
1229 				return;
1230 			}
1231 			bioq_insert_tail(&queue, cbp);
1232 		}
1233 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1234 			switch (disk->d_state) {
1235 			case G_MIRROR_DISK_STATE_ACTIVE:
1236 				break;
1237 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1238 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1239 					continue;
1240 				break;
1241 			default:
1242 				continue;
1243 			}
1244 			cbp = bioq_first(&queue);
1245 			KASSERT(cbp != NULL, ("NULL cbp! (device %s).",
1246 			    sc->sc_name));
1247 			bioq_remove(&queue, cbp);
1248 			cp = disk->d_consumer;
1249 			cbp->bio_done = g_mirror_done;
1250 			cbp->bio_to = cp->provider;
1251 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1252 			KASSERT(cp->acw > 0 && cp->ace > 0,
1253 			    ("Consumer %s not opened (r%dw%de%d).",
1254 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1255 			g_io_request(cbp, cp);
1256 		}
1257 		/*
1258 		 * Bump syncid on first write.
1259 		 */
1260 		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1261 			sc->sc_bump_syncid = 0;
1262 			g_topology_lock();
1263 			g_mirror_bump_syncid(sc);
1264 			g_topology_unlock();
1265 		}
1266 		return;
1267 	    }
1268 	default:
1269 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1270 		    bp->bio_cmd, sc->sc_name));
1271 		break;
1272 	}
1273 }
1274 
1275 /*
1276  * Worker thread.
 *
 * Main loop of the per-device kernel thread; arg is the device softc.
 * Each iteration does exactly one of the following, in priority order:
 *  1. runs one pending event obtained from g_mirror_event_get();
 *  2. when disks are being synchronized and either the request queue is
 *     empty or more than g_mirror_reqs_per_sync requests were handled
 *     since the last round, calls g_mirror_sync_one() on the eligible
 *     SYNCHRONIZING disks;
 *  3. sleeps on sc if the request queue is empty;
 *  4. dequeues one bio from sc_queue and dispatches it by its bio_flags.
 * The thread leaves through kthread_exit(0) at the "end" label once
 * G_MIRROR_DEVICE_FLAG_DESTROY is set.
1277  */
1278 static void
1279 g_mirror_worker(void *arg)
1280 {
1281 	struct g_mirror_softc *sc;
1282 	struct g_mirror_disk *disk;
1283 	struct g_mirror_event *ep;
1284 	struct bio *bp;
	/* Requests dequeued since the last synchronization round. */
1285 	u_int nreqs;
1286 
1287 	sc = arg;
1288 	curthread->td_base_pri = PRIBIO;
1289 
1290 	nreqs = 0;
1291 	for (;;) {
1292 		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
1293 		/*
1294 		 * First take a look at events.
1295 		 * This is important to handle events before any I/O requests.
1296 		 */
1297 		ep = g_mirror_event_get(sc);
1298 		if (ep != NULL) {
1299 			g_topology_lock();
1300 			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
1301 				/* Update only device status. */
1302 				G_MIRROR_DEBUG(3,
1303 				    "Running event for device %s.",
1304 				    sc->sc_name);
1305 				ep->e_error = 0;
1306 				g_mirror_update_device(sc, 1);
1307 			} else {
1308 				/* Update disk status. */
1309 				G_MIRROR_DEBUG(3, "Running event for disk %s.",
1310 				     g_mirror_get_diskname(ep->e_disk));
1311 				ep->e_error = g_mirror_update_disk(ep->e_disk,
1312 				    ep->e_state);
1313 				if (ep->e_error == 0)
1314 					g_mirror_update_device(sc, 0);
1315 			}
1316 			g_topology_unlock();
			/*
			 * DONTWAIT events are freed here and must not fail;
			 * otherwise the event is marked DONE and whoever
			 * sleeps on ep is woken (presumably the sender, which
			 * then reads e_error - confirm in the event code).
			 */
1317 			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
1318 				KASSERT(ep->e_error == 0,
1319 				    ("Error cannot be handled."));
1320 				g_mirror_event_free(ep);
1321 			} else {
1322 				ep->e_flags |= G_MIRROR_EVENT_DONE;
1323 				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1324 				    ep);
1325 				mtx_lock(&sc->sc_events_mtx);
1326 				wakeup(ep);
1327 				mtx_unlock(&sc->sc_events_mtx);
1328 			}
1329 			if ((sc->sc_flags &
1330 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				/*
				 * Thread exit path.  Also entered by
				 * "goto end" from the empty-queue check below.
				 * With FLAG_WAIT set, a waiter on
				 * &sc->sc_worker is woken and sc is left
				 * alone; otherwise the device is destroyed
				 * and sc is freed here.
				 */
1331 end:
1332 				if ((sc->sc_flags &
1333 				    G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1334 					G_MIRROR_DEBUG(4, "%s: Waking up %p.",
1335 					    __func__, &sc->sc_worker);
1336 					wakeup(&sc->sc_worker);
1337 					sc->sc_worker = NULL;
1338 				} else {
1339 					g_topology_lock();
1340 					g_mirror_destroy_device(sc);
1341 					g_topology_unlock();
1342 					free(sc, M_MIRROR);
1343 				}
1344 				kthread_exit(0);
1345 			}
1346 			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
1347 			continue;
1348 		}
1349 		/*
1350 		 * Now I/O requests.
1351 		 */
1352 		/* Get first request from the queue. */
1353 		mtx_lock(&sc->sc_queue_mtx);
1354 		bp = bioq_first(&sc->sc_queue);
1355 		if (bp == NULL) {
1356 			if ((sc->sc_flags &
1357 			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
1358 				mtx_unlock(&sc->sc_queue_mtx);
1359 				goto end;
1360 			}
1361 		}
1362 		if (sc->sc_sync.ds_ndisks > 0 &&
1363 		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
1364 			mtx_unlock(&sc->sc_queue_mtx);
1365 			/*
1366 			 * It is time for synchronization...
1367 			 */
1368 			nreqs = 0;
1369 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1370 				if (disk->d_state !=
1371 				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
1372 					continue;
1373 				}
				/* Whole media already requested. */
1374 				if (disk->d_sync.ds_offset >=
1375 				    sc->sc_provider->mediasize) {
1376 					continue;
1377 				}
				/*
				 * ds_offset is ahead of ds_offset_done:
				 * presumably the previous sync request for
				 * this disk has not completed yet.
				 */
1378 				if (disk->d_sync.ds_offset >
1379 				    disk->d_sync.ds_offset_done) {
1380 					continue;
1381 				}
1382 				g_mirror_sync_one(disk);
1383 			}
1384 			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
			/* Jumps into the BIO_FLAG2 branch below to throttle. */
1385 			goto sleep;
1386 		}
1387 		if (bp == NULL) {
			/*
			 * Nothing to do.  sc_queue_mtx is not unlocked after
			 * this call and is locked afresh on the next pass, so
			 * PDROP is relied upon to release it.
			 */
1388 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 0);
1389 			G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
1390 			continue;
1391 		}
1392 		nreqs++;
1393 		bioq_remove(&sc->sc_queue, bp);
1394 		mtx_unlock(&sc->sc_queue_mtx);
1395 
		/*
		 * Dispatch on bio_flags: BIO_FLAG1 goes to
		 * g_mirror_regular_request(), BIO_FLAG2 to
		 * g_mirror_sync_request(), anything else is a new request
		 * for g_mirror_register_request().
		 * NOTE(review): the flags look like tags put on completed
		 * regular/synchronization bios before they are re-queued;
		 * confirm in the done handlers.
		 */
1396 		if ((bp->bio_flags & BIO_FLAG1) != 0) {
1397 			g_mirror_regular_request(bp);
1398 		} else if ((bp->bio_flags & BIO_FLAG2) != 0) {
1399 			u_int timeout, sps;
1400 
1401 			g_mirror_sync_request(bp);
			/*
			 * Synchronization throttle, also reached by
			 * "goto sleep" from the sync round above.  No sleep
			 * when g_mirror_syncs_per_sec is 0 or when requests
			 * are already queued; otherwise sleep hz / sps ticks
			 * (at least one).
			 */
1402 sleep:
1403 			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
1404 			if (sps == 0) {
1405 				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
1406 				continue;
1407 			}
1408 			mtx_lock(&sc->sc_queue_mtx);
1409 			if (bioq_first(&sc->sc_queue) != NULL) {
1410 				mtx_unlock(&sc->sc_queue_mtx);
1411 				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
1412 				continue;
1413 			}
1414 			timeout = hz / sps;
1415 			if (timeout == 0)
1416 				timeout = 1;
1417 			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
1418 			    timeout);
1419 		} else {
1420 			g_mirror_register_request(bp);
1421 		}
1422 		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
1423 	}
1424 }
1425 
1426 /*
1427  * Open disk's consumer if needed.
1428  */
1429 static void
1430 g_mirror_update_access(struct g_mirror_disk *disk)
1431 {
1432 	struct g_provider *pp;
1433 	struct g_consumer *cp;
1434 	int acr, acw, ace, cpw, error;
1435 
1436 	g_topology_assert();
1437 
1438 	cp = disk->d_consumer;
1439 	pp = disk->d_softc->sc_provider;
1440 	if (pp == NULL) {
1441 		acr = -cp->acr;
1442 		acw = -cp->acw;
1443 		ace = -cp->ace;
1444 	} else {
1445 		acr = pp->acr - cp->acr;
1446 		acw = pp->acw - cp->acw;
1447 		ace = pp->ace - cp->ace;
1448 		/* Grab an extra "exclusive" bit. */
1449 		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1450 			ace++;
1451 	}
1452 	if (acr == 0 && acw == 0 && ace == 0)
1453 		return;
1454 	cpw = cp->acw;
1455 	error = g_access(cp, acr, acw, ace);
1456 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1457 	    acw, ace, error);
1458 	if (error != 0) {
1459 		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1460 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1461 		    G_MIRROR_EVENT_DONTWAIT);
1462 		return;
1463 	}
1464 	if (cpw == 0 && cp->acw > 0) {
1465 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1466 		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1467 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1468 	} else if (cpw > 0 && cp->acw == 0) {
1469 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1470 		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1471 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1472 	}
1473 }
1474 
1475 static void
1476 g_mirror_sync_start(struct g_mirror_disk *disk)
1477 {
1478 	struct g_mirror_softc *sc;
1479 	struct g_consumer *cp;
1480 	int error;
1481 
1482 	g_topology_assert();
1483 
1484 	sc = disk->d_softc;
1485 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1486 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1487 	    sc->sc_state));
1488 	cp = disk->d_consumer;
1489 	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1490 	    ("Consumer %s already opened.", cp->provider->name));
1491 
1492 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1493 	    g_mirror_get_diskname(disk));
1494 	error = g_access(cp, 0, 1, 1);
1495 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1496 	    1, error);
1497 	if (error != 0) {
1498 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1499 		    G_MIRROR_EVENT_DONTWAIT);
1500 		return;
1501 	}
1502 	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1503 	KASSERT(disk->d_sync.ds_consumer == NULL,
1504 	    ("Sync consumer already exists (device=%s, disk=%s).",
1505 	    sc->sc_name, g_mirror_get_diskname(disk)));
1506 	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1507 	disk->d_sync.ds_consumer->private = disk;
1508 	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1509 	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1510 	    disk->d_softc->sc_name, error));
1511 	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1512 	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1513 	    disk->d_softc->sc_name, error));
1514 	sc->sc_sync.ds_ndisks++;
1515 }
1516 
1517 /*
1518  * Stop synchronization process.
1519  * type: 0 - synchronization finished
1520  *       1 - synchronization stopped
1521  */
1522 static void
1523 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1524 {
1525 	struct g_consumer *cp;
1526 
1527 	g_topology_assert();
1528 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1529 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1530 	    g_mirror_disk_state2str(disk->d_state)));
1531 	if (disk->d_sync.ds_consumer == NULL)
1532 		return;
1533 
1534 	if (type == 0) {
1535 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1536 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1537 	} else /* if (type == 1) */ {
1538 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1539 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1540 	}
1541 	cp = disk->d_sync.ds_consumer;
1542 	g_access(cp, -1, 0, 0);
1543 	g_mirror_kill_consumer(disk->d_softc, cp);
1544 	disk->d_sync.ds_consumer = NULL;
1545 	disk->d_softc->sc_sync.ds_ndisks--;
1546 	cp = disk->d_consumer;
1547 	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1548 	    ("Consumer %s not opened.", cp->provider->name));
1549 	g_access(cp, 0, -1, -1);
1550 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1551 	    -1, 0);
1552 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1553 }
1554 
1555 static void
1556 g_mirror_launch_provider(struct g_mirror_softc *sc)
1557 {
1558 	struct g_mirror_disk *disk;
1559 	struct g_provider *pp;
1560 
1561 	g_topology_assert();
1562 
1563 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1564 	pp->mediasize = sc->sc_mediasize;
1565 	pp->sectorsize = sc->sc_sectorsize;
1566 	sc->sc_provider = pp;
1567 	g_error_provider(pp, 0);
1568 	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1569 	    pp->name);
1570 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1571 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1572 			g_mirror_sync_start(disk);
1573 	}
1574 }
1575 
1576 static void
1577 g_mirror_destroy_provider(struct g_mirror_softc *sc)
1578 {
1579 	struct g_mirror_disk *disk;
1580 	struct bio *bp;
1581 
1582 	g_topology_assert();
1583 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1584 	    sc->sc_name));
1585 
1586 	g_error_provider(sc->sc_provider, ENXIO);
1587 	mtx_lock(&sc->sc_queue_mtx);
1588 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1589 		bioq_remove(&sc->sc_queue, bp);
1590 		g_io_deliver(bp, ENXIO);
1591 	}
1592 	mtx_unlock(&sc->sc_queue_mtx);
1593 	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1594 	    sc->sc_provider->name);
1595 	sc->sc_provider->flags |= G_PF_WITHER;
1596 	g_orphan_provider(sc->sc_provider, ENXIO);
1597 	sc->sc_provider = NULL;
1598 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1599 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1600 			g_mirror_sync_stop(disk, 1);
1601 	}
1602 }
1603 
1604 static void
1605 g_mirror_go(void *arg)
1606 {
1607 	struct g_mirror_softc *sc;
1608 
1609 	sc = arg;
1610 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1611 	g_mirror_event_send(sc, 0,
1612 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1613 }
1614 
1615 static u_int
1616 g_mirror_determine_state(struct g_mirror_disk *disk)
1617 {
1618 	struct g_mirror_softc *sc;
1619 	u_int state;
1620 
1621 	sc = disk->d_softc;
1622 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1623 		if ((disk->d_flags &
1624 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1625 			/* Disk does not need synchronization. */
1626 			state = G_MIRROR_DISK_STATE_ACTIVE;
1627 		} else {
1628 			if ((sc->sc_flags &
1629 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1630 			    (disk->d_flags &
1631 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1632 				/*
1633 				 * We can start synchronization from
1634 				 * the stored offset.
1635 				 */
1636 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1637 			} else {
1638 				state = G_MIRROR_DISK_STATE_STALE;
1639 			}
1640 		}
1641 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1642 		/*
1643 		 * Reset all synchronization data for this disk,
1644 		 * because if it even was synchronized, it was
1645 		 * synchronized to disks with different syncid.
1646 		 */
1647 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1648 		disk->d_sync.ds_offset = 0;
1649 		disk->d_sync.ds_offset_done = 0;
1650 		disk->d_sync.ds_syncid = sc->sc_syncid;
1651 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1652 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1653 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1654 		} else {
1655 			state = G_MIRROR_DISK_STATE_STALE;
1656 		}
1657 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1658 		/*
1659 		 * Not good, NOT GOOD!
1660 		 * It means that mirror was started on stale disks
1661 		 * and more fresh disk just arrive.
1662 		 * If there were writes, mirror is fucked up, sorry.
1663 		 * I think the best choice here is don't touch
1664 		 * this disk and inform the user laudly.
1665 		 */
1666 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1667 		    "disk (%s) arrives!! It will not be connected to the "
1668 		    "running device.", sc->sc_name,
1669 		    g_mirror_get_diskname(disk));
1670 		g_mirror_destroy_disk(disk);
1671 		state = G_MIRROR_DISK_STATE_NONE;
1672 		/* Return immediately, because disk was destroyed. */
1673 		return (state);
1674 	}
1675 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1676 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1677 	return (state);
1678 }
1679 
1680 /*
1681  * Update device state.
 *
 * Re-evaluates the device after a disk or device event.  Must be called
 * with the topology lock held.
 *
 * STARTING: once all sc_ndisks disks are connected, or at least one disk is
 * connected and 'force' is true, picks the biggest syncid, decides which
 * dirty disks must be rebuilt, switches the device to RUNNING and sends
 * every disk the state chosen by g_mirror_determine_state().  If no disk
 * is left, the device is flagged for destruction.
 *
 * RUNNING: performs a pending immediate syncid bump, then flags the device
 * for destruction, destroys the provider or launches it, depending on how
 * many disks are connected and how many of them are ACTIVE or NEW.
 *
 * sc    - device to update
 * force - true to start the device without waiting for missing disks
1682  */
1683 static void
1684 g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
1685 {
1686 	struct g_mirror_disk *disk;
1687 	u_int state;
1688 
1689 	g_topology_assert();
1690 
1691 	switch (sc->sc_state) {
1692 	case G_MIRROR_DEVICE_STATE_STARTING:
1693 	    {
1694 		struct g_mirror_disk *pdisk;
1695 		u_int dirty, ndisks, syncid;
1696 
1697 		KASSERT(sc->sc_provider == NULL,
1698 		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
1699 		/*
1700 		 * Are we ready? We are, if all disks are connected or
1701 		 * if we have any disks and 'force' is true.
1702 		 */
1703 		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
1704 		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
			/* Ready: continue with the start-up below. */
1705 			;
1706 		} else if (g_mirror_ndisks(sc, -1) == 0) {
1707 			/*
1708 			 * Disks went down in starting phase, so destroy
1709 			 * device.
1710 			 */
1711 			callout_drain(&sc->sc_callout);
1712 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1713 			return;
1714 		} else {
			/* Some disks are still missing; keep waiting. */
1715 			return;
1716 		}
1717 
1718 		/*
1719 		 * Activate all disks with the biggest syncid.
1720 		 */
1721 		if (force) {
1722 			/*
1723 			 * If called with 'force' true, we're called from
1724 			 * timeout procedure, so don't bother canceling
1725 			 * timeout.
1726 			 */
			/* Count the disks not flagged SYNCHRONIZING. */
1727 			ndisks = 0;
1728 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1729 				if ((disk->d_flags &
1730 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1731 					ndisks++;
1732 				}
1733 			}
1734 			if (ndisks == 0) {
1735 				/* No valid disks found, destroy device. */
1736 				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1737 				return;
1738 			}
1739 		} else {
1740 			/* Cancel timeout. */
1741 			callout_drain(&sc->sc_callout);
1742 		}
1743 
1744 		/*
1745 		 * Find disk with the biggest syncid.
1746 		 */
1747 		syncid = 0;
1748 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1749 			if (disk->d_sync.ds_syncid > syncid)
1750 				syncid = disk->d_sync.ds_syncid;
1751 		}
1752 
1753 		/*
1754 		 * Here we need to look for dirty disks and if all disks
1755 		 * with the biggest syncid are dirty, we have to choose
1756 		 * one with the biggest priority and rebuild the rest.
1757 		 */
1758 		/*
1759 		 * Find the number of dirty disks with the biggest syncid.
1760 		 * Find the number of disks with the biggest syncid.
1761 		 * While here, find a disk with the biggest priority.
1762 		 */
1763 		dirty = ndisks = 0;
1764 		pdisk = NULL;
1765 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1766 			if (disk->d_sync.ds_syncid != syncid)
1767 				continue;
1768 			if ((disk->d_flags &
1769 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1770 				continue;
1771 			}
1772 			ndisks++;
1773 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
1774 				dirty++;
				/* pdisk is chosen among the dirty disks only. */
1775 				if (pdisk == NULL ||
1776 				    pdisk->d_priority < disk->d_priority) {
1777 					pdisk = disk;
1778 				}
1779 			}
1780 		}
1781 		if (dirty == 0) {
1782 			/* No dirty disks at all, great. */
1783 		} else if (dirty == ndisks) {
1784 			/*
1785 			 * Force synchronization for all dirty disks except one
1786 			 * with the biggest priority.
1787 			 */
1788 			KASSERT(pdisk != NULL, ("pdisk == NULL"));
1789 			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
1790 			    "master disk for synchronization.",
1791 			    g_mirror_get_diskname(pdisk), sc->sc_name);
1792 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1793 				if (disk->d_sync.ds_syncid != syncid)
1794 					continue;
1795 				if ((disk->d_flags &
1796 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1797 					continue;
1798 				}
1799 				KASSERT((disk->d_flags &
1800 				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
1801 				    ("Disk %s isn't marked as dirty.",
1802 				    g_mirror_get_diskname(disk)));
1803 				/* Skip the disk with the biggest priority. */
1804 				if (disk == pdisk)
1805 					continue;
				/*
				 * A zero syncid makes the disk look older
				 * than the device to
				 * g_mirror_determine_state() (when the
				 * device syncid is non-zero), which resets
				 * its synchronization data.
				 */
1806 				disk->d_sync.ds_syncid = 0;
1807 			}
1808 		} else if (dirty < ndisks) {
1809 			/*
1810 			 * Force synchronization for all dirty disks.
1811 			 * We have some non-dirty disks.
1812 			 */
1813 			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1814 				if (disk->d_sync.ds_syncid != syncid)
1815 					continue;
1816 				if ((disk->d_flags &
1817 				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
1818 					continue;
1819 				}
1820 				if ((disk->d_flags &
1821 				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
1822 					continue;
1823 				}
1824 				disk->d_sync.ds_syncid = 0;
1825 			}
1826 		}
1827 
1828 		/* Reset hint. */
1829 		sc->sc_hint = NULL;
1830 		sc->sc_syncid = syncid;
1831 		if (force) {
1832 			/* Remember to bump syncid on first write. */
1833 			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1834 		}
1835 		state = G_MIRROR_DEVICE_STATE_RUNNING;
1836 		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
1837 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
1838 		    g_mirror_device_state2str(state));
1839 		sc->sc_state = state;
		/*
		 * 'state' is reused for disk states from here on.  Each disk
		 * is sent its new state as a non-blocking event; a STALE
		 * disk also arms the syncid bump on first write.
		 */
1840 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1841 			state = g_mirror_determine_state(disk);
1842 			g_mirror_event_send(disk, state,
1843 			    G_MIRROR_EVENT_DONTWAIT);
1844 			if (state == G_MIRROR_DISK_STATE_STALE) {
1845 				sc->sc_bump_syncid =
1846 				    G_MIRROR_BUMP_ON_FIRST_WRITE;
1847 			}
1848 		}
1849 		break;
1850 	    }
1851 	case G_MIRROR_DEVICE_STATE_RUNNING:
1852 		/*
1853 		 * Bump syncid here, if we need to do it immediately.
1854 		 */
1855 		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
1856 			sc->sc_bump_syncid = 0;
1857 			g_mirror_bump_syncid(sc);
1858 		}
1859 		if (g_mirror_ndisks(sc, -1) == 0) {
1860 			/*
1861 			 * No disks at all, we need to destroy device.
1862 			 */
1863 			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
1864 		} else if (g_mirror_ndisks(sc,
1865 		    G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
1866 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
1867 			/*
1868 			 * No active disks, destroy provider.
1869 			 */
1870 			if (sc->sc_provider != NULL)
1871 				g_mirror_destroy_provider(sc);
1872 		} else if (g_mirror_ndisks(sc,
1873 		    G_MIRROR_DISK_STATE_ACTIVE) > 0) {
1874 			/*
1875 			 * We have active disks, launch provider if it doesn't
1876 			 * exist.
1877 			 */
1878 			if (sc->sc_provider == NULL)
1879 				g_mirror_launch_provider(sc);
1880 		}
1881 		break;
1882 	default:
1883 		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
1884 		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
1885 		break;
1886 	}
1887 }
1888 
1889 /*
1890  * Update disk state and device state if needed.
 *
 * Applies the transition to 'state' on 'disk'.  Must be called with the
 * topology lock held.  Returns 0, or the error from
 * g_mirror_clear_metadata() when a DESTROY transition cannot clear the
 * on-disk metadata (the disk is left untouched in that case).
 *
 * DISK_STATE_CHANGED() logs the transition; it relies on the local names
 * 'disk', 'state' and 'sc' and must run before disk->d_state is updated.
1891  */
1892 #define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
1893 	"Disk %s state changed from %s to %s (device %s).",		\
1894 	g_mirror_get_diskname(disk),					\
1895 	g_mirror_disk_state2str(disk->d_state),				\
1896 	g_mirror_disk_state2str(state), sc->sc_name)
1897 static int
1898 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
1899 {
1900 	struct g_mirror_softc *sc;
1901 
1902 	g_topology_assert();
1903 
1904 	sc = disk->d_softc;
	/*
	 * The NEW case jumps back here with the follow-up state when the
	 * device is already RUNNING.
	 */
1905 again:
1906 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
1907 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
1908 	    g_mirror_disk_state2str(state));
1909 	switch (state) {
1910 	case G_MIRROR_DISK_STATE_NEW:
1911 		/*
1912 		 * Possible scenarios:
1913 		 * 1. New disk arrives.
1914 		 */
1915 		/* Previous state should be NONE. */
1916 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
1917 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1918 		    g_mirror_disk_state2str(disk->d_state)));
1919 		DISK_STATE_CHANGED();
1920 
1921 		disk->d_state = state;
1922 		LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
1923 		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
1924 		    sc->sc_name, g_mirror_get_diskname(disk));
		/* While STARTING, the disk stays NEW until the device runs. */
1925 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
1926 			break;
1927 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1928 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1929 		    g_mirror_device_state2str(sc->sc_state),
1930 		    g_mirror_get_diskname(disk),
1931 		    g_mirror_disk_state2str(disk->d_state)));
		/*
		 * g_mirror_determine_state() may destroy the disk and return
		 * NONE; the disk must not be touched in that case.
		 */
1932 		state = g_mirror_determine_state(disk);
1933 		if (state != G_MIRROR_DISK_STATE_NONE)
1934 			goto again;
1935 		break;
1936 	case G_MIRROR_DISK_STATE_ACTIVE:
1937 		/*
1938 		 * Possible scenarios:
1939 		 * 1. New disk does not need synchronization.
1940 		 * 2. Synchronization process finished successfully.
1941 		 */
1942 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1943 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1944 		    g_mirror_device_state2str(sc->sc_state),
1945 		    g_mirror_get_diskname(disk),
1946 		    g_mirror_disk_state2str(disk->d_state)));
1947 		/* Previous state should be NEW or SYNCHRONIZING. */
1948 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
1949 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1950 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1951 		    g_mirror_disk_state2str(disk->d_state)));
1952 		DISK_STATE_CHANGED();
1953 
1954 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
1955 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1956 		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
			/*
			 * g_mirror_sync_stop() asserts the SYNCHRONIZING
			 * state, so it has to run before d_state changes.
			 */
1957 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1958 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
1959 			g_mirror_sync_stop(disk, 0);
1960 		}
1961 		disk->d_state = state;
1962 		disk->d_sync.ds_offset = 0;
1963 		disk->d_sync.ds_offset_done = 0;
1964 		g_mirror_update_access(disk);
1965 		g_mirror_update_metadata(disk);
1966 		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
1967 		    sc->sc_name, g_mirror_get_diskname(disk));
1968 		break;
1969 	case G_MIRROR_DISK_STATE_STALE:
1970 		/*
1971 		 * Possible scenarios:
1972 		 * 1. Stale disk was connected.
1973 		 */
1974 		/* Previous state should be NEW. */
1975 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
1976 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1977 		    g_mirror_disk_state2str(disk->d_state)));
1978 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1979 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1980 		    g_mirror_device_state2str(sc->sc_state),
1981 		    g_mirror_get_diskname(disk),
1982 		    g_mirror_disk_state2str(disk->d_state)));
1983 		/*
1984 		 * STALE state is only possible if device is marked
1985 		 * NOAUTOSYNC.
1986 		 */
1987 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
1988 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1989 		    g_mirror_device_state2str(sc->sc_state),
1990 		    g_mirror_get_diskname(disk),
1991 		    g_mirror_disk_state2str(disk->d_state)));
1992 		DISK_STATE_CHANGED();
1993 
1994 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1995 		disk->d_state = state;
1996 		g_mirror_update_metadata(disk);
1997 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
1998 		    sc->sc_name, g_mirror_get_diskname(disk));
1999 		break;
2000 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2001 		/*
2002 		 * Possible scenarios:
2003 		 * 1. Disk which needs synchronization was connected.
2004 		 */
2005 		/* Previous state should be NEW. */
2006 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2007 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2008 		    g_mirror_disk_state2str(disk->d_state)));
2009 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2010 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2011 		    g_mirror_device_state2str(sc->sc_state),
2012 		    g_mirror_get_diskname(disk),
2013 		    g_mirror_disk_state2str(disk->d_state)));
2014 		DISK_STATE_CHANGED();
2015 
2016 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2017 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2018 		disk->d_state = state;
		/*
		 * Without a provider nothing is started here;
		 * g_mirror_launch_provider() starts synchronization for
		 * SYNCHRONIZING disks once the provider is created.
		 */
2019 		if (sc->sc_provider != NULL) {
2020 			g_mirror_sync_start(disk);
2021 			g_mirror_update_metadata(disk);
2022 		}
2023 		break;
2024 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2025 		/*
2026 		 * Possible scenarios:
2027 		 * 1. Device wasn't running yet, but disk disappeared.
2028 		 * 2. Disk was active and disappeared.
2029 		 * 3. Disk disappeared during synchronization process.
2030 		 */
2031 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2032 			/*
2033 			 * Previous state should be ACTIVE, STALE or
2034 			 * SYNCHRONIZING.
2035 			 */
2036 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2037 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2038 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2039 			    ("Wrong disk state (%s, %s).",
2040 			    g_mirror_get_diskname(disk),
2041 			    g_mirror_disk_state2str(disk->d_state)));
2042 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2043 			/* Previous state should be NEW. */
2044 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2045 			    ("Wrong disk state (%s, %s).",
2046 			    g_mirror_get_diskname(disk),
2047 			    g_mirror_disk_state2str(disk->d_state)));
2048 			/*
2049 			 * Reset bumping syncid if disk disappeared in STARTING
2050 			 * state.
2051 			 */
2052 			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2053 				sc->sc_bump_syncid = 0;
2054 #ifdef	INVARIANTS
2055 		} else {
2056 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2057 			    sc->sc_name,
2058 			    g_mirror_device_state2str(sc->sc_state),
2059 			    g_mirror_get_diskname(disk),
2060 			    g_mirror_disk_state2str(disk->d_state)));
2061 #endif
2062 		}
2063 		DISK_STATE_CHANGED();
2064 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2065 		    sc->sc_name, g_mirror_get_diskname(disk));
2066 
2067 		g_mirror_destroy_disk(disk);
2068 		break;
2069 	case G_MIRROR_DISK_STATE_DESTROY:
2070 	    {
2071 		int error;
2072 
		/* Give up without changing anything if metadata stays. */
2073 		error = g_mirror_clear_metadata(disk);
2074 		if (error != 0)
2075 			return (error);
2076 		DISK_STATE_CHANGED();
2077 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2078 		    sc->sc_name, g_mirror_get_diskname(disk));
2079 
2080 		g_mirror_destroy_disk(disk);
2081 		sc->sc_ndisks--;
		/*
		 * 'disk' is reused as the iterator from here on: the
		 * metadata of every remaining disk is rewritten after
		 * sc_ndisks changed.
		 */
2082 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2083 			g_mirror_update_metadata(disk);
2084 		}
2085 		break;
2086 	    }
2087 	default:
2088 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2089 		break;
2090 	}
2091 	return (0);
2092 }
2093 #undef	DISK_STATE_CHANGED
2094 
2095 static int
2096 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2097 {
2098 	struct g_provider *pp;
2099 	u_char *buf;
2100 	int error;
2101 
2102 	g_topology_assert();
2103 
2104 	error = g_access(cp, 1, 0, 0);
2105 	if (error != 0)
2106 		return (error);
2107 	pp = cp->provider;
2108 	g_topology_unlock();
2109 	/* Metadata are stored on last sector. */
2110 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2111 	    &error);
2112 	g_topology_lock();
2113 	if (buf == NULL) {
2114 		g_access(cp, -1, 0, 0);
2115 		return (error);
2116 	}
2117 	if (error != 0) {
2118 		g_access(cp, -1, 0, 0);
2119 		g_free(buf);
2120 		return (error);
2121 	}
2122 	error = g_access(cp, -1, 0, 0);
2123 	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2124 
2125 	/* Decode metadata. */
2126 	error = mirror_metadata_decode(buf, md);
2127 	g_free(buf);
2128 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2129 		return (EINVAL);
2130 	if (error != 0) {
2131 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2132 		    cp->provider->name);
2133 		return (error);
2134 	}
2135 
2136 	return (0);
2137 }
2138 
2139 static int
2140 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2141     struct g_mirror_metadata *md)
2142 {
2143 
2144 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2145 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2146 		    pp->name, md->md_did);
2147 		return (EEXIST);
2148 	}
2149 	if (md->md_all != sc->sc_ndisks) {
2150 		G_MIRROR_DEBUG(1,
2151 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2152 		    "md_all", pp->name, sc->sc_name);
2153 		return (EINVAL);
2154 	}
2155 	if (md->md_slice != sc->sc_slice) {
2156 		G_MIRROR_DEBUG(1,
2157 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2158 		    "md_slice", pp->name, sc->sc_name);
2159 		return (EINVAL);
2160 	}
2161 	if (md->md_balance != sc->sc_balance) {
2162 		G_MIRROR_DEBUG(1,
2163 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2164 		    "md_balance", pp->name, sc->sc_name);
2165 		return (EINVAL);
2166 	}
2167 	if (md->md_mediasize != sc->sc_mediasize) {
2168 		G_MIRROR_DEBUG(1,
2169 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2170 		    "md_mediasize", pp->name, sc->sc_name);
2171 		return (EINVAL);
2172 	}
2173 	if (sc->sc_mediasize > pp->mediasize) {
2174 		G_MIRROR_DEBUG(1,
2175 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2176 		    sc->sc_name);
2177 		return (EINVAL);
2178 	}
2179 	if (md->md_sectorsize != sc->sc_sectorsize) {
2180 		G_MIRROR_DEBUG(1,
2181 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2182 		    "md_sectorsize", pp->name, sc->sc_name);
2183 		return (EINVAL);
2184 	}
2185 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2186 		G_MIRROR_DEBUG(1,
2187 		    "Invalid sector size of disk %s (device %s), skipping.",
2188 		    pp->name, sc->sc_name);
2189 		return (EINVAL);
2190 	}
2191 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2192 		G_MIRROR_DEBUG(1,
2193 		    "Invalid device flags on disk %s (device %s), skipping.",
2194 		    pp->name, sc->sc_name);
2195 		return (EINVAL);
2196 	}
2197 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2198 		G_MIRROR_DEBUG(1,
2199 		    "Invalid disk flags on disk %s (device %s), skipping.",
2200 		    pp->name, sc->sc_name);
2201 		return (EINVAL);
2202 	}
2203 	return (0);
2204 }
2205 
2206 static int
2207 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2208     struct g_mirror_metadata *md)
2209 {
2210 	struct g_mirror_disk *disk;
2211 	int error;
2212 
2213 	g_topology_assert();
2214 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2215 
2216 	error = g_mirror_check_metadata(sc, pp, md);
2217 	if (error != 0)
2218 		return (error);
2219 	disk = g_mirror_init_disk(sc, pp, md, &error);
2220 	if (disk == NULL)
2221 		return (error);
2222 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2223 	    G_MIRROR_EVENT_WAIT);
2224 	return (error);
2225 }
2226 
2227 static int
2228 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2229 {
2230 	struct g_mirror_softc *sc;
2231 	struct g_mirror_disk *disk;
2232 	int dcr, dcw, dce, err, error;
2233 
2234 	g_topology_assert();
2235 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2236 	    acw, ace);
2237 
2238 	dcr = pp->acr + acr;
2239 	dcw = pp->acw + acw;
2240 	dce = pp->ace + ace;
2241 
2242 	/* On first open, grab an extra "exclusive" bit */
2243 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2244 		ace++;
2245 	/* ... and let go of it on last close */
2246 	if (dcr == 0 && dcw == 0 && dce == 0)
2247 		ace--;
2248 
2249 	sc = pp->geom->softc;
2250 	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2251 		if (acr <= 0 && acw <= 0 && ace <= 0)
2252 			return (0);
2253 		else
2254 			return (ENXIO);
2255 	}
2256 	error = ENXIO;
2257 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2258 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2259 			continue;
2260 		err = g_access(disk->d_consumer, acr, acw, ace);
2261 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2262 		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2263 		if (err == 0) {
2264 			/*
2265 			 * Mark disk as dirty on open and unmark on close.
2266 			 */
2267 			if (pp->acw == 0 && dcw > 0) {
2268 				G_MIRROR_DEBUG(1,
2269 				    "Disk %s (device %s) marked as dirty.",
2270 				    g_mirror_get_diskname(disk), sc->sc_name);
2271 				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2272 				g_mirror_update_metadata(disk);
2273 			} else if (pp->acw > 0 && dcw == 0) {
2274 				G_MIRROR_DEBUG(1,
2275 				    "Disk %s (device %s) marked as clean.",
2276 				    g_mirror_get_diskname(disk), sc->sc_name);
2277 				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2278 				g_mirror_update_metadata(disk);
2279 			}
2280 			error = 0;
2281 		} else {
2282 			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2283 			g_mirror_event_send(disk,
2284 			    G_MIRROR_DISK_STATE_DISCONNECTED,
2285 			    G_MIRROR_EVENT_DONTWAIT);
2286 		}
2287 	}
2288 	return (error);
2289 }
2290 
2291 static struct g_geom *
2292 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2293 {
2294 	struct g_mirror_softc *sc;
2295 	struct g_geom *gp;
2296 	int error, timeout;
2297 
2298 	g_topology_assert();
2299 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2300 	    md->md_mid);
2301 
2302 	/* One disk is minimum. */
2303 	if (md->md_all < 1)
2304 		return (NULL);
2305 	/*
2306 	 * Action geom.
2307 	 */
2308 	gp = g_new_geomf(mp, "%s", md->md_name);
2309 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2310 	gp->start = g_mirror_start;
2311 	gp->spoiled = g_mirror_spoiled;
2312 	gp->orphan = g_mirror_orphan;
2313 	gp->access = g_mirror_access;
2314 	gp->dumpconf = g_mirror_dumpconf;
2315 
2316 	sc->sc_id = md->md_mid;
2317 	sc->sc_slice = md->md_slice;
2318 	sc->sc_balance = md->md_balance;
2319 	sc->sc_mediasize = md->md_mediasize;
2320 	sc->sc_sectorsize = md->md_sectorsize;
2321 	sc->sc_ndisks = md->md_all;
2322 	sc->sc_flags = md->md_mflags;
2323 	sc->sc_bump_syncid = 0;
2324 	bioq_init(&sc->sc_queue);
2325 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2326 	LIST_INIT(&sc->sc_disks);
2327 	TAILQ_INIT(&sc->sc_events);
2328 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2329 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2330 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2331 	gp->softc = sc;
2332 	sc->sc_geom = gp;
2333 	sc->sc_provider = NULL;
2334 	/*
2335 	 * Synchronization geom.
2336 	 */
2337 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2338 	gp->softc = sc;
2339 	gp->spoiled = g_mirror_spoiled;
2340 	gp->orphan = g_mirror_orphan;
2341 	sc->sc_sync.ds_geom = gp;
2342 	sc->sc_sync.ds_block = atomic_load_acq_int(&g_mirror_sync_block_size);
2343 	sc->sc_sync.ds_ndisks = 0;
2344 	sc->sc_sync.ds_zone = uma_zcreate("gmirror:sync", sc->sc_sync.ds_block,
2345 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2346 	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2347 	    "g_mirror %s", md->md_name);
2348 	if (error != 0) {
2349 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2350 		    sc->sc_name);
2351 		uma_zdestroy(sc->sc_sync.ds_zone);
2352 		g_destroy_geom(sc->sc_sync.ds_geom);
2353 		mtx_destroy(&sc->sc_events_mtx);
2354 		mtx_destroy(&sc->sc_queue_mtx);
2355 		g_destroy_geom(sc->sc_geom);
2356 		free(sc, M_MIRROR);
2357 		return (NULL);
2358 	}
2359 
2360 	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2361 
2362 	/*
2363 	 * Run timeout.
2364 	 */
2365 	timeout = atomic_load_acq_int(&g_mirror_timeout);
2366 	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
2367 	return (sc->sc_geom);
2368 }
2369 
2370 int
2371 g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2372 {
2373 	struct g_provider *pp;
2374 
2375 	g_topology_assert();
2376 
2377 	if (sc == NULL)
2378 		return (ENXIO);
2379 	pp = sc->sc_provider;
2380 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2381 		if (force) {
2382 			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2383 			    "can't be definitely removed.", pp->name);
2384 		} else {
2385 			G_MIRROR_DEBUG(1,
2386 			    "Device %s is still open (r%dw%de%d).", pp->name,
2387 			    pp->acr, pp->acw, pp->ace);
2388 			return (EBUSY);
2389 		}
2390 	}
2391 
2392 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2393 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2394 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2395 	mtx_lock(&sc->sc_queue_mtx);
2396 	wakeup(sc);
2397 	mtx_unlock(&sc->sc_queue_mtx);
2398 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2399 	while (sc->sc_worker != NULL)
2400 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2401 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2402 	g_mirror_destroy_device(sc);
2403 	free(sc, M_MIRROR);
2404 	return (0);
2405 }
2406 
2407 static void
2408 g_mirror_taste_orphan(struct g_consumer *cp)
2409 {
2410 
2411 	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
2412 	    cp->provider->name));
2413 }
2414 
2415 static struct g_geom *
2416 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2417 {
2418 	struct g_mirror_metadata md;
2419 	struct g_mirror_softc *sc;
2420 	struct g_consumer *cp;
2421 	struct g_geom *gp;
2422 	int error;
2423 
2424 	g_topology_assert();
2425 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2426 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2427 
2428 	gp = g_new_geomf(mp, "mirror:taste");
2429 	/*
2430 	 * This orphan function should be never called.
2431 	 */
2432 	gp->orphan = g_mirror_taste_orphan;
2433 	cp = g_new_consumer(gp);
2434 	g_attach(cp, pp);
2435 	error = g_mirror_read_metadata(cp, &md);
2436 	g_detach(cp);
2437 	g_destroy_consumer(cp);
2438 	g_destroy_geom(gp);
2439 	if (error != 0)
2440 		return (NULL);
2441 	gp = NULL;
2442 
2443 	if (md.md_version > G_MIRROR_VERSION) {
2444 		printf("geom_mirror.ko module is too old to handle %s.\n",
2445 		    pp->name);
2446 		return (NULL);
2447 	}
2448 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2449 		G_MIRROR_DEBUG(0,
2450 		    "Device %s: provider %s marked as inactive, skipping.",
2451 		    md.md_name, pp->name);
2452 		return (NULL);
2453 	}
2454 	if (g_mirror_debug >= 2)
2455 		mirror_metadata_dump(&md);
2456 
2457 	/*
2458 	 * Let's check if device already exists.
2459 	 */
2460 	LIST_FOREACH(gp, &mp->geom, geom) {
2461 		sc = gp->softc;
2462 		if (sc == NULL)
2463 			continue;
2464 		if (sc->sc_sync.ds_geom == gp)
2465 			continue;
2466 		if (strcmp(md.md_name, sc->sc_name) != 0)
2467 			continue;
2468 		if (md.md_mid != sc->sc_id) {
2469 			G_MIRROR_DEBUG(0, "Device %s already configured.",
2470 			    sc->sc_name);
2471 			return (NULL);
2472 		}
2473 		break;
2474 	}
2475 	if (gp == NULL) {
2476 		gp = g_mirror_create(mp, &md);
2477 		if (gp == NULL) {
2478 			G_MIRROR_DEBUG(0, "Cannot create device %s.mirror",
2479 			    md.md_name);
2480 			return (NULL);
2481 		}
2482 		sc = gp->softc;
2483 	}
2484 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2485 	error = g_mirror_add_disk(sc, pp, &md);
2486 	if (error != 0) {
2487 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2488 		    pp->name, gp->name, error);
2489 		if (LIST_EMPTY(&sc->sc_disks))
2490 			g_mirror_destroy(sc, 1);
2491 		return (NULL);
2492 	}
2493 	return (gp);
2494 }
2495 
2496 static int
2497 g_mirror_destroy_geom(struct gctl_req *req __unused,
2498     struct g_class *mp __unused, struct g_geom *gp)
2499 {
2500 
2501 	return (g_mirror_destroy(gp->softc, 0));
2502 }
2503 
2504 static void
2505 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2506     struct g_consumer *cp, struct g_provider *pp)
2507 {
2508 	struct g_mirror_softc *sc;
2509 
2510 	g_topology_assert();
2511 
2512 	sc = gp->softc;
2513 	if (sc == NULL)
2514 		return;
2515 	/* Skip synchronization geom. */
2516 	if (gp == sc->sc_sync.ds_geom)
2517 		return;
2518 	if (pp != NULL) {
2519 		/* Nothing here. */
2520 	} else if (cp != NULL) {
2521 		struct g_mirror_disk *disk;
2522 
2523 		disk = cp->private;
2524 		if (disk == NULL)
2525 			return;
2526 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2527 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2528 			sbuf_printf(sb, "%s<Synchronized>", indent);
2529 			if (disk->d_sync.ds_offset_done == 0)
2530 				sbuf_printf(sb, "0%%");
2531 			else {
2532 				sbuf_printf(sb, "%u%%",
2533 				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2534 				    sc->sc_provider->mediasize));
2535 			}
2536 			sbuf_printf(sb, "</Synchronized>\n");
2537 		}
2538 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2539 		    disk->d_sync.ds_syncid);
2540 		sbuf_printf(sb, "%s<Flags>", indent);
2541 		if (disk->d_flags == 0)
2542 			sbuf_printf(sb, "NONE");
2543 		else {
2544 			int first = 1;
2545 
2546 			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
2547 				if (!first)
2548 					sbuf_printf(sb, ", ");
2549 				else
2550 					first = 0;
2551 				sbuf_printf(sb, "DIRTY");
2552 			}
2553 			if ((disk->d_flags &
2554 			    G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2555 				if (!first)
2556 					sbuf_printf(sb, ", ");
2557 				else
2558 					first = 0;
2559 				sbuf_printf(sb, "INACTIVE");
2560 			}
2561 			if ((disk->d_flags &
2562 			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
2563 				if (!first)
2564 					sbuf_printf(sb, ", ");
2565 				else
2566 					first = 0;
2567 				sbuf_printf(sb, "SYNCHRONIZING");
2568 			}
2569 			if ((disk->d_flags &
2570 			    G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
2571 				if (!first)
2572 					sbuf_printf(sb, ", ");
2573 				else
2574 					first = 0;
2575 				sbuf_printf(sb, "FORCE_SYNC");
2576 			}
2577 		}
2578 		sbuf_printf(sb, "</Flags>\n");
2579 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2580 		    disk->d_priority);
2581 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2582 		    g_mirror_disk_state2str(disk->d_state));
2583 	} else {
2584 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2585 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2586 		sbuf_printf(sb, "%s<Flags>", indent);
2587 		if (sc->sc_flags == 0)
2588 			sbuf_printf(sb, "NONE");
2589 		else {
2590 			int first = 1;
2591 
2592 			if ((sc->sc_flags &
2593 			    G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0) {
2594 				if (!first)
2595 					sbuf_printf(sb, ", ");
2596 				else
2597 					first = 0;
2598 				sbuf_printf(sb, "NOAUTOSYNC");
2599 			}
2600 		}
2601 		sbuf_printf(sb, "</Flags>\n");
2602 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2603 		    (u_int)sc->sc_slice);
2604 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2605 		    balance_name(sc->sc_balance));
2606 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2607 		    sc->sc_ndisks);
2608 	}
2609 }
2610 
2611 DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2612