xref: /freebsd/sys/geom/mirror/g_mirror.c (revision cec50dea12481dc578c0805c887ab2097e1c06c5)
1 /*-
2  * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/module.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/mutex.h>
37 #include <sys/bio.h>
38 #include <sys/sysctl.h>
39 #include <sys/malloc.h>
40 #include <sys/bitstring.h>
41 #include <vm/uma.h>
42 #include <machine/atomic.h>
43 #include <geom/geom.h>
44 #include <sys/proc.h>
45 #include <sys/kthread.h>
46 #include <geom/mirror/g_mirror.h>
47 
48 
49 static MALLOC_DEFINE(M_MIRROR, "mirror data", "GEOM_MIRROR Data");
50 
51 SYSCTL_DECL(_kern_geom);
52 SYSCTL_NODE(_kern_geom, OID_AUTO, mirror, CTLFLAG_RW, 0, "GEOM_MIRROR stuff");
53 u_int g_mirror_debug = 0;
54 TUNABLE_INT("kern.geom.mirror.debug", &g_mirror_debug);
55 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, debug, CTLFLAG_RW, &g_mirror_debug, 0,
56     "Debug level");
57 static u_int g_mirror_timeout = 8;
58 TUNABLE_INT("kern.geom.mirror.timeout", &g_mirror_timeout);
59 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, timeout, CTLFLAG_RW, &g_mirror_timeout,
60     0, "Time to wait on all mirror components");
61 static u_int g_mirror_reqs_per_sync = 5;
62 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, reqs_per_sync, CTLFLAG_RW,
63     &g_mirror_reqs_per_sync, 0,
64     "Number of regular I/O requests per synchronization request");
65 static u_int g_mirror_syncs_per_sec = 100;
66 SYSCTL_UINT(_kern_geom_mirror, OID_AUTO, syncs_per_sec, CTLFLAG_RW,
67     &g_mirror_syncs_per_sec, 0,
68     "Number of synchronizations requests per second");
69 
/*
 * Wrapper around msleep() which logs (at debug level 4) the wait channel
 * right before going to sleep and right after msleep() returns.
 * The return value of msleep() is discarded.
 */
#define	MSLEEP(ident, mtx, priority, wmesg, timeout)			\
do {									\
	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, (ident));	\
	msleep((ident), (mtx), (priority), (wmesg), (timeout));		\
	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, (ident));	\
} while (0)
75 
76 
77 static int g_mirror_destroy_geom(struct gctl_req *req, struct g_class *mp,
78     struct g_geom *gp);
79 static g_taste_t g_mirror_taste;
80 
81 struct g_class g_mirror_class = {
82 	.name = G_MIRROR_CLASS_NAME,
83 	.version = G_VERSION,
84 	.ctlreq = g_mirror_config,
85 	.taste = g_mirror_taste,
86 	.destroy_geom = g_mirror_destroy_geom
87 };
88 
89 
90 static void g_mirror_destroy_provider(struct g_mirror_softc *sc);
91 static int g_mirror_update_disk(struct g_mirror_disk *disk, u_int state);
92 static void g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force);
93 static void g_mirror_dumpconf(struct sbuf *sb, const char *indent,
94     struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp);
95 static void g_mirror_sync_stop(struct g_mirror_disk *disk, int type);
96 
97 
98 static const char *
99 g_mirror_disk_state2str(int state)
100 {
101 
102 	switch (state) {
103 	case G_MIRROR_DISK_STATE_NONE:
104 		return ("NONE");
105 	case G_MIRROR_DISK_STATE_NEW:
106 		return ("NEW");
107 	case G_MIRROR_DISK_STATE_ACTIVE:
108 		return ("ACTIVE");
109 	case G_MIRROR_DISK_STATE_STALE:
110 		return ("STALE");
111 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
112 		return ("SYNCHRONIZING");
113 	case G_MIRROR_DISK_STATE_DISCONNECTED:
114 		return ("DISCONNECTED");
115 	case G_MIRROR_DISK_STATE_DESTROY:
116 		return ("DESTROY");
117 	default:
118 		return ("INVALID");
119 	}
120 }
121 
122 static const char *
123 g_mirror_device_state2str(int state)
124 {
125 
126 	switch (state) {
127 	case G_MIRROR_DEVICE_STATE_STARTING:
128 		return ("STARTING");
129 	case G_MIRROR_DEVICE_STATE_RUNNING:
130 		return ("RUNNING");
131 	default:
132 		return ("INVALID");
133 	}
134 }
135 
136 static const char *
137 g_mirror_get_diskname(struct g_mirror_disk *disk)
138 {
139 
140 	if (disk->d_consumer == NULL || disk->d_consumer->provider == NULL)
141 		return ("[unknown]");
142 	return (disk->d_name);
143 }
144 
145 /*
146  * --- Events handling functions ---
147  * Events in geom_mirror are used to maintain disks and device status
148  * from one thread to simplify locking.
149  */
150 static void
151 g_mirror_event_free(struct g_mirror_event *ep)
152 {
153 
154 	free(ep, M_MIRROR);
155 }
156 
157 int
158 g_mirror_event_send(void *arg, int state, int flags)
159 {
160 	struct g_mirror_softc *sc;
161 	struct g_mirror_disk *disk;
162 	struct g_mirror_event *ep;
163 	int error;
164 
165 	ep = malloc(sizeof(*ep), M_MIRROR, M_WAITOK);
166 	G_MIRROR_DEBUG(4, "%s: Sending event %p.", __func__, ep);
167 	if ((flags & G_MIRROR_EVENT_DEVICE) != 0) {
168 		disk = NULL;
169 		sc = arg;
170 	} else {
171 		disk = arg;
172 		sc = disk->d_softc;
173 	}
174 	ep->e_disk = disk;
175 	ep->e_state = state;
176 	ep->e_flags = flags;
177 	ep->e_error = 0;
178 	mtx_lock(&sc->sc_events_mtx);
179 	TAILQ_INSERT_TAIL(&sc->sc_events, ep, e_next);
180 	mtx_unlock(&sc->sc_events_mtx);
181 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
182 	mtx_lock(&sc->sc_queue_mtx);
183 	wakeup(sc);
184 	mtx_unlock(&sc->sc_queue_mtx);
185 	if ((flags & G_MIRROR_EVENT_DONTWAIT) != 0)
186 		return (0);
187 	g_topology_assert();
188 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, ep);
189 	g_topology_unlock();
190 	while ((ep->e_flags & G_MIRROR_EVENT_DONE) == 0) {
191 		mtx_lock(&sc->sc_events_mtx);
192 		MSLEEP(ep, &sc->sc_events_mtx, PRIBIO | PDROP, "m:event",
193 		    hz * 5);
194 	}
195 	/* Don't even try to use 'sc' here, because it could be already dead. */
196 	g_topology_lock();
197 	error = ep->e_error;
198 	g_mirror_event_free(ep);
199 	return (error);
200 }
201 
202 static struct g_mirror_event *
203 g_mirror_event_get(struct g_mirror_softc *sc)
204 {
205 	struct g_mirror_event *ep;
206 
207 	mtx_lock(&sc->sc_events_mtx);
208 	ep = TAILQ_FIRST(&sc->sc_events);
209 	if (ep != NULL)
210 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
211 	mtx_unlock(&sc->sc_events_mtx);
212 	return (ep);
213 }
214 
215 static void
216 g_mirror_event_cancel(struct g_mirror_disk *disk)
217 {
218 	struct g_mirror_softc *sc;
219 	struct g_mirror_event *ep, *tmpep;
220 
221 	g_topology_assert();
222 
223 	sc = disk->d_softc;
224 	mtx_lock(&sc->sc_events_mtx);
225 	TAILQ_FOREACH_SAFE(ep, &sc->sc_events, e_next, tmpep) {
226 		if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0)
227 			continue;
228 		if (ep->e_disk != disk)
229 			continue;
230 		TAILQ_REMOVE(&sc->sc_events, ep, e_next);
231 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
232 			g_mirror_event_free(ep);
233 		else {
234 			ep->e_error = ECANCELED;
235 			wakeup(ep);
236 		}
237 	}
238 	mtx_unlock(&sc->sc_events_mtx);
239 }
240 
241 /*
242  * Return the number of disks in given state.
243  * If state is equal to -1, count all connected disks.
244  */
245 u_int
246 g_mirror_ndisks(struct g_mirror_softc *sc, int state)
247 {
248 	struct g_mirror_disk *disk;
249 	u_int n = 0;
250 
251 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
252 		if (state == -1 || disk->d_state == state)
253 			n++;
254 	}
255 	return (n);
256 }
257 
258 /*
259  * Find a disk in mirror by its disk ID.
260  */
261 static struct g_mirror_disk *
262 g_mirror_id2disk(struct g_mirror_softc *sc, uint32_t id)
263 {
264 	struct g_mirror_disk *disk;
265 
266 	g_topology_assert();
267 
268 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
269 		if (disk->d_id == id)
270 			return (disk);
271 	}
272 	return (NULL);
273 }
274 
275 static u_int
276 g_mirror_nrequests(struct g_mirror_softc *sc, struct g_consumer *cp)
277 {
278 	struct bio *bp;
279 	u_int nreqs = 0;
280 
281 	mtx_lock(&sc->sc_queue_mtx);
282 	TAILQ_FOREACH(bp, &sc->sc_queue.queue, bio_queue) {
283 		if (bp->bio_from == cp)
284 			nreqs++;
285 	}
286 	mtx_unlock(&sc->sc_queue_mtx);
287 	return (nreqs);
288 }
289 
290 static int
291 g_mirror_is_busy(struct g_mirror_softc *sc, struct g_consumer *cp)
292 {
293 
294 	if (cp->nstart != cp->nend) {
295 		G_MIRROR_DEBUG(2,
296 		    "I/O requests for %s exist, can't destroy it now.",
297 		    cp->provider->name);
298 		return (1);
299 	}
300 	if (g_mirror_nrequests(sc, cp) > 0) {
301 		G_MIRROR_DEBUG(2,
302 		    "I/O requests for %s in queue, can't destroy it now.",
303 		    cp->provider->name);
304 		return (1);
305 	}
306 	return (0);
307 }
308 
309 static void
310 g_mirror_kill_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
311 {
312 
313 	g_topology_assert();
314 
315 	cp->private = NULL;
316 	if (g_mirror_is_busy(sc, cp))
317 		return;
318 	G_MIRROR_DEBUG(2, "Consumer %s destroyed.", cp->provider->name);
319 	g_detach(cp);
320 	g_destroy_consumer(cp);
321 }
322 
323 static int
324 g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp)
325 {
326 	int error;
327 
328 	g_topology_assert();
329 	KASSERT(disk->d_consumer == NULL,
330 	    ("Disk already connected (device %s).", disk->d_softc->sc_name));
331 
332 	disk->d_consumer = g_new_consumer(disk->d_softc->sc_geom);
333 	disk->d_consumer->private = disk;
334 	error = g_attach(disk->d_consumer, pp);
335 	if (error != 0)
336 		return (error);
337 	G_MIRROR_DEBUG(2, "Disk %s connected.", g_mirror_get_diskname(disk));
338 	return (0);
339 }
340 
341 static void
342 g_mirror_disconnect_consumer(struct g_mirror_softc *sc, struct g_consumer *cp)
343 {
344 
345 	g_topology_assert();
346 
347 	if (cp == NULL)
348 		return;
349 	if (cp->provider != NULL) {
350 		G_MIRROR_DEBUG(2, "Disk %s disconnected.", cp->provider->name);
351 		if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0) {
352 			G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
353 			    cp->provider->name, -cp->acr, -cp->acw, -cp->ace,
354 			    0);
355 			g_access(cp, -cp->acr, -cp->acw, -cp->ace);
356 		}
357 		g_mirror_kill_consumer(sc, cp);
358 	} else {
359 		g_destroy_consumer(cp);
360 	}
361 }
362 
363 /*
364  * Initialize disk. This means allocate memory, create consumer, attach it
365  * to the provider and open access (r1w1e1) to it.
366  */
367 static struct g_mirror_disk *
368 g_mirror_init_disk(struct g_mirror_softc *sc, struct g_provider *pp,
369     struct g_mirror_metadata *md, int *errorp)
370 {
371 	struct g_mirror_disk *disk;
372 	int error;
373 
374 	disk = malloc(sizeof(*disk), M_MIRROR, M_NOWAIT | M_ZERO);
375 	if (disk == NULL) {
376 		error = ENOMEM;
377 		goto fail;
378 	}
379 	disk->d_softc = sc;
380 	error = g_mirror_connect_disk(disk, pp);
381 	if (error != 0)
382 		goto fail;
383 	disk->d_id = md->md_did;
384 	disk->d_state = G_MIRROR_DISK_STATE_NONE;
385 	disk->d_priority = md->md_priority;
386 	disk->d_delay.sec = 0;
387 	disk->d_delay.frac = 0;
388 	binuptime(&disk->d_last_used);
389 	disk->d_flags = md->md_dflags;
390 	if (md->md_provider[0] != '\0')
391 		disk->d_flags |= G_MIRROR_DISK_FLAG_HARDCODED;
392 	disk->d_sync.ds_consumer = NULL;
393 	disk->d_sync.ds_offset = md->md_sync_offset;
394 	disk->d_sync.ds_offset_done = md->md_sync_offset;
395 	disk->d_sync.ds_syncid = md->md_syncid;
396 	if (errorp != NULL)
397 		*errorp = 0;
398 	return (disk);
399 fail:
400 	if (errorp != NULL)
401 		*errorp = error;
402 	if (disk != NULL) {
403 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
404 		free(disk, M_MIRROR);
405 	}
406 	return (NULL);
407 }
408 
409 static void
410 g_mirror_destroy_disk(struct g_mirror_disk *disk)
411 {
412 	struct g_mirror_softc *sc;
413 
414 	g_topology_assert();
415 
416 	LIST_REMOVE(disk, d_next);
417 	g_mirror_event_cancel(disk);
418 	sc = disk->d_softc;
419 	if (sc->sc_hint == disk)
420 		sc->sc_hint = NULL;
421 	switch (disk->d_state) {
422 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
423 		g_mirror_sync_stop(disk, 1);
424 		/* FALLTHROUGH */
425 	case G_MIRROR_DISK_STATE_NEW:
426 	case G_MIRROR_DISK_STATE_STALE:
427 	case G_MIRROR_DISK_STATE_ACTIVE:
428 		g_mirror_disconnect_consumer(sc, disk->d_consumer);
429 		free(disk, M_MIRROR);
430 		break;
431 	default:
432 		KASSERT(0 == 1, ("Wrong disk state (%s, %s).",
433 		    g_mirror_get_diskname(disk),
434 		    g_mirror_disk_state2str(disk->d_state)));
435 	}
436 }
437 
438 static void
439 g_mirror_destroy_device(struct g_mirror_softc *sc)
440 {
441 	struct g_mirror_disk *disk;
442 	struct g_mirror_event *ep;
443 	struct g_geom *gp;
444 	struct g_consumer *cp, *tmpcp;
445 
446 	g_topology_assert();
447 
448 	gp = sc->sc_geom;
449 	if (sc->sc_provider != NULL)
450 		g_mirror_destroy_provider(sc);
451 	for (disk = LIST_FIRST(&sc->sc_disks); disk != NULL;
452 	    disk = LIST_FIRST(&sc->sc_disks)) {
453 		g_mirror_destroy_disk(disk);
454 	}
455 	while ((ep = g_mirror_event_get(sc)) != NULL) {
456 		if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0)
457 			g_mirror_event_free(ep);
458 		else {
459 			ep->e_error = ECANCELED;
460 			ep->e_flags |= G_MIRROR_EVENT_DONE;
461 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, ep);
462 			mtx_lock(&sc->sc_events_mtx);
463 			wakeup(ep);
464 			mtx_unlock(&sc->sc_events_mtx);
465 		}
466 	}
467 	callout_drain(&sc->sc_callout);
468 	gp->softc = NULL;
469 
470 	LIST_FOREACH_SAFE(cp, &sc->sc_sync.ds_geom->consumer, consumer, tmpcp) {
471 		g_mirror_disconnect_consumer(sc, cp);
472 	}
473 	sc->sc_sync.ds_geom->softc = NULL;
474 	g_wither_geom(sc->sc_sync.ds_geom, ENXIO);
475 	mtx_destroy(&sc->sc_queue_mtx);
476 	mtx_destroy(&sc->sc_events_mtx);
477 	G_MIRROR_DEBUG(0, "Device %s destroyed.", gp->name);
478 	g_wither_geom(gp, ENXIO);
479 }
480 
481 static void
482 g_mirror_orphan(struct g_consumer *cp)
483 {
484 	struct g_mirror_disk *disk;
485 
486 	g_topology_assert();
487 
488 	disk = cp->private;
489 	if (disk == NULL)
490 		return;
491 	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
492 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
493 	    G_MIRROR_EVENT_DONTWAIT);
494 }
495 
496 static void
497 g_mirror_spoiled(struct g_consumer *cp)
498 {
499 	struct g_mirror_disk *disk;
500 
501 	g_topology_assert();
502 
503 	disk = cp->private;
504 	if (disk == NULL)
505 		return;
506 	disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
507 	g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
508 	    G_MIRROR_EVENT_DONTWAIT);
509 }
510 
511 /*
512  * Function should return the next active disk on the list.
513  * It is possible that it will be the same disk as given.
514  * If there are no active disks on list, NULL is returned.
515  */
516 static __inline struct g_mirror_disk *
517 g_mirror_find_next(struct g_mirror_softc *sc, struct g_mirror_disk *disk)
518 {
519 	struct g_mirror_disk *dp;
520 
521 	for (dp = LIST_NEXT(disk, d_next); dp != disk;
522 	    dp = LIST_NEXT(dp, d_next)) {
523 		if (dp == NULL)
524 			dp = LIST_FIRST(&sc->sc_disks);
525 		if (dp->d_state == G_MIRROR_DISK_STATE_ACTIVE)
526 			break;
527 	}
528 	if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
529 		return (NULL);
530 	return (dp);
531 }
532 
533 static struct g_mirror_disk *
534 g_mirror_get_disk(struct g_mirror_softc *sc)
535 {
536 	struct g_mirror_disk *disk;
537 
538 	if (sc->sc_hint == NULL) {
539 		sc->sc_hint = LIST_FIRST(&sc->sc_disks);
540 		if (sc->sc_hint == NULL)
541 			return (NULL);
542 	}
543 	disk = sc->sc_hint;
544 	if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE) {
545 		disk = g_mirror_find_next(sc, disk);
546 		if (disk == NULL)
547 			return (NULL);
548 	}
549 	sc->sc_hint = g_mirror_find_next(sc, disk);
550 	return (disk);
551 }
552 
553 static int
554 g_mirror_write_metadata(struct g_mirror_disk *disk,
555     struct g_mirror_metadata *md)
556 {
557 	struct g_mirror_softc *sc;
558 	struct g_consumer *cp;
559 	off_t offset, length;
560 	u_char *sector;
561 	int close = 0, error = 0;
562 
563 	g_topology_assert();
564 
565 	sc = disk->d_softc;
566 	cp = disk->d_consumer;
567 	KASSERT(cp != NULL, ("NULL consumer (%s).", sc->sc_name));
568 	KASSERT(cp->provider != NULL, ("NULL provider (%s).", sc->sc_name));
569 	length = cp->provider->sectorsize;
570 	offset = cp->provider->mediasize - length;
571 	sector = malloc((size_t)length, M_MIRROR, M_WAITOK | M_ZERO);
572 	/*
573 	 * Open consumer if it wasn't opened and remember to close it.
574 	 */
575 	if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) == 0) {
576 		error = g_access(cp, 0, 1, 1);
577 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
578 		    cp->provider->name, 0, 1, 1, error);
579 		if (error == 0)
580 			close = 1;
581 #ifdef	INVARIANTS
582 	} else {
583 		KASSERT(cp->acw > 0 && cp->ace > 0,
584 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
585 		    cp->acr, cp->acw, cp->ace));
586 #endif
587 	}
588 	if (error == 0) {
589 		if (md != NULL)
590 			mirror_metadata_encode(md, sector);
591 		g_topology_unlock();
592 		error = g_write_data(cp, offset, sector, length);
593 		g_topology_lock();
594 	}
595 	free(sector, M_MIRROR);
596 	if (close) {
597 		g_access(cp, 0, -1, -1);
598 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
599 		    cp->provider->name, 0, -1, -1, 0);
600 	}
601 	if (error != 0) {
602 		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
603 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
604 		    G_MIRROR_EVENT_DONTWAIT);
605 	}
606 	return (error);
607 }
608 
609 static int
610 g_mirror_clear_metadata(struct g_mirror_disk *disk)
611 {
612 	int error;
613 
614 	g_topology_assert();
615 	error = g_mirror_write_metadata(disk, NULL);
616 	if (error == 0) {
617 		G_MIRROR_DEBUG(2, "Metadata on %s cleared.",
618 		    g_mirror_get_diskname(disk));
619 	} else {
620 		G_MIRROR_DEBUG(0,
621 		    "Cannot clear metadata on disk %s (error=%d).",
622 		    g_mirror_get_diskname(disk), error);
623 	}
624 	return (error);
625 }
626 
627 void
628 g_mirror_fill_metadata(struct g_mirror_softc *sc, struct g_mirror_disk *disk,
629     struct g_mirror_metadata *md)
630 {
631 
632 	strlcpy(md->md_magic, G_MIRROR_MAGIC, sizeof(md->md_magic));
633 	md->md_version = G_MIRROR_VERSION;
634 	strlcpy(md->md_name, sc->sc_name, sizeof(md->md_name));
635 	md->md_mid = sc->sc_id;
636 	md->md_all = sc->sc_ndisks;
637 	md->md_slice = sc->sc_slice;
638 	md->md_balance = sc->sc_balance;
639 	md->md_mediasize = sc->sc_mediasize;
640 	md->md_sectorsize = sc->sc_sectorsize;
641 	md->md_mflags = (sc->sc_flags & G_MIRROR_DEVICE_FLAG_MASK);
642 	bzero(md->md_provider, sizeof(md->md_provider));
643 	if (disk == NULL) {
644 		md->md_did = arc4random();
645 		md->md_priority = 0;
646 		md->md_syncid = 0;
647 		md->md_dflags = 0;
648 		md->md_sync_offset = 0;
649 	} else {
650 		md->md_did = disk->d_id;
651 		md->md_priority = disk->d_priority;
652 		md->md_syncid = disk->d_sync.ds_syncid;
653 		md->md_dflags = (disk->d_flags & G_MIRROR_DISK_FLAG_MASK);
654 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
655 			md->md_sync_offset = disk->d_sync.ds_offset_done;
656 		else
657 			md->md_sync_offset = 0;
658 		if ((disk->d_flags & G_MIRROR_DISK_FLAG_HARDCODED) != 0) {
659 			strlcpy(md->md_provider,
660 			    disk->d_consumer->provider->name,
661 			    sizeof(md->md_provider));
662 		}
663 	}
664 }
665 
666 void
667 g_mirror_update_metadata(struct g_mirror_disk *disk)
668 {
669 	struct g_mirror_metadata md;
670 	int error;
671 
672 	g_topology_assert();
673 	g_mirror_fill_metadata(disk->d_softc, disk, &md);
674 	error = g_mirror_write_metadata(disk, &md);
675 	if (error == 0) {
676 		G_MIRROR_DEBUG(2, "Metadata on %s updated.",
677 		    g_mirror_get_diskname(disk));
678 	} else {
679 		G_MIRROR_DEBUG(0,
680 		    "Cannot update metadata on disk %s (error=%d).",
681 		    g_mirror_get_diskname(disk), error);
682 	}
683 }
684 
685 static void
686 g_mirror_bump_syncid(struct g_mirror_softc *sc)
687 {
688 	struct g_mirror_disk *disk;
689 
690 	g_topology_assert();
691 	KASSERT(g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE) > 0,
692 	    ("%s called with no active disks (device=%s).", __func__,
693 	    sc->sc_name));
694 
695 	sc->sc_syncid++;
696 	G_MIRROR_DEBUG(1, "Device %s: syncid bumped to %u.", sc->sc_name,
697 	    sc->sc_syncid);
698 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
699 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
700 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
701 			disk->d_sync.ds_syncid = sc->sc_syncid;
702 			g_mirror_update_metadata(disk);
703 		}
704 	}
705 }
706 
707 static __inline int
708 bintime_cmp(struct bintime *bt1, struct bintime *bt2)
709 {
710 
711 	if (bt1->sec < bt2->sec)
712 		return (-1);
713 	else if (bt1->sec > bt2->sec)
714 		return (1);
715 	if (bt1->frac < bt2->frac)
716 		return (-1);
717 	else if (bt1->frac > bt2->frac)
718 		return (1);
719 	return (0);
720 }
721 
722 static void
723 g_mirror_update_delay(struct g_mirror_disk *disk, struct bio *bp)
724 {
725 
726 	if (disk->d_softc->sc_balance != G_MIRROR_BALANCE_LOAD)
727 		return;
728 	binuptime(&disk->d_delay);
729 	bintime_sub(&disk->d_delay, &bp->bio_t0);
730 }
731 
732 static void
733 g_mirror_done(struct bio *bp)
734 {
735 	struct g_mirror_softc *sc;
736 
737 	sc = bp->bio_from->geom->softc;
738 	bp->bio_cflags |= G_MIRROR_BIO_FLAG_REGULAR;
739 	mtx_lock(&sc->sc_queue_mtx);
740 	bioq_disksort(&sc->sc_queue, bp);
741 	wakeup(sc);
742 	mtx_unlock(&sc->sc_queue_mtx);
743 }
744 
745 static void
746 g_mirror_regular_request(struct bio *bp)
747 {
748 	struct g_mirror_softc *sc;
749 	struct g_mirror_disk *disk;
750 	struct bio *pbp;
751 
752 	g_topology_assert_not();
753 
754 	pbp = bp->bio_parent;
755 	sc = pbp->bio_to->geom->softc;
756 	disk = bp->bio_from->private;
757 	if (disk == NULL) {
758 		g_topology_lock();
759 		g_mirror_kill_consumer(sc, bp->bio_from);
760 		g_topology_unlock();
761 	} else {
762 		g_mirror_update_delay(disk, bp);
763 	}
764 
765 	pbp->bio_inbed++;
766 	KASSERT(pbp->bio_inbed <= pbp->bio_children,
767 	    ("bio_inbed (%u) is bigger than bio_children (%u).", pbp->bio_inbed,
768 	    pbp->bio_children));
769 	if (bp->bio_error == 0 && pbp->bio_error == 0) {
770 		G_MIRROR_LOGREQ(3, bp, "Request delivered.");
771 		g_destroy_bio(bp);
772 		if (pbp->bio_children == pbp->bio_inbed) {
773 			G_MIRROR_LOGREQ(3, pbp, "Request delivered.");
774 			pbp->bio_completed = pbp->bio_length;
775 			g_io_deliver(pbp, pbp->bio_error);
776 		}
777 		return;
778 	} else if (bp->bio_error != 0) {
779 		if (pbp->bio_error == 0)
780 			pbp->bio_error = bp->bio_error;
781 		G_MIRROR_LOGREQ(0, bp, "Request failed (error=%d).",
782 		    bp->bio_error);
783 		if (disk != NULL) {
784 			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
785 			g_mirror_event_send(disk,
786 			    G_MIRROR_DISK_STATE_DISCONNECTED,
787 			    G_MIRROR_EVENT_DONTWAIT);
788 		}
789 		switch (pbp->bio_cmd) {
790 		case BIO_DELETE:
791 		case BIO_WRITE:
792 			pbp->bio_inbed--;
793 			pbp->bio_children--;
794 			break;
795 		}
796 	}
797 	g_destroy_bio(bp);
798 
799 	switch (pbp->bio_cmd) {
800 	case BIO_READ:
801 		if (pbp->bio_children == pbp->bio_inbed) {
802 			pbp->bio_error = 0;
803 			mtx_lock(&sc->sc_queue_mtx);
804 			bioq_disksort(&sc->sc_queue, pbp);
805 			G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
806 			wakeup(sc);
807 			mtx_unlock(&sc->sc_queue_mtx);
808 		}
809 		break;
810 	case BIO_DELETE:
811 	case BIO_WRITE:
812 		if (pbp->bio_children == 0) {
813 			/*
814 			 * All requests failed.
815 			 */
816 		} else if (pbp->bio_inbed < pbp->bio_children) {
817 			/* Do nothing. */
818 			break;
819 		} else if (pbp->bio_children == pbp->bio_inbed) {
820 			/* Some requests succeeded. */
821 			pbp->bio_error = 0;
822 			pbp->bio_completed = pbp->bio_length;
823 		}
824 		g_io_deliver(pbp, pbp->bio_error);
825 		break;
826 	default:
827 		KASSERT(1 == 0, ("Invalid request: %u.", pbp->bio_cmd));
828 		break;
829 	}
830 }
831 
832 static void
833 g_mirror_sync_done(struct bio *bp)
834 {
835 	struct g_mirror_softc *sc;
836 
837 	G_MIRROR_LOGREQ(3, bp, "Synchronization request delivered.");
838 	sc = bp->bio_from->geom->softc;
839 	bp->bio_cflags |= G_MIRROR_BIO_FLAG_SYNC;
840 	mtx_lock(&sc->sc_queue_mtx);
841 	bioq_disksort(&sc->sc_queue, bp);
842 	wakeup(sc);
843 	mtx_unlock(&sc->sc_queue_mtx);
844 }
845 
846 static void
847 g_mirror_start(struct bio *bp)
848 {
849 	struct g_mirror_softc *sc;
850 
851 	sc = bp->bio_to->geom->softc;
852 	/*
853 	 * If sc == NULL or there are no valid disks, provider's error
854 	 * should be set and g_mirror_start() should not be called at all.
855 	 */
856 	KASSERT(sc != NULL && sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
857 	    ("Provider's error should be set (error=%d)(mirror=%s).",
858 	    bp->bio_to->error, bp->bio_to->name));
859 	G_MIRROR_LOGREQ(3, bp, "Request received.");
860 
861 	switch (bp->bio_cmd) {
862 	case BIO_READ:
863 	case BIO_WRITE:
864 	case BIO_DELETE:
865 		break;
866 	case BIO_GETATTR:
867 	default:
868 		g_io_deliver(bp, EOPNOTSUPP);
869 		return;
870 	}
871 	mtx_lock(&sc->sc_queue_mtx);
872 	bioq_disksort(&sc->sc_queue, bp);
873 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
874 	wakeup(sc);
875 	mtx_unlock(&sc->sc_queue_mtx);
876 }
877 
878 /*
879  * Send one synchronization request.
880  */
881 static void
882 g_mirror_sync_one(struct g_mirror_disk *disk)
883 {
884 	struct g_mirror_softc *sc;
885 	struct bio *bp;
886 
887 	sc = disk->d_softc;
888 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
889 	    ("Disk %s is not marked for synchronization.",
890 	    g_mirror_get_diskname(disk)));
891 
892 	bp = g_new_bio();
893 	if (bp == NULL)
894 		return;
895 	bp->bio_parent = NULL;
896 	bp->bio_cmd = BIO_READ;
897 	bp->bio_offset = disk->d_sync.ds_offset;
898 	bp->bio_length = MIN(G_MIRROR_SYNC_BLOCK_SIZE,
899 	    sc->sc_mediasize - bp->bio_offset);
900 	bp->bio_cflags = 0;
901 	bp->bio_done = g_mirror_sync_done;
902 	bp->bio_data = disk->d_sync.ds_data;
903 	if (bp->bio_data == NULL) {
904 		g_destroy_bio(bp);
905 		return;
906 	}
907 	disk->d_sync.ds_offset += bp->bio_length;
908 	bp->bio_to = sc->sc_provider;
909 	G_MIRROR_LOGREQ(3, bp, "Sending synchronization request.");
910 	g_io_request(bp, disk->d_sync.ds_consumer);
911 }
912 
913 static void
914 g_mirror_sync_request(struct bio *bp)
915 {
916 	struct g_mirror_softc *sc;
917 	struct g_mirror_disk *disk;
918 
919 	sc = bp->bio_from->geom->softc;
920 	disk = bp->bio_from->private;
921 	if (disk == NULL) {
922 		g_topology_lock();
923 		g_mirror_kill_consumer(sc, bp->bio_from);
924 		g_topology_unlock();
925 		g_destroy_bio(bp);
926 		return;
927 	}
928 
929 	/*
930 	 * Synchronization request.
931 	 */
932 	switch (bp->bio_cmd) {
933 	case BIO_READ:
934 	    {
935 		struct g_consumer *cp;
936 
937 		if (bp->bio_error != 0) {
938 			G_MIRROR_LOGREQ(0, bp,
939 			    "Synchronization request failed (error=%d).",
940 			    bp->bio_error);
941 			g_destroy_bio(bp);
942 			return;
943 		}
944 		bp->bio_cmd = BIO_WRITE;
945 		bp->bio_cflags = 0;
946 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
947 		cp = disk->d_consumer;
948 		KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
949 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
950 		    cp->acr, cp->acw, cp->ace));
951 		g_io_request(bp, cp);
952 		return;
953 	    }
954 	case BIO_WRITE:
955 		if (bp->bio_error != 0) {
956 			G_MIRROR_LOGREQ(0, bp,
957 			    "Synchronization request failed (error=%d).",
958 			    bp->bio_error);
959 			g_destroy_bio(bp);
960 			sc->sc_bump_syncid = G_MIRROR_BUMP_IMMEDIATELY;
961 			g_mirror_event_send(disk,
962 			    G_MIRROR_DISK_STATE_DISCONNECTED,
963 			    G_MIRROR_EVENT_DONTWAIT);
964 			return;
965 		}
966 		G_MIRROR_LOGREQ(3, bp, "Synchronization request finished.");
967 		disk->d_sync.ds_offset_done = bp->bio_offset + bp->bio_length;
968 		g_destroy_bio(bp);
969 		if (disk->d_sync.ds_offset_done == sc->sc_provider->mediasize) {
970 			/*
971 			 * Disk up-to-date, activate it.
972 			 */
973 			g_mirror_event_send(disk, G_MIRROR_DISK_STATE_ACTIVE,
974 			    G_MIRROR_EVENT_DONTWAIT);
975 			return;
976 		} else if ((disk->d_sync.ds_offset_done %
977 		    (G_MIRROR_SYNC_BLOCK_SIZE * 100)) == 0) {
978 			/*
979 			 * Update offset_done on every 100 blocks.
980 			 * XXX: This should be configurable.
981 			 */
982 			g_topology_lock();
983 			g_mirror_update_metadata(disk);
984 			g_topology_unlock();
985 		}
986 		return;
987 	default:
988 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
989 		    bp->bio_cmd, sc->sc_name));
990 		break;
991 	}
992 }
993 
994 static void
995 g_mirror_request_prefer(struct g_mirror_softc *sc, struct bio *bp)
996 {
997 	struct g_mirror_disk *disk;
998 	struct g_consumer *cp;
999 	struct bio *cbp;
1000 
1001 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1002 		if (disk->d_state == G_MIRROR_DISK_STATE_ACTIVE)
1003 			break;
1004 	}
1005 	if (disk == NULL) {
1006 		if (bp->bio_error == 0)
1007 			bp->bio_error = ENXIO;
1008 		g_io_deliver(bp, bp->bio_error);
1009 		return;
1010 	}
1011 	cbp = g_clone_bio(bp);
1012 	if (cbp == NULL) {
1013 		if (bp->bio_error == 0)
1014 			bp->bio_error = ENOMEM;
1015 		g_io_deliver(bp, bp->bio_error);
1016 		return;
1017 	}
1018 	/*
1019 	 * Fill in the component buf structure.
1020 	 */
1021 	cp = disk->d_consumer;
1022 	cbp->bio_done = g_mirror_done;
1023 	cbp->bio_to = cp->provider;
1024 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1025 	KASSERT(cp->acr > 0 && cp->ace > 0,
1026 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1027 	    cp->acw, cp->ace));
1028 	g_io_request(cbp, cp);
1029 }
1030 
1031 static void
1032 g_mirror_request_round_robin(struct g_mirror_softc *sc, struct bio *bp)
1033 {
1034 	struct g_mirror_disk *disk;
1035 	struct g_consumer *cp;
1036 	struct bio *cbp;
1037 
1038 	disk = g_mirror_get_disk(sc);
1039 	if (disk == NULL) {
1040 		if (bp->bio_error == 0)
1041 			bp->bio_error = ENXIO;
1042 		g_io_deliver(bp, bp->bio_error);
1043 		return;
1044 	}
1045 	cbp = g_clone_bio(bp);
1046 	if (cbp == NULL) {
1047 		if (bp->bio_error == 0)
1048 			bp->bio_error = ENOMEM;
1049 		g_io_deliver(bp, bp->bio_error);
1050 		return;
1051 	}
1052 	/*
1053 	 * Fill in the component buf structure.
1054 	 */
1055 	cp = disk->d_consumer;
1056 	cbp->bio_done = g_mirror_done;
1057 	cbp->bio_to = cp->provider;
1058 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1059 	KASSERT(cp->acr > 0 && cp->ace > 0,
1060 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1061 	    cp->acw, cp->ace));
1062 	g_io_request(cbp, cp);
1063 }
1064 
1065 static void
1066 g_mirror_request_load(struct g_mirror_softc *sc, struct bio *bp)
1067 {
1068 	struct g_mirror_disk *disk, *dp;
1069 	struct g_consumer *cp;
1070 	struct bio *cbp;
1071 	struct bintime curtime;
1072 
1073 	binuptime(&curtime);
1074 	/*
1075 	 * Find a disk which the smallest load.
1076 	 */
1077 	disk = NULL;
1078 	LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1079 		if (dp->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1080 			continue;
1081 		/* If disk wasn't used for more than 2 sec, use it. */
1082 		if (curtime.sec - dp->d_last_used.sec >= 2) {
1083 			disk = dp;
1084 			break;
1085 		}
1086 		if (disk == NULL ||
1087 		    bintime_cmp(&dp->d_delay, &disk->d_delay) < 0) {
1088 			disk = dp;
1089 		}
1090 	}
1091 	cbp = g_clone_bio(bp);
1092 	if (cbp == NULL) {
1093 		if (bp->bio_error == 0)
1094 			bp->bio_error = ENOMEM;
1095 		g_io_deliver(bp, bp->bio_error);
1096 		return;
1097 	}
1098 	/*
1099 	 * Fill in the component buf structure.
1100 	 */
1101 	cp = disk->d_consumer;
1102 	cbp->bio_done = g_mirror_done;
1103 	cbp->bio_to = cp->provider;
1104 	binuptime(&disk->d_last_used);
1105 	G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1106 	KASSERT(cp->acr > 0 && cp->ace > 0,
1107 	    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr,
1108 	    cp->acw, cp->ace));
1109 	g_io_request(cbp, cp);
1110 }
1111 
1112 static void
1113 g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp)
1114 {
1115 	struct bio_queue_head queue;
1116 	struct g_mirror_disk *disk;
1117 	struct g_consumer *cp;
1118 	struct bio *cbp;
1119 	off_t left, mod, offset, slice;
1120 	u_char *data;
1121 	u_int ndisks;
1122 
1123 	if (bp->bio_length <= sc->sc_slice) {
1124 		g_mirror_request_round_robin(sc, bp);
1125 		return;
1126 	}
1127 	ndisks = g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE);
1128 	slice = bp->bio_length / ndisks;
1129 	mod = slice % sc->sc_provider->sectorsize;
1130 	if (mod != 0)
1131 		slice += sc->sc_provider->sectorsize - mod;
1132 	/*
1133 	 * Allocate all bios before sending any request, so we can
1134 	 * return ENOMEM in nice and clean way.
1135 	 */
1136 	left = bp->bio_length;
1137 	offset = bp->bio_offset;
1138 	data = bp->bio_data;
1139 	bioq_init(&queue);
1140 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1141 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
1142 			continue;
1143 		cbp = g_clone_bio(bp);
1144 		if (cbp == NULL) {
1145 			for (cbp = bioq_first(&queue); cbp != NULL;
1146 			    cbp = bioq_first(&queue)) {
1147 				bioq_remove(&queue, cbp);
1148 				g_destroy_bio(cbp);
1149 			}
1150 			if (bp->bio_error == 0)
1151 				bp->bio_error = ENOMEM;
1152 			g_io_deliver(bp, bp->bio_error);
1153 			return;
1154 		}
1155 		bioq_insert_tail(&queue, cbp);
1156 		cbp->bio_done = g_mirror_done;
1157 		cbp->bio_caller1 = disk;
1158 		cbp->bio_to = disk->d_consumer->provider;
1159 		cbp->bio_offset = offset;
1160 		cbp->bio_data = data;
1161 		cbp->bio_length = MIN(left, slice);
1162 		left -= cbp->bio_length;
1163 		if (left == 0)
1164 			break;
1165 		offset += cbp->bio_length;
1166 		data += cbp->bio_length;
1167 	}
1168 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
1169 		bioq_remove(&queue, cbp);
1170 		G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1171 		disk = cbp->bio_caller1;
1172 		cbp->bio_caller1 = NULL;
1173 		cp = disk->d_consumer;
1174 		KASSERT(cp->acr > 0 && cp->ace > 0,
1175 		    ("Consumer %s not opened (r%dw%de%d).", cp->provider->name,
1176 		    cp->acr, cp->acw, cp->ace));
1177 		g_io_request(cbp, disk->d_consumer);
1178 	}
1179 }
1180 
1181 static void
1182 g_mirror_register_request(struct bio *bp)
1183 {
1184 	struct g_mirror_softc *sc;
1185 
1186 	sc = bp->bio_to->geom->softc;
1187 	switch (bp->bio_cmd) {
1188 	case BIO_READ:
1189 		switch (sc->sc_balance) {
1190 		case G_MIRROR_BALANCE_LOAD:
1191 			g_mirror_request_load(sc, bp);
1192 			break;
1193 		case G_MIRROR_BALANCE_PREFER:
1194 			g_mirror_request_prefer(sc, bp);
1195 			break;
1196 		case G_MIRROR_BALANCE_ROUND_ROBIN:
1197 			g_mirror_request_round_robin(sc, bp);
1198 			break;
1199 		case G_MIRROR_BALANCE_SPLIT:
1200 			g_mirror_request_split(sc, bp);
1201 			break;
1202 		}
1203 		return;
1204 	case BIO_WRITE:
1205 	case BIO_DELETE:
1206 	    {
1207 		struct g_mirror_disk *disk;
1208 		struct bio_queue_head queue;
1209 		struct g_consumer *cp;
1210 		struct bio *cbp;
1211 
1212 		/*
1213 		 * Allocate all bios before sending any request, so we can
1214 		 * return ENOMEM in nice and clean way.
1215 		 */
1216 		bioq_init(&queue);
1217 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1218 			switch (disk->d_state) {
1219 			case G_MIRROR_DISK_STATE_ACTIVE:
1220 				break;
1221 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1222 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1223 					continue;
1224 				break;
1225 			default:
1226 				continue;
1227 			}
1228 			cbp = g_clone_bio(bp);
1229 			if (cbp == NULL) {
1230 				for (cbp = bioq_first(&queue); cbp != NULL;
1231 				    cbp = bioq_first(&queue)) {
1232 					bioq_remove(&queue, cbp);
1233 					g_destroy_bio(cbp);
1234 				}
1235 				if (bp->bio_error == 0)
1236 					bp->bio_error = ENOMEM;
1237 				g_io_deliver(bp, bp->bio_error);
1238 				return;
1239 			}
1240 			bioq_insert_tail(&queue, cbp);
1241 		}
1242 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1243 			switch (disk->d_state) {
1244 			case G_MIRROR_DISK_STATE_ACTIVE:
1245 				break;
1246 			case G_MIRROR_DISK_STATE_SYNCHRONIZING:
1247 				if (bp->bio_offset >= disk->d_sync.ds_offset)
1248 					continue;
1249 				break;
1250 			default:
1251 				continue;
1252 			}
1253 			cbp = bioq_first(&queue);
1254 			KASSERT(cbp != NULL, ("NULL cbp! (device %s).",
1255 			    sc->sc_name));
1256 			bioq_remove(&queue, cbp);
1257 			cp = disk->d_consumer;
1258 			cbp->bio_done = g_mirror_done;
1259 			cbp->bio_to = cp->provider;
1260 			G_MIRROR_LOGREQ(3, cbp, "Sending request.");
1261 			KASSERT(cp->acw > 0 && cp->ace > 0,
1262 			    ("Consumer %s not opened (r%dw%de%d).",
1263 			    cp->provider->name, cp->acr, cp->acw, cp->ace));
1264 			g_io_request(cbp, cp);
1265 		}
1266 		/*
1267 		 * Bump syncid on first write.
1268 		 */
1269 		if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE) {
1270 			sc->sc_bump_syncid = 0;
1271 			g_topology_lock();
1272 			g_mirror_bump_syncid(sc);
1273 			g_topology_unlock();
1274 		}
1275 		return;
1276 	    }
1277 	default:
1278 		KASSERT(1 == 0, ("Invalid command here: %u (device=%s)",
1279 		    bp->bio_cmd, sc->sc_name));
1280 		break;
1281 	}
1282 }
1283 
1284 static int
1285 g_mirror_can_destroy(struct g_mirror_softc *sc)
1286 {
1287 	struct g_geom *gp;
1288 	struct g_consumer *cp;
1289 
1290 	g_topology_assert();
1291 	gp = sc->sc_geom;
1292 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1293 		if (g_mirror_is_busy(sc, cp))
1294 			return (0);
1295 	}
1296 	gp = sc->sc_sync.ds_geom;
1297 	LIST_FOREACH(cp, &gp->consumer, consumer) {
1298 		if (g_mirror_is_busy(sc, cp))
1299 			return (0);
1300 	}
1301 	G_MIRROR_DEBUG(2, "No I/O requests for %s, it can be destroyed.",
1302 	    sc->sc_name);
1303 	return (1);
1304 }
1305 
1306 static int
1307 g_mirror_try_destroy(struct g_mirror_softc *sc)
1308 {
1309 
1310 	if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_WAIT) != 0) {
1311 		g_topology_lock();
1312 		if (!g_mirror_can_destroy(sc)) {
1313 			g_topology_unlock();
1314 			return (0);
1315 		}
1316 		g_topology_unlock();
1317 		G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
1318 		    &sc->sc_worker);
1319 		wakeup(&sc->sc_worker);
1320 		sc->sc_worker = NULL;
1321 	} else {
1322 		g_topology_lock();
1323 		if (!g_mirror_can_destroy(sc)) {
1324 			g_topology_unlock();
1325 			return (0);
1326 		}
1327 		g_mirror_destroy_device(sc);
1328 		g_topology_unlock();
1329 		free(sc, M_MIRROR);
1330 	}
1331 	return (1);
1332 }
1333 
/*
 * Worker thread.
 *
 * 'arg' is the softc of the device this thread serves.  The thread loops
 * forever doing, in this order of precedence:
 *  1. pending events (device or disk state updates),
 *  2. synchronization, when there are disks being synchronized and either
 *     the request queue is empty or more than g_mirror_reqs_per_sync
 *     requests were handled since the last synchronization round,
 *  3. one request taken from sc_queue.
 * The thread leaves via kthread_exit() once the device is flagged for
 * destruction and g_mirror_try_destroy() succeeds.
 */
static void
g_mirror_worker(void *arg)
{
	struct g_mirror_softc *sc;
	struct g_mirror_disk *disk;
	struct g_mirror_event *ep;
	struct bio *bp;
	u_int nreqs;

	sc = arg;
	curthread->td_base_pri = PRIBIO;

	/* Number of requests handled since the last synchronization round. */
	nreqs = 0;
	for (;;) {
		G_MIRROR_DEBUG(5, "%s: Let's see...", __func__);
		/*
		 * First take a look at events.
		 * This is important to handle events before any I/O requests.
		 */
		ep = g_mirror_event_get(sc);
		if (ep != NULL) {
			/* State updates run with the topology lock held. */
			g_topology_lock();
			if ((ep->e_flags & G_MIRROR_EVENT_DEVICE) != 0) {
				/* Update only device status. */
				G_MIRROR_DEBUG(3,
				    "Running event for device %s.",
				    sc->sc_name);
				ep->e_error = 0;
				g_mirror_update_device(sc, 1);
			} else {
				/* Update disk status. */
				G_MIRROR_DEBUG(3, "Running event for disk %s.",
				     g_mirror_get_diskname(ep->e_disk));
				ep->e_error = g_mirror_update_disk(ep->e_disk,
				    ep->e_state);
				if (ep->e_error == 0)
					g_mirror_update_device(sc, 0);
			}
			g_topology_unlock();
			if ((ep->e_flags & G_MIRROR_EVENT_DONTWAIT) != 0) {
				/* Nobody waits for the result; free it here. */
				KASSERT(ep->e_error == 0,
				    ("Error cannot be handled."));
				g_mirror_event_free(ep);
			} else {
				/*
				 * Mark the event as done and wake up whoever
				 * sleeps on it; the event is not freed here.
				 */
				ep->e_flags |= G_MIRROR_EVENT_DONE;
				G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__,
				    ep);
				mtx_lock(&sc->sc_events_mtx);
				wakeup(ep);
				mtx_unlock(&sc->sc_events_mtx);
			}
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				if (g_mirror_try_destroy(sc))
					kthread_exit(0);
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 1.", __func__);
			/* Look for further events before touching any I/O. */
			continue;
		}
		/*
		 * Now I/O requests.
		 */
		/* Get first request from the queue. */
		mtx_lock(&sc->sc_queue_mtx);
		bp = bioq_first(&sc->sc_queue);
		if (bp == NULL) {
			if ((sc->sc_flags &
			    G_MIRROR_DEVICE_FLAG_DESTROY) != 0) {
				/*
				 * The queue mutex is dropped around the
				 * destroy attempt and taken again if the
				 * device could not be destroyed yet.
				 */
				mtx_unlock(&sc->sc_queue_mtx);
				if (g_mirror_try_destroy(sc))
					kthread_exit(0);
				mtx_lock(&sc->sc_queue_mtx);
			}
		}
		if (sc->sc_sync.ds_ndisks > 0 &&
		    (bp == NULL || nreqs > g_mirror_reqs_per_sync)) {
			mtx_unlock(&sc->sc_queue_mtx);
			/*
			 * It is time for synchronization...
			 */
			nreqs = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_state !=
				    G_MIRROR_DISK_STATE_SYNCHRONIZING) {
					continue;
				}
				/* Whole provider already covered. */
				if (disk->d_sync.ds_offset >=
				    sc->sc_provider->mediasize) {
					continue;
				}
				/*
				 * ds_offset is ahead of ds_offset_done, so
				 * no new synchronization request is started
				 * for this disk in this round.
				 */
				if (disk->d_sync.ds_offset >
				    disk->d_sync.ds_offset_done) {
					continue;
				}
				g_mirror_sync_one(disk);
			}
			G_MIRROR_DEBUG(5, "%s: I'm here 2.", __func__);
			/* Jumps into the synchronization branch below. */
			goto sleep;
		}
		if (bp == NULL) {
			/*
			 * Nothing to do; sleep on the softc with no timeout.
			 * NOTE(review): PDROP presumably means the queue
			 * mutex is not held after MSLEEP returns - confirm
			 * against the MSLEEP macro definition.
			 */
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w1", 0);
			G_MIRROR_DEBUG(5, "%s: I'm here 3.", __func__);
			continue;
		}
		nreqs++;
		bioq_remove(&sc->sc_queue, bp);
		mtx_unlock(&sc->sc_queue_mtx);

		/* The flags in bio_cflags select how the request is handled. */
		if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_REGULAR) != 0) {
			g_mirror_regular_request(bp);
		} else if ((bp->bio_cflags & G_MIRROR_BIO_FLAG_SYNC) != 0) {
			u_int timeout, sps;

			g_mirror_sync_request(bp);
sleep:
			/*
			 * Throttle synchronization.  This point is reached
			 * both after handling a synchronization request and
			 * via the "goto sleep" above.
			 */
			sps = atomic_load_acq_int(&g_mirror_syncs_per_sec);
			if (sps == 0) {
				/* A value of 0 means: do not sleep at all. */
				G_MIRROR_DEBUG(5, "%s: I'm here 5.", __func__);
				continue;
			}
			mtx_lock(&sc->sc_queue_mtx);
			if (bioq_first(&sc->sc_queue) != NULL) {
				/* Requests are waiting; skip the sleep. */
				mtx_unlock(&sc->sc_queue_mtx);
				G_MIRROR_DEBUG(5, "%s: I'm here 4.", __func__);
				continue;
			}
			/* Sleep for hz / sps ticks, but at least one tick. */
			timeout = hz / sps;
			if (timeout == 0)
				timeout = 1;
			MSLEEP(sc, &sc->sc_queue_mtx, PRIBIO | PDROP, "m:w2",
			    timeout);
		} else {
			/* A request not yet flagged: start processing it. */
			g_mirror_register_request(bp);
		}
		G_MIRROR_DEBUG(5, "%s: I'm here 6.", __func__);
	}
}
1474 
1475 /*
1476  * Open disk's consumer if needed.
1477  */
1478 static void
1479 g_mirror_update_access(struct g_mirror_disk *disk)
1480 {
1481 	struct g_provider *pp;
1482 	struct g_consumer *cp;
1483 	int acr, acw, ace, cpw, error;
1484 
1485 	g_topology_assert();
1486 
1487 	cp = disk->d_consumer;
1488 	pp = disk->d_softc->sc_provider;
1489 	if (pp == NULL) {
1490 		acr = -cp->acr;
1491 		acw = -cp->acw;
1492 		ace = -cp->ace;
1493 	} else {
1494 		acr = pp->acr - cp->acr;
1495 		acw = pp->acw - cp->acw;
1496 		ace = pp->ace - cp->ace;
1497 		/* Grab an extra "exclusive" bit. */
1498 		if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0)
1499 			ace++;
1500 	}
1501 	if (acr == 0 && acw == 0 && ace == 0)
1502 		return;
1503 	cpw = cp->acw;
1504 	error = g_access(cp, acr, acw, ace);
1505 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, acr,
1506 	    acw, ace, error);
1507 	if (error != 0) {
1508 		disk->d_softc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
1509 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1510 		    G_MIRROR_EVENT_DONTWAIT);
1511 		return;
1512 	}
1513 	if (cpw == 0 && cp->acw > 0) {
1514 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as dirty.",
1515 		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1516 		disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1517 	} else if (cpw > 0 && cp->acw == 0) {
1518 		G_MIRROR_DEBUG(1, "Disk %s (device %s) marked as clean.",
1519 		    g_mirror_get_diskname(disk), disk->d_softc->sc_name);
1520 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1521 	}
1522 }
1523 
1524 static void
1525 g_mirror_sync_start(struct g_mirror_disk *disk)
1526 {
1527 	struct g_mirror_softc *sc;
1528 	struct g_consumer *cp;
1529 	int error;
1530 
1531 	g_topology_assert();
1532 
1533 	sc = disk->d_softc;
1534 	KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1535 	    ("Device not in RUNNING state (%s, %u).", sc->sc_name,
1536 	    sc->sc_state));
1537 	cp = disk->d_consumer;
1538 	KASSERT(cp->acr == 0 && cp->acw == 0 && cp->ace == 0,
1539 	    ("Consumer %s already opened.", cp->provider->name));
1540 
1541 	G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s.", sc->sc_name,
1542 	    g_mirror_get_diskname(disk));
1543 	error = g_access(cp, 0, 1, 1);
1544 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, 1,
1545 	    1, error);
1546 	if (error != 0) {
1547 		g_mirror_event_send(disk, G_MIRROR_DISK_STATE_DISCONNECTED,
1548 		    G_MIRROR_EVENT_DONTWAIT);
1549 		return;
1550 	}
1551 	disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
1552 	KASSERT(disk->d_sync.ds_consumer == NULL,
1553 	    ("Sync consumer already exists (device=%s, disk=%s).",
1554 	    sc->sc_name, g_mirror_get_diskname(disk)));
1555 	disk->d_sync.ds_consumer = g_new_consumer(sc->sc_sync.ds_geom);
1556 	disk->d_sync.ds_consumer->private = disk;
1557 	error = g_attach(disk->d_sync.ds_consumer, disk->d_softc->sc_provider);
1558 	KASSERT(error == 0, ("Cannot attach to %s (error=%d).",
1559 	    disk->d_softc->sc_name, error));
1560 	error = g_access(disk->d_sync.ds_consumer, 1, 0, 0);
1561 	KASSERT(error == 0, ("Cannot open %s (error=%d).",
1562 	    disk->d_softc->sc_name, error));
1563 	disk->d_sync.ds_data = malloc(G_MIRROR_SYNC_BLOCK_SIZE, M_MIRROR,
1564 	    M_WAITOK);
1565 	sc->sc_sync.ds_ndisks++;
1566 }
1567 
1568 /*
1569  * Stop synchronization process.
1570  * type: 0 - synchronization finished
1571  *       1 - synchronization stopped
1572  */
1573 static void
1574 g_mirror_sync_stop(struct g_mirror_disk *disk, int type)
1575 {
1576 	struct g_consumer *cp;
1577 
1578 	g_topology_assert();
1579 	KASSERT(disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
1580 	    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1581 	    g_mirror_disk_state2str(disk->d_state)));
1582 	if (disk->d_sync.ds_consumer == NULL)
1583 		return;
1584 
1585 	if (type == 0) {
1586 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s finished.",
1587 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1588 	} else /* if (type == 1) */ {
1589 		G_MIRROR_DEBUG(0, "Device %s: rebuilding provider %s stopped.",
1590 		    disk->d_softc->sc_name, g_mirror_get_diskname(disk));
1591 	}
1592 	cp = disk->d_sync.ds_consumer;
1593 	g_access(cp, -1, 0, 0);
1594 	g_mirror_kill_consumer(disk->d_softc, cp);
1595 	free(disk->d_sync.ds_data, M_MIRROR);
1596 	disk->d_sync.ds_consumer = NULL;
1597 	disk->d_softc->sc_sync.ds_ndisks--;
1598 	cp = disk->d_consumer;
1599 	KASSERT(cp->acr == 0 && cp->acw == 1 && cp->ace == 1,
1600 	    ("Consumer %s not opened.", cp->provider->name));
1601 	g_access(cp, 0, -1, -1);
1602 	G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d", cp->provider->name, 0, -1,
1603 	    -1, 0);
1604 	disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
1605 }
1606 
1607 static void
1608 g_mirror_launch_provider(struct g_mirror_softc *sc)
1609 {
1610 	struct g_mirror_disk *disk;
1611 	struct g_provider *pp;
1612 
1613 	g_topology_assert();
1614 
1615 	pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name);
1616 	pp->mediasize = sc->sc_mediasize;
1617 	pp->sectorsize = sc->sc_sectorsize;
1618 	sc->sc_provider = pp;
1619 	g_error_provider(pp, 0);
1620 	G_MIRROR_DEBUG(0, "Device %s: provider %s launched.", sc->sc_name,
1621 	    pp->name);
1622 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1623 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1624 			g_mirror_sync_start(disk);
1625 	}
1626 }
1627 
1628 static void
1629 g_mirror_destroy_provider(struct g_mirror_softc *sc)
1630 {
1631 	struct g_mirror_disk *disk;
1632 	struct bio *bp;
1633 
1634 	g_topology_assert();
1635 	KASSERT(sc->sc_provider != NULL, ("NULL provider (device=%s).",
1636 	    sc->sc_name));
1637 
1638 	g_error_provider(sc->sc_provider, ENXIO);
1639 	mtx_lock(&sc->sc_queue_mtx);
1640 	while ((bp = bioq_first(&sc->sc_queue)) != NULL) {
1641 		bioq_remove(&sc->sc_queue, bp);
1642 		g_io_deliver(bp, ENXIO);
1643 	}
1644 	mtx_unlock(&sc->sc_queue_mtx);
1645 	G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name,
1646 	    sc->sc_provider->name);
1647 	sc->sc_provider->flags |= G_PF_WITHER;
1648 	g_orphan_provider(sc->sc_provider, ENXIO);
1649 	sc->sc_provider = NULL;
1650 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
1651 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING)
1652 			g_mirror_sync_stop(disk, 1);
1653 	}
1654 }
1655 
1656 static void
1657 g_mirror_go(void *arg)
1658 {
1659 	struct g_mirror_softc *sc;
1660 
1661 	sc = arg;
1662 	G_MIRROR_DEBUG(0, "Force device %s start due to timeout.", sc->sc_name);
1663 	g_mirror_event_send(sc, 0,
1664 	    G_MIRROR_EVENT_DONTWAIT | G_MIRROR_EVENT_DEVICE);
1665 }
1666 
1667 static u_int
1668 g_mirror_determine_state(struct g_mirror_disk *disk)
1669 {
1670 	struct g_mirror_softc *sc;
1671 	u_int state;
1672 
1673 	sc = disk->d_softc;
1674 	if (sc->sc_syncid == disk->d_sync.ds_syncid) {
1675 		if ((disk->d_flags &
1676 		    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
1677 			/* Disk does not need synchronization. */
1678 			state = G_MIRROR_DISK_STATE_ACTIVE;
1679 		} else {
1680 			if ((sc->sc_flags &
1681 			     G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0  ||
1682 			    (disk->d_flags &
1683 			     G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1684 				/*
1685 				 * We can start synchronization from
1686 				 * the stored offset.
1687 				 */
1688 				state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1689 			} else {
1690 				state = G_MIRROR_DISK_STATE_STALE;
1691 			}
1692 		}
1693 	} else if (disk->d_sync.ds_syncid < sc->sc_syncid) {
1694 		/*
1695 		 * Reset all synchronization data for this disk,
1696 		 * because if it even was synchronized, it was
1697 		 * synchronized to disks with different syncid.
1698 		 */
1699 		disk->d_flags |= G_MIRROR_DISK_FLAG_SYNCHRONIZING;
1700 		disk->d_sync.ds_offset = 0;
1701 		disk->d_sync.ds_offset_done = 0;
1702 		disk->d_sync.ds_syncid = sc->sc_syncid;
1703 		if ((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) == 0 ||
1704 		    (disk->d_flags & G_MIRROR_DISK_FLAG_FORCE_SYNC) != 0) {
1705 			state = G_MIRROR_DISK_STATE_SYNCHRONIZING;
1706 		} else {
1707 			state = G_MIRROR_DISK_STATE_STALE;
1708 		}
1709 	} else /* if (sc->sc_syncid < disk->d_sync.ds_syncid) */ {
1710 		/*
1711 		 * Not good, NOT GOOD!
1712 		 * It means that mirror was started on stale disks
1713 		 * and more fresh disk just arrive.
1714 		 * If there were writes, mirror is fucked up, sorry.
1715 		 * I think the best choice here is don't touch
1716 		 * this disk and inform the user laudly.
1717 		 */
1718 		G_MIRROR_DEBUG(0, "Device %s was started before the freshest "
1719 		    "disk (%s) arrives!! It will not be connected to the "
1720 		    "running device.", sc->sc_name,
1721 		    g_mirror_get_diskname(disk));
1722 		g_mirror_destroy_disk(disk);
1723 		state = G_MIRROR_DISK_STATE_NONE;
1724 		/* Return immediately, because disk was destroyed. */
1725 		return (state);
1726 	}
1727 	G_MIRROR_DEBUG(3, "State for %s disk: %s.",
1728 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(state));
1729 	return (state);
1730 }
1731 
/*
 * Update device state.
 *
 * In the STARTING state this decides whether the device may start running
 * (all expected disks are connected, or 'force' is true and at least one
 * disk is connected), selects the syncid of the device, forces
 * resynchronization of dirty disks where needed, switches the device to
 * RUNNING and posts a state-change event for every disk.
 *
 * In the RUNNING state this flags the device for destruction when no disk
 * is left, destroys or launches the provider depending on the presence of
 * ACTIVE and NEW disks, and carries out an immediate syncid bump when one
 * was requested.
 *
 * Must be called with the topology lock held.
 */
static void
g_mirror_update_device(struct g_mirror_softc *sc, boolean_t force)
{
	struct g_mirror_disk *disk;
	u_int state;

	g_topology_assert();

	switch (sc->sc_state) {
	case G_MIRROR_DEVICE_STATE_STARTING:
	    {
		struct g_mirror_disk *pdisk;
		u_int dirty, ndisks, syncid;

		KASSERT(sc->sc_provider == NULL,
		    ("Non-NULL provider in STARTING state (%s).", sc->sc_name));
		/*
		 * Are we ready? We are, if all disks are connected or
		 * if we have any disks and 'force' is true.
		 */
		if ((force && g_mirror_ndisks(sc, -1) > 0) ||
		    sc->sc_ndisks == g_mirror_ndisks(sc, -1)) {
			;
		} else if (g_mirror_ndisks(sc, -1) == 0) {
			/*
			 * Disks went down in starting phase, so destroy
			 * device.
			 */
			callout_drain(&sc->sc_callout);
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			return;
		} else {
			/* Some disks are still missing; keep waiting. */
			return;
		}

		/*
		 * Activate all disks with the biggest syncid.
		 */
		if (force) {
			/*
			 * If 'force' is true, we have been called due to
			 * timeout, so don't bother canceling timeout.
			 */
			/* Count disks not flagged as being synchronized. */
			ndisks = 0;
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) == 0) {
					ndisks++;
				}
			}
			if (ndisks == 0) {
				/* No valid disks found, destroy device. */
				sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
				return;
			}
		} else {
			/* Cancel timeout. */
			callout_drain(&sc->sc_callout);
		}

		/*
		 * Find disk with the biggest syncid.
		 */
		syncid = 0;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid > syncid)
				syncid = disk->d_sync.ds_syncid;
		}

		/*
		 * Here we need to look for dirty disks and if all disks
		 * with the biggest syncid are dirty, we have to choose
		 * one with the biggest priority and rebuild the rest.
		 */
		/*
		 * Find the number of dirty disks with the biggest syncid.
		 * Find the number of disks with the biggest syncid.
		 * While here, find a disk with the biggest priority.
		 */
		dirty = ndisks = 0;
		pdisk = NULL;
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			if (disk->d_sync.ds_syncid != syncid)
				continue;
			if ((disk->d_flags &
			    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
				continue;
			}
			ndisks++;
			if ((disk->d_flags & G_MIRROR_DISK_FLAG_DIRTY) != 0) {
				dirty++;
				/* Only dirty disks are candidates here. */
				if (pdisk == NULL ||
				    pdisk->d_priority < disk->d_priority) {
					pdisk = disk;
				}
			}
		}
		if (dirty == 0) {
			/* No dirty disks at all, great. */
		} else if (dirty == ndisks) {
			/*
			 * Force synchronization for all dirty disks except one
			 * with the biggest priority.
			 */
			KASSERT(pdisk != NULL, ("pdisk == NULL"));
			G_MIRROR_DEBUG(1, "Using disk %s (device %s) as a "
			    "master disk for synchronization.",
			    g_mirror_get_diskname(pdisk), sc->sc_name);
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				KASSERT((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) != 0,
				    ("Disk %s isn't marked as dirty.",
				    g_mirror_get_diskname(disk)));
				/* Skip the disk with the biggest priority. */
				if (disk == pdisk)
					continue;
				/*
				 * A syncid of 0 is lower than the device's
				 * syncid, so g_mirror_determine_state() will
				 * treat this disk as outdated.
				 */
				disk->d_sync.ds_syncid = 0;
			}
		} else if (dirty < ndisks) {
			/*
			 * Force synchronization for all dirty disks.
			 * We have some non-dirty disks.
			 */
			LIST_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_sync.ds_syncid != syncid)
					continue;
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_SYNCHRONIZING) != 0) {
					continue;
				}
				if ((disk->d_flags &
				    G_MIRROR_DISK_FLAG_DIRTY) == 0) {
					continue;
				}
				disk->d_sync.ds_syncid = 0;
			}
		}

		/* Reset hint. */
		sc->sc_hint = NULL;
		sc->sc_syncid = syncid;
		if (force) {
			/* Remember to bump syncid on first write. */
			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
		}
		state = G_MIRROR_DEVICE_STATE_RUNNING;
		G_MIRROR_DEBUG(1, "Device %s state changed from %s to %s.",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state),
		    g_mirror_device_state2str(state));
		sc->sc_state = state;
		/*
		 * Queue a state change for every disk.  From here on
		 * 'state' holds a disk state, not a device state.
		 */
		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
			state = g_mirror_determine_state(disk);
			g_mirror_event_send(disk, state,
			    G_MIRROR_EVENT_DONTWAIT);
			if (state == G_MIRROR_DISK_STATE_STALE) {
				sc->sc_bump_syncid =
				    G_MIRROR_BUMP_ON_FIRST_WRITE;
			}
		}
		break;
	    }
	case G_MIRROR_DEVICE_STATE_RUNNING:
		if (g_mirror_ndisks(sc, -1) == 0) {
			/*
			 * No disks at all, we need to destroy device.
			 */
			sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) == 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * No active disks, destroy provider.
			 */
			if (sc->sc_provider != NULL)
				g_mirror_destroy_provider(sc);
			break;
		} else if (g_mirror_ndisks(sc,
		    G_MIRROR_DISK_STATE_ACTIVE) > 0 &&
		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_NEW) == 0) {
			/*
			 * We have active disks, launch provider if it doesn't
			 * exist.
			 */
			if (sc->sc_provider == NULL)
				g_mirror_launch_provider(sc);
		}
		/*
		 * Bump syncid here, if we need to do it immediately.
		 */
		if (sc->sc_bump_syncid == G_MIRROR_BUMP_IMMEDIATELY) {
			sc->sc_bump_syncid = 0;
			g_mirror_bump_syncid(sc);
		}
		break;
	default:
		KASSERT(1 == 0, ("Wrong device state (%s, %s).",
		    sc->sc_name, g_mirror_device_state2str(sc->sc_state)));
		break;
	}
}
1942 
1943 /*
1944  * Update disk state and device state if needed.
1945  */
/*
 * Log (at debug level 1) a disk state transition.  The macro takes no
 * arguments and expects 'disk', 'state' (the new state) and 'sc' to be in
 * scope where it is used; disk->d_state must still hold the old state.
 */
#define	DISK_STATE_CHANGED()	G_MIRROR_DEBUG(1,			\
	"Disk %s state changed from %s to %s (device %s).",		\
	g_mirror_get_diskname(disk),					\
	g_mirror_disk_state2str(disk->d_state),				\
	g_mirror_disk_state2str(state), sc->sc_name)
1951 static int
1952 g_mirror_update_disk(struct g_mirror_disk *disk, u_int state)
1953 {
1954 	struct g_mirror_softc *sc;
1955 
1956 	g_topology_assert();
1957 
1958 	sc = disk->d_softc;
1959 again:
1960 	G_MIRROR_DEBUG(3, "Changing disk %s state from %s to %s.",
1961 	    g_mirror_get_diskname(disk), g_mirror_disk_state2str(disk->d_state),
1962 	    g_mirror_disk_state2str(state));
1963 	switch (state) {
1964 	case G_MIRROR_DISK_STATE_NEW:
1965 		/*
1966 		 * Possible scenarios:
1967 		 * 1. New disk arrive.
1968 		 */
1969 		/* Previous state should be NONE. */
1970 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NONE,
1971 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
1972 		    g_mirror_disk_state2str(disk->d_state)));
1973 		DISK_STATE_CHANGED();
1974 
1975 		disk->d_state = state;
1976 		if (LIST_EMPTY(&sc->sc_disks))
1977 			LIST_INSERT_HEAD(&sc->sc_disks, disk, d_next);
1978 		else {
1979 			struct g_mirror_disk *dp;
1980 
1981 			LIST_FOREACH(dp, &sc->sc_disks, d_next) {
1982 				if (disk->d_priority >= dp->d_priority) {
1983 					LIST_INSERT_BEFORE(dp, disk, d_next);
1984 					dp = NULL;
1985 					break;
1986 				}
1987 				if (LIST_NEXT(dp, d_next) == NULL)
1988 					break;
1989 			}
1990 			if (dp != NULL)
1991 				LIST_INSERT_AFTER(dp, disk, d_next);
1992 		}
1993 		G_MIRROR_DEBUG(0, "Device %s: provider %s detected.",
1994 		    sc->sc_name, g_mirror_get_diskname(disk));
1995 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
1996 			break;
1997 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
1998 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
1999 		    g_mirror_device_state2str(sc->sc_state),
2000 		    g_mirror_get_diskname(disk),
2001 		    g_mirror_disk_state2str(disk->d_state)));
2002 		state = g_mirror_determine_state(disk);
2003 		if (state != G_MIRROR_DISK_STATE_NONE)
2004 			goto again;
2005 		break;
2006 	case G_MIRROR_DISK_STATE_ACTIVE:
2007 		/*
2008 		 * Possible scenarios:
2009 		 * 1. New disk does not need synchronization.
2010 		 * 2. Synchronization process finished successfully.
2011 		 */
2012 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2013 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2014 		    g_mirror_device_state2str(sc->sc_state),
2015 		    g_mirror_get_diskname(disk),
2016 		    g_mirror_disk_state2str(disk->d_state)));
2017 		/* Previous state should be NEW or SYNCHRONIZING. */
2018 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW ||
2019 		    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2020 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2021 		    g_mirror_disk_state2str(disk->d_state)));
2022 		DISK_STATE_CHANGED();
2023 
2024 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2025 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2026 		else if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2027 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_SYNCHRONIZING;
2028 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_FORCE_SYNC;
2029 			g_mirror_sync_stop(disk, 0);
2030 		}
2031 		disk->d_state = state;
2032 		disk->d_sync.ds_offset = 0;
2033 		disk->d_sync.ds_offset_done = 0;
2034 		g_mirror_update_access(disk);
2035 		g_mirror_update_metadata(disk);
2036 		G_MIRROR_DEBUG(0, "Device %s: provider %s activated.",
2037 		    sc->sc_name, g_mirror_get_diskname(disk));
2038 		break;
2039 	case G_MIRROR_DISK_STATE_STALE:
2040 		/*
2041 		 * Possible scenarios:
2042 		 * 1. Stale disk was connected.
2043 		 */
2044 		/* Previous state should be NEW. */
2045 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2046 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2047 		    g_mirror_disk_state2str(disk->d_state)));
2048 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2049 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2050 		    g_mirror_device_state2str(sc->sc_state),
2051 		    g_mirror_get_diskname(disk),
2052 		    g_mirror_disk_state2str(disk->d_state)));
2053 		/*
2054 		 * STALE state is only possible if device is marked
2055 		 * NOAUTOSYNC.
2056 		 */
2057 		KASSERT((sc->sc_flags & G_MIRROR_DEVICE_FLAG_NOAUTOSYNC) != 0,
2058 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2059 		    g_mirror_device_state2str(sc->sc_state),
2060 		    g_mirror_get_diskname(disk),
2061 		    g_mirror_disk_state2str(disk->d_state)));
2062 		DISK_STATE_CHANGED();
2063 
2064 		disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2065 		disk->d_state = state;
2066 		g_mirror_update_metadata(disk);
2067 		G_MIRROR_DEBUG(0, "Device %s: provider %s is stale.",
2068 		    sc->sc_name, g_mirror_get_diskname(disk));
2069 		break;
2070 	case G_MIRROR_DISK_STATE_SYNCHRONIZING:
2071 		/*
2072 		 * Possible scenarios:
2073 		 * 1. Disk which needs synchronization was connected.
2074 		 */
2075 		/* Previous state should be NEW. */
2076 		KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2077 		    ("Wrong disk state (%s, %s).", g_mirror_get_diskname(disk),
2078 		    g_mirror_disk_state2str(disk->d_state)));
2079 		KASSERT(sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING,
2080 		    ("Wrong device state (%s, %s, %s, %s).", sc->sc_name,
2081 		    g_mirror_device_state2str(sc->sc_state),
2082 		    g_mirror_get_diskname(disk),
2083 		    g_mirror_disk_state2str(disk->d_state)));
2084 		DISK_STATE_CHANGED();
2085 
2086 		if (disk->d_state == G_MIRROR_DISK_STATE_NEW)
2087 			disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2088 		disk->d_state = state;
2089 		if (sc->sc_provider != NULL) {
2090 			g_mirror_sync_start(disk);
2091 			g_mirror_update_metadata(disk);
2092 		}
2093 		break;
2094 	case G_MIRROR_DISK_STATE_DISCONNECTED:
2095 		/*
2096 		 * Possible scenarios:
2097 		 * 1. Device wasn't running yet, but disk disappear.
2098 		 * 2. Disk was active and disapppear.
2099 		 * 3. Disk disappear during synchronization process.
2100 		 */
2101 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_RUNNING) {
2102 			/*
2103 			 * Previous state should be ACTIVE, STALE or
2104 			 * SYNCHRONIZING.
2105 			 */
2106 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_ACTIVE ||
2107 			    disk->d_state == G_MIRROR_DISK_STATE_STALE ||
2108 			    disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING,
2109 			    ("Wrong disk state (%s, %s).",
2110 			    g_mirror_get_diskname(disk),
2111 			    g_mirror_disk_state2str(disk->d_state)));
2112 		} else if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING) {
2113 			/* Previous state should be NEW. */
2114 			KASSERT(disk->d_state == G_MIRROR_DISK_STATE_NEW,
2115 			    ("Wrong disk state (%s, %s).",
2116 			    g_mirror_get_diskname(disk),
2117 			    g_mirror_disk_state2str(disk->d_state)));
2118 			/*
2119 			 * Reset bumping syncid if disk disappeared in STARTING
2120 			 * state.
2121 			 */
2122 			if (sc->sc_bump_syncid == G_MIRROR_BUMP_ON_FIRST_WRITE)
2123 				sc->sc_bump_syncid = 0;
2124 #ifdef	INVARIANTS
2125 		} else {
2126 			KASSERT(1 == 0, ("Wrong device state (%s, %s, %s, %s).",
2127 			    sc->sc_name,
2128 			    g_mirror_device_state2str(sc->sc_state),
2129 			    g_mirror_get_diskname(disk),
2130 			    g_mirror_disk_state2str(disk->d_state)));
2131 #endif
2132 		}
2133 		DISK_STATE_CHANGED();
2134 		G_MIRROR_DEBUG(0, "Device %s: provider %s disconnected.",
2135 		    sc->sc_name, g_mirror_get_diskname(disk));
2136 
2137 		g_mirror_destroy_disk(disk);
2138 		break;
2139 	case G_MIRROR_DISK_STATE_DESTROY:
2140 	    {
2141 		int error;
2142 
2143 		error = g_mirror_clear_metadata(disk);
2144 		if (error != 0)
2145 			return (error);
2146 		DISK_STATE_CHANGED();
2147 		G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.",
2148 		    sc->sc_name, g_mirror_get_diskname(disk));
2149 
2150 		g_mirror_destroy_disk(disk);
2151 		sc->sc_ndisks--;
2152 		LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2153 			g_mirror_update_metadata(disk);
2154 		}
2155 		break;
2156 	    }
2157 	default:
2158 		KASSERT(1 == 0, ("Unknown state (%u).", state));
2159 		break;
2160 	}
2161 	return (0);
2162 }
2163 #undef	DISK_STATE_CHANGED
2164 
2165 static int
2166 g_mirror_read_metadata(struct g_consumer *cp, struct g_mirror_metadata *md)
2167 {
2168 	struct g_provider *pp;
2169 	u_char *buf;
2170 	int error;
2171 
2172 	g_topology_assert();
2173 
2174 	error = g_access(cp, 1, 0, 0);
2175 	if (error != 0)
2176 		return (error);
2177 	pp = cp->provider;
2178 	g_topology_unlock();
2179 	/* Metadata are stored on last sector. */
2180 	buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
2181 	    &error);
2182 	g_topology_lock();
2183 	if (buf == NULL) {
2184 		g_access(cp, -1, 0, 0);
2185 		return (error);
2186 	}
2187 	if (error != 0) {
2188 		g_access(cp, -1, 0, 0);
2189 		g_free(buf);
2190 		return (error);
2191 	}
2192 	error = g_access(cp, -1, 0, 0);
2193 	KASSERT(error == 0, ("Cannot decrease access count for %s.", pp->name));
2194 
2195 	/* Decode metadata. */
2196 	error = mirror_metadata_decode(buf, md);
2197 	g_free(buf);
2198 	if (strcmp(md->md_magic, G_MIRROR_MAGIC) != 0)
2199 		return (EINVAL);
2200 	if (error != 0) {
2201 		G_MIRROR_DEBUG(1, "MD5 metadata hash mismatch for provider %s.",
2202 		    cp->provider->name);
2203 		return (error);
2204 	}
2205 
2206 	return (0);
2207 }
2208 
2209 static int
2210 g_mirror_check_metadata(struct g_mirror_softc *sc, struct g_provider *pp,
2211     struct g_mirror_metadata *md)
2212 {
2213 
2214 	if (g_mirror_id2disk(sc, md->md_did) != NULL) {
2215 		G_MIRROR_DEBUG(1, "Disk %s (id=%u) already exists, skipping.",
2216 		    pp->name, md->md_did);
2217 		return (EEXIST);
2218 	}
2219 	if (md->md_all != sc->sc_ndisks) {
2220 		G_MIRROR_DEBUG(1,
2221 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2222 		    "md_all", pp->name, sc->sc_name);
2223 		return (EINVAL);
2224 	}
2225 	if (md->md_slice != sc->sc_slice) {
2226 		G_MIRROR_DEBUG(1,
2227 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2228 		    "md_slice", pp->name, sc->sc_name);
2229 		return (EINVAL);
2230 	}
2231 	if (md->md_balance != sc->sc_balance) {
2232 		G_MIRROR_DEBUG(1,
2233 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2234 		    "md_balance", pp->name, sc->sc_name);
2235 		return (EINVAL);
2236 	}
2237 	if (md->md_mediasize != sc->sc_mediasize) {
2238 		G_MIRROR_DEBUG(1,
2239 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2240 		    "md_mediasize", pp->name, sc->sc_name);
2241 		return (EINVAL);
2242 	}
2243 	if (sc->sc_mediasize > pp->mediasize) {
2244 		G_MIRROR_DEBUG(1,
2245 		    "Invalid size of disk %s (device %s), skipping.", pp->name,
2246 		    sc->sc_name);
2247 		return (EINVAL);
2248 	}
2249 	if (md->md_sectorsize != sc->sc_sectorsize) {
2250 		G_MIRROR_DEBUG(1,
2251 		    "Invalid '%s' field on disk %s (device %s), skipping.",
2252 		    "md_sectorsize", pp->name, sc->sc_name);
2253 		return (EINVAL);
2254 	}
2255 	if ((sc->sc_sectorsize % pp->sectorsize) != 0) {
2256 		G_MIRROR_DEBUG(1,
2257 		    "Invalid sector size of disk %s (device %s), skipping.",
2258 		    pp->name, sc->sc_name);
2259 		return (EINVAL);
2260 	}
2261 	if ((md->md_mflags & ~G_MIRROR_DEVICE_FLAG_MASK) != 0) {
2262 		G_MIRROR_DEBUG(1,
2263 		    "Invalid device flags on disk %s (device %s), skipping.",
2264 		    pp->name, sc->sc_name);
2265 		return (EINVAL);
2266 	}
2267 	if ((md->md_dflags & ~G_MIRROR_DISK_FLAG_MASK) != 0) {
2268 		G_MIRROR_DEBUG(1,
2269 		    "Invalid disk flags on disk %s (device %s), skipping.",
2270 		    pp->name, sc->sc_name);
2271 		return (EINVAL);
2272 	}
2273 	return (0);
2274 }
2275 
2276 static int
2277 g_mirror_add_disk(struct g_mirror_softc *sc, struct g_provider *pp,
2278     struct g_mirror_metadata *md)
2279 {
2280 	struct g_mirror_disk *disk;
2281 	int error;
2282 
2283 	g_topology_assert();
2284 	G_MIRROR_DEBUG(2, "Adding disk %s.", pp->name);
2285 
2286 	error = g_mirror_check_metadata(sc, pp, md);
2287 	if (error != 0)
2288 		return (error);
2289 	disk = g_mirror_init_disk(sc, pp, md, &error);
2290 	if (disk == NULL)
2291 		return (error);
2292 	error = g_mirror_event_send(disk, G_MIRROR_DISK_STATE_NEW,
2293 	    G_MIRROR_EVENT_WAIT);
2294 	return (error);
2295 }
2296 
2297 static int
2298 g_mirror_access(struct g_provider *pp, int acr, int acw, int ace)
2299 {
2300 	struct g_mirror_softc *sc;
2301 	struct g_mirror_disk *disk;
2302 	int dcr, dcw, dce, err, error;
2303 
2304 	g_topology_assert();
2305 	G_MIRROR_DEBUG(2, "Access request for %s: r%dw%de%d.", pp->name, acr,
2306 	    acw, ace);
2307 
2308 	dcr = pp->acr + acr;
2309 	dcw = pp->acw + acw;
2310 	dce = pp->ace + ace;
2311 
2312 	/* On first open, grab an extra "exclusive" bit */
2313 	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
2314 		ace++;
2315 	/* ... and let go of it on last close */
2316 	if (dcr == 0 && dcw == 0 && dce == 0)
2317 		ace--;
2318 
2319 	sc = pp->geom->softc;
2320 	if (sc == NULL || LIST_EMPTY(&sc->sc_disks)) {
2321 		if (acr <= 0 && acw <= 0 && ace <= 0)
2322 			return (0);
2323 		else
2324 			return (ENXIO);
2325 	}
2326 	error = ENXIO;
2327 	LIST_FOREACH(disk, &sc->sc_disks, d_next) {
2328 		if (disk->d_state != G_MIRROR_DISK_STATE_ACTIVE)
2329 			continue;
2330 		err = g_access(disk->d_consumer, acr, acw, ace);
2331 		G_MIRROR_DEBUG(2, "Access %s r%dw%de%d = %d",
2332 		    g_mirror_get_diskname(disk), acr, acw, ace, err);
2333 		if (err == 0) {
2334 			/*
2335 			 * Mark disk as dirty on open and unmark on close.
2336 			 */
2337 			if (pp->acw == 0 && dcw > 0) {
2338 				G_MIRROR_DEBUG(1,
2339 				    "Disk %s (device %s) marked as dirty.",
2340 				    g_mirror_get_diskname(disk), sc->sc_name);
2341 				disk->d_flags |= G_MIRROR_DISK_FLAG_DIRTY;
2342 				g_mirror_update_metadata(disk);
2343 			} else if (pp->acw > 0 && dcw == 0) {
2344 				G_MIRROR_DEBUG(1,
2345 				    "Disk %s (device %s) marked as clean.",
2346 				    g_mirror_get_diskname(disk), sc->sc_name);
2347 				disk->d_flags &= ~G_MIRROR_DISK_FLAG_DIRTY;
2348 				g_mirror_update_metadata(disk);
2349 			}
2350 			error = 0;
2351 		} else {
2352 			sc->sc_bump_syncid = G_MIRROR_BUMP_ON_FIRST_WRITE;
2353 			g_mirror_event_send(disk,
2354 			    G_MIRROR_DISK_STATE_DISCONNECTED,
2355 			    G_MIRROR_EVENT_DONTWAIT);
2356 		}
2357 	}
2358 	return (error);
2359 }
2360 
2361 static struct g_geom *
2362 g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md)
2363 {
2364 	struct g_mirror_softc *sc;
2365 	struct g_geom *gp;
2366 	int error, timeout;
2367 
2368 	g_topology_assert();
2369 	G_MIRROR_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
2370 	    md->md_mid);
2371 
2372 	/* One disk is minimum. */
2373 	if (md->md_all < 1)
2374 		return (NULL);
2375 	/*
2376 	 * Action geom.
2377 	 */
2378 	gp = g_new_geomf(mp, "%s", md->md_name);
2379 	sc = malloc(sizeof(*sc), M_MIRROR, M_WAITOK | M_ZERO);
2380 	gp->start = g_mirror_start;
2381 	gp->spoiled = g_mirror_spoiled;
2382 	gp->orphan = g_mirror_orphan;
2383 	gp->access = g_mirror_access;
2384 	gp->dumpconf = g_mirror_dumpconf;
2385 
2386 	sc->sc_id = md->md_mid;
2387 	sc->sc_slice = md->md_slice;
2388 	sc->sc_balance = md->md_balance;
2389 	sc->sc_mediasize = md->md_mediasize;
2390 	sc->sc_sectorsize = md->md_sectorsize;
2391 	sc->sc_ndisks = md->md_all;
2392 	sc->sc_flags = md->md_mflags;
2393 	sc->sc_bump_syncid = 0;
2394 	bioq_init(&sc->sc_queue);
2395 	mtx_init(&sc->sc_queue_mtx, "gmirror:queue", NULL, MTX_DEF);
2396 	LIST_INIT(&sc->sc_disks);
2397 	TAILQ_INIT(&sc->sc_events);
2398 	mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF);
2399 	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
2400 	sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING;
2401 	gp->softc = sc;
2402 	sc->sc_geom = gp;
2403 	sc->sc_provider = NULL;
2404 	/*
2405 	 * Synchronization geom.
2406 	 */
2407 	gp = g_new_geomf(mp, "%s.sync", md->md_name);
2408 	gp->softc = sc;
2409 	gp->orphan = g_mirror_orphan;
2410 	sc->sc_sync.ds_geom = gp;
2411 	sc->sc_sync.ds_ndisks = 0;
2412 	error = kthread_create(g_mirror_worker, sc, &sc->sc_worker, 0, 0,
2413 	    "g_mirror %s", md->md_name);
2414 	if (error != 0) {
2415 		G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.",
2416 		    sc->sc_name);
2417 		g_destroy_geom(sc->sc_sync.ds_geom);
2418 		mtx_destroy(&sc->sc_events_mtx);
2419 		mtx_destroy(&sc->sc_queue_mtx);
2420 		g_destroy_geom(sc->sc_geom);
2421 		free(sc, M_MIRROR);
2422 		return (NULL);
2423 	}
2424 
2425 	G_MIRROR_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
2426 
2427 	/*
2428 	 * Run timeout.
2429 	 */
2430 	timeout = atomic_load_acq_int(&g_mirror_timeout);
2431 	callout_reset(&sc->sc_callout, timeout * hz, g_mirror_go, sc);
2432 	return (sc->sc_geom);
2433 }
2434 
2435 int
2436 g_mirror_destroy(struct g_mirror_softc *sc, boolean_t force)
2437 {
2438 	struct g_provider *pp;
2439 
2440 	g_topology_assert();
2441 
2442 	if (sc == NULL)
2443 		return (ENXIO);
2444 	pp = sc->sc_provider;
2445 	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
2446 		if (force) {
2447 			G_MIRROR_DEBUG(0, "Device %s is still open, so it "
2448 			    "can't be definitely removed.", pp->name);
2449 		} else {
2450 			G_MIRROR_DEBUG(1,
2451 			    "Device %s is still open (r%dw%de%d).", pp->name,
2452 			    pp->acr, pp->acw, pp->ace);
2453 			return (EBUSY);
2454 		}
2455 	}
2456 
2457 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_DESTROY;
2458 	sc->sc_flags |= G_MIRROR_DEVICE_FLAG_WAIT;
2459 	g_topology_unlock();
2460 	G_MIRROR_DEBUG(4, "%s: Waking up %p.", __func__, sc);
2461 	mtx_lock(&sc->sc_queue_mtx);
2462 	wakeup(sc);
2463 	mtx_unlock(&sc->sc_queue_mtx);
2464 	G_MIRROR_DEBUG(4, "%s: Sleeping %p.", __func__, &sc->sc_worker);
2465 	while (sc->sc_worker != NULL)
2466 		tsleep(&sc->sc_worker, PRIBIO, "m:destroy", hz / 5);
2467 	G_MIRROR_DEBUG(4, "%s: Woken up %p.", __func__, &sc->sc_worker);
2468 	g_topology_lock();
2469 	g_mirror_destroy_device(sc);
2470 	free(sc, M_MIRROR);
2471 	return (0);
2472 }
2473 
/*
 * Orphan method installed on the temporary geom that g_mirror_taste() uses
 * to read metadata.  It is not expected to be called at all; the assertion
 * below always fails and names the provider that was being tasted.
 */
static void
g_mirror_taste_orphan(struct g_consumer *cp)
{

	KASSERT(1 == 0, ("%s called while tasting %s.", __func__,
	    cp->provider->name));
}
2481 
2482 static struct g_geom *
2483 g_mirror_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
2484 {
2485 	struct g_mirror_metadata md;
2486 	struct g_mirror_softc *sc;
2487 	struct g_consumer *cp;
2488 	struct g_geom *gp;
2489 	int error;
2490 
2491 	g_topology_assert();
2492 	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
2493 	G_MIRROR_DEBUG(2, "Tasting %s.", pp->name);
2494 	/* Skip providers with 0 sectorsize. */
2495 	if (pp->sectorsize == 0)
2496 		return (NULL);
2497 
2498 	gp = g_new_geomf(mp, "mirror:taste");
2499 	/*
2500 	 * This orphan function should be never called.
2501 	 */
2502 	gp->orphan = g_mirror_taste_orphan;
2503 	cp = g_new_consumer(gp);
2504 	g_attach(cp, pp);
2505 	error = g_mirror_read_metadata(cp, &md);
2506 	g_detach(cp);
2507 	g_destroy_consumer(cp);
2508 	g_destroy_geom(gp);
2509 	if (error != 0)
2510 		return (NULL);
2511 	gp = NULL;
2512 
2513 	if (md.md_version > G_MIRROR_VERSION) {
2514 		printf("geom_mirror.ko module is too old to handle %s.\n",
2515 		    pp->name);
2516 		return (NULL);
2517 	}
2518 	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
2519 		return (NULL);
2520 	if ((md.md_dflags & G_MIRROR_DISK_FLAG_INACTIVE) != 0) {
2521 		G_MIRROR_DEBUG(0,
2522 		    "Device %s: provider %s marked as inactive, skipping.",
2523 		    md.md_name, pp->name);
2524 		return (NULL);
2525 	}
2526 	if (g_mirror_debug >= 2)
2527 		mirror_metadata_dump(&md);
2528 
2529 	/*
2530 	 * Let's check if device already exists.
2531 	 */
2532 	sc = NULL;
2533 	LIST_FOREACH(gp, &mp->geom, geom) {
2534 		sc = gp->softc;
2535 		if (sc == NULL)
2536 			continue;
2537 		if (sc->sc_sync.ds_geom == gp)
2538 			continue;
2539 		if (strcmp(md.md_name, sc->sc_name) != 0)
2540 			continue;
2541 		if (md.md_mid != sc->sc_id) {
2542 			G_MIRROR_DEBUG(0, "Device %s already configured.",
2543 			    sc->sc_name);
2544 			return (NULL);
2545 		}
2546 		break;
2547 	}
2548 	if (gp == NULL) {
2549 		gp = g_mirror_create(mp, &md);
2550 		if (gp == NULL) {
2551 			G_MIRROR_DEBUG(0, "Cannot create device %s.",
2552 			    md.md_name);
2553 			return (NULL);
2554 		}
2555 		sc = gp->softc;
2556 	}
2557 	G_MIRROR_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
2558 	error = g_mirror_add_disk(sc, pp, &md);
2559 	if (error != 0) {
2560 		G_MIRROR_DEBUG(0, "Cannot add disk %s to %s (error=%d).",
2561 		    pp->name, gp->name, error);
2562 		if (LIST_EMPTY(&sc->sc_disks))
2563 			g_mirror_destroy(sc, 1);
2564 		return (NULL);
2565 	}
2566 	return (gp);
2567 }
2568 
2569 static int
2570 g_mirror_destroy_geom(struct gctl_req *req __unused,
2571     struct g_class *mp __unused, struct g_geom *gp)
2572 {
2573 
2574 	return (g_mirror_destroy(gp->softc, 0));
2575 }
2576 
2577 static void
2578 g_mirror_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
2579     struct g_consumer *cp, struct g_provider *pp)
2580 {
2581 	struct g_mirror_softc *sc;
2582 
2583 	g_topology_assert();
2584 
2585 	sc = gp->softc;
2586 	if (sc == NULL)
2587 		return;
2588 	/* Skip synchronization geom. */
2589 	if (gp == sc->sc_sync.ds_geom)
2590 		return;
2591 	if (pp != NULL) {
2592 		/* Nothing here. */
2593 	} else if (cp != NULL) {
2594 		struct g_mirror_disk *disk;
2595 
2596 		disk = cp->private;
2597 		if (disk == NULL)
2598 			return;
2599 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)disk->d_id);
2600 		if (disk->d_state == G_MIRROR_DISK_STATE_SYNCHRONIZING) {
2601 			sbuf_printf(sb, "%s<Synchronized>", indent);
2602 			if (disk->d_sync.ds_offset_done == 0)
2603 				sbuf_printf(sb, "0%%");
2604 			else {
2605 				sbuf_printf(sb, "%u%%",
2606 				    (u_int)((disk->d_sync.ds_offset_done * 100) /
2607 				    sc->sc_provider->mediasize));
2608 			}
2609 			sbuf_printf(sb, "</Synchronized>\n");
2610 		}
2611 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent,
2612 		    disk->d_sync.ds_syncid);
2613 		sbuf_printf(sb, "%s<Flags>", indent);
2614 		if (disk->d_flags == 0)
2615 			sbuf_printf(sb, "NONE");
2616 		else {
2617 			int first = 1;
2618 
2619 #define	ADD_FLAG(flag, name)	do {					\
2620 	if ((disk->d_flags & (flag)) != 0) {				\
2621 		if (!first)						\
2622 			sbuf_printf(sb, ", ");				\
2623 		else							\
2624 			first = 0;					\
2625 		sbuf_printf(sb, name);					\
2626 	}								\
2627 } while (0)
2628 			ADD_FLAG(G_MIRROR_DISK_FLAG_DIRTY, "DIRTY");
2629 			ADD_FLAG(G_MIRROR_DISK_FLAG_HARDCODED, "HARDCODED");
2630 			ADD_FLAG(G_MIRROR_DISK_FLAG_INACTIVE, "INACTIVE");
2631 			ADD_FLAG(G_MIRROR_DISK_FLAG_SYNCHRONIZING,
2632 			    "SYNCHRONIZING");
2633 			ADD_FLAG(G_MIRROR_DISK_FLAG_FORCE_SYNC, "FORCE_SYNC");
2634 #undef	ADD_FLAG
2635 		}
2636 		sbuf_printf(sb, "</Flags>\n");
2637 		sbuf_printf(sb, "%s<Priority>%u</Priority>\n", indent,
2638 		    disk->d_priority);
2639 		sbuf_printf(sb, "%s<State>%s</State>\n", indent,
2640 		    g_mirror_disk_state2str(disk->d_state));
2641 	} else {
2642 		sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
2643 		sbuf_printf(sb, "%s<SyncID>%u</SyncID>\n", indent, sc->sc_syncid);
2644 		sbuf_printf(sb, "%s<Flags>", indent);
2645 		if (sc->sc_flags == 0)
2646 			sbuf_printf(sb, "NONE");
2647 		else {
2648 			int first = 1;
2649 
2650 #define	ADD_FLAG(flag, name)	do {					\
2651 	if ((sc->sc_flags & (flag)) != 0) {				\
2652 		if (!first)						\
2653 			sbuf_printf(sb, ", ");				\
2654 		else							\
2655 			first = 0;					\
2656 		sbuf_printf(sb, name);					\
2657 	}								\
2658 } while (0)
2659 			ADD_FLAG(G_MIRROR_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC");
2660 #undef	ADD_FLAG
2661 		}
2662 		sbuf_printf(sb, "</Flags>\n");
2663 		sbuf_printf(sb, "%s<Slice>%u</Slice>\n", indent,
2664 		    (u_int)sc->sc_slice);
2665 		sbuf_printf(sb, "%s<Balance>%s</Balance>\n", indent,
2666 		    balance_name(sc->sc_balance));
2667 		sbuf_printf(sb, "%s<Components>%u</Components>\n", indent,
2668 		    sc->sc_ndisks);
2669 		sbuf_printf(sb, "%s<State>", indent);
2670 		if (sc->sc_state == G_MIRROR_DEVICE_STATE_STARTING)
2671 			sbuf_printf(sb, "%s", "STARTING");
2672 		else if (sc->sc_ndisks ==
2673 		    g_mirror_ndisks(sc, G_MIRROR_DISK_STATE_ACTIVE))
2674 			sbuf_printf(sb, "%s", "COMPLETE");
2675 		else
2676 			sbuf_printf(sb, "%s", "DEGRADED");
2677 		sbuf_printf(sb, "</State>\n");
2678 	}
2679 }
2680 
/* Declare g_mirror_class as a GEOM class under the name g_mirror. */
DECLARE_GEOM_CLASS(g_mirror_class, g_mirror);
2682