xref: /freebsd/sys/geom/vinum/geom_vinum_subr.c (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * Copyright (c) 2007, 2009 Ulf Lilleengen
6  * Copyright (c) 1997, 1998, 1999
7  *      Nan Yang Computer Services Limited.  All rights reserved.
8  *
9  *  Parts written by Greg Lehey
10  *
11  *  This software is distributed under the so-called ``Berkeley
12  *  License'':
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *      This product includes software developed by Nan Yang Computer
25  *      Services Limited.
26  * 4. Neither the name of the Company nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * This software is provided ``as is'', and any express or implied
31  * warranties, including, but not limited to, the implied warranties of
32  * merchantability and fitness for a particular purpose are disclaimed.
33  * In no event shall the company or contributors be liable for any
34  * direct, indirect, incidental, special, exemplary, or consequential
35  * damages (including, but not limited to, procurement of substitute
36  * goods or services; loss of use, data, or profits; or business
37  * interruption) however caused and on any theory of liability, whether
38  * in contract, strict liability, or tort (including negligence or
39  * otherwise) arising in any way out of the use of this software, even if
40  * advised of the possibility of such damage.
41  *
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include <sys/param.h>
48 #include <sys/malloc.h>
49 #include <sys/sbuf.h>
50 #include <sys/systm.h>
51 
52 #include <geom/geom.h>
53 #include <geom/geom_dbg.h>
54 #include <geom/vinum/geom_vinum_var.h>
55 #include <geom/vinum/geom_vinum.h>
56 #include <geom/vinum/geom_vinum_share.h>
57 
58 int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
59 static off_t gv_plex_smallest_sd(struct gv_plex *);
60 
61 void
62 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
63 {
64 	char *aptr, *bptr, *cptr;
65 	struct gv_volume *v, *v2;
66 	struct gv_plex *p, *p2;
67 	struct gv_sd *s, *s2;
68 	int error, is_newer, tokens;
69 	char *token[GV_MAXARGS];
70 
71 	is_newer = gv_drive_is_newer(sc, d);
72 
73 	/* Until the end of the string *buf. */
74 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
75 		bptr = aptr;
76 		cptr = aptr;
77 
78 		/* Separate input lines. */
79 		while (*bptr != '\n')
80 			bptr++;
81 		*bptr = '\0';
82 		bptr++;
83 
84 		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
85 
86 		if (tokens <= 0)
87 			continue;
88 
89 		if (!strcmp(token[0], "volume")) {
90 			v = gv_new_volume(tokens, token);
91 			if (v == NULL) {
92 				G_VINUM_DEBUG(0, "config parse failed volume");
93 				break;
94 			}
95 
96 			v2 = gv_find_vol(sc, v->name);
97 			if (v2 != NULL) {
98 				if (is_newer) {
99 					v2->state = v->state;
100 					G_VINUM_DEBUG(2, "newer volume found!");
101 				}
102 				g_free(v);
103 				continue;
104 			}
105 
106 			gv_create_volume(sc, v);
107 
108 		} else if (!strcmp(token[0], "plex")) {
109 			p = gv_new_plex(tokens, token);
110 			if (p == NULL) {
111 				G_VINUM_DEBUG(0, "config parse failed plex");
112 				break;
113 			}
114 
115 			p2 = gv_find_plex(sc, p->name);
116 			if (p2 != NULL) {
117 				/* XXX */
118 				if (is_newer) {
119 					p2->state = p->state;
120 					G_VINUM_DEBUG(2, "newer plex found!");
121 				}
122 				g_free(p);
123 				continue;
124 			}
125 
126 			error = gv_create_plex(sc, p);
127 			if (error)
128 				continue;
129 			/*
130 			 * These flags were set in gv_create_plex() and are not
131 			 * needed here (on-disk config parsing).
132 			 */
133 			p->flags &= ~GV_PLEX_ADDED;
134 
135 		} else if (!strcmp(token[0], "sd")) {
136 			s = gv_new_sd(tokens, token);
137 
138 			if (s == NULL) {
139 				G_VINUM_DEBUG(0, "config parse failed subdisk");
140 				break;
141 			}
142 
143 			s2 = gv_find_sd(sc, s->name);
144 			if (s2 != NULL) {
145 				/* XXX */
146 				if (is_newer) {
147 					s2->state = s->state;
148 					G_VINUM_DEBUG(2, "newer subdisk found!");
149 				}
150 				g_free(s);
151 				continue;
152 			}
153 
154 			/*
155 			 * Signal that this subdisk was tasted, and could
156 			 * possibly reference a drive that isn't in our config
157 			 * yet.
158 			 */
159 			s->flags |= GV_SD_TASTED;
160 
161 			if (s->state == GV_SD_UP)
162 				s->flags |= GV_SD_CANGOUP;
163 
164 			error = gv_create_sd(sc, s);
165 			if (error)
166 				continue;
167 
168 			/*
169 			 * This flag was set in gv_create_sd() and is not
170 			 * needed here (on-disk config parsing).
171 			 */
172 			s->flags &= ~GV_SD_NEWBORN;
173 			s->flags &= ~GV_SD_GROW;
174 		}
175 	}
176 }
177 
178 /*
179  * Format the vinum configuration properly.  If ondisk is non-zero then the
180  * configuration is intended to be written to disk later.
181  */
182 void
183 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
184 {
185 	struct gv_drive *d;
186 	struct gv_sd *s;
187 	struct gv_plex *p;
188 	struct gv_volume *v;
189 
190 	/*
191 	 * We don't need the drive configuration if we're not writing the
192 	 * config to disk.
193 	 */
194 	if (!ondisk) {
195 		LIST_FOREACH(d, &sc->drives, drive) {
196 			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
197 			    d->name, d->device);
198 		}
199 	}
200 
201 	LIST_FOREACH(v, &sc->volumes, volume) {
202 		if (!ondisk)
203 			sbuf_printf(sb, "%s", prefix);
204 		sbuf_printf(sb, "volume %s", v->name);
205 		if (ondisk)
206 			sbuf_printf(sb, " state %s", gv_volstate(v->state));
207 		sbuf_printf(sb, "\n");
208 	}
209 
210 	LIST_FOREACH(p, &sc->plexes, plex) {
211 		if (!ondisk)
212 			sbuf_printf(sb, "%s", prefix);
213 		sbuf_printf(sb, "plex name %s org %s ", p->name,
214 		    gv_plexorg(p->org));
215 		if (gv_is_striped(p))
216 			sbuf_printf(sb, "%ds ", p->stripesize / 512);
217 		if (p->vol_sc != NULL)
218 			sbuf_printf(sb, "vol %s", p->volume);
219 		if (ondisk)
220 			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
221 		sbuf_printf(sb, "\n");
222 	}
223 
224 	LIST_FOREACH(s, &sc->subdisks, sd) {
225 		if (!ondisk)
226 			sbuf_printf(sb, "%s", prefix);
227 		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
228 		    "%jds", s->name, s->drive, s->size / 512,
229 		    s->drive_offset / 512);
230 		if (s->plex_sc != NULL) {
231 			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
232 			    s->plex_offset / 512);
233 		}
234 		if (ondisk)
235 			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
236 		sbuf_printf(sb, "\n");
237 	}
238 }
239 
240 static off_t
241 gv_plex_smallest_sd(struct gv_plex *p)
242 {
243 	struct gv_sd *s;
244 	off_t smallest;
245 
246 	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
247 
248 	s = LIST_FIRST(&p->subdisks);
249 	if (s == NULL)
250 		return (-1);
251 	smallest = s->size;
252 	LIST_FOREACH(s, &p->subdisks, in_plex) {
253 		if (s->size < smallest)
254 			smallest = s->size;
255 	}
256 	return (smallest);
257 }
258 
259 /* Walk over plexes in a volume and count how many are down. */
260 int
261 gv_plexdown(struct gv_volume *v)
262 {
263 	int plexdown;
264 	struct gv_plex *p;
265 
266 	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
267 
268 	plexdown = 0;
269 
270 	LIST_FOREACH(p, &v->plexes, plex) {
271 		if (p->state == GV_PLEX_DOWN)
272 			plexdown++;
273 	}
274 	return (plexdown);
275 }
276 
277 int
278 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
279 {
280 	struct gv_sd *s2;
281 	off_t psizeorig, remainder, smallest;
282 
283 	/* If this subdisk was already given to this plex, do nothing. */
284 	if (s->plex_sc == p)
285 		return (0);
286 
287 	/* Check correct size of this subdisk. */
288 	s2 = LIST_FIRST(&p->subdisks);
289 	/* Adjust the subdisk-size if necessary. */
290 	if (s2 != NULL && gv_is_striped(p)) {
291 		/* First adjust to the stripesize. */
292 		remainder = s->size % p->stripesize;
293 
294 		if (remainder) {
295 			G_VINUM_DEBUG(1, "size of sd %s is not a "
296 			    "multiple of plex stripesize, taking off "
297 			    "%jd bytes", s->name,
298 			    (intmax_t)remainder);
299 			gv_adjust_freespace(s, remainder);
300 		}
301 
302 		smallest = gv_plex_smallest_sd(p);
303 		/* Then take off extra if other subdisks are smaller. */
304 		remainder = s->size - smallest;
305 
306 		/*
307 		 * Don't allow a remainder below zero for running plexes, it's too
308 		 * painful, and if someone were to accidentally do this, the
309 		 * resulting array might be smaller than the original... not god
310 		 */
311 		if (remainder < 0) {
312 			if (!(p->flags & GV_PLEX_NEWBORN)) {
313 				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
314 				    s->name, p->name);
315 				return (GV_ERR_BADSIZE);
316 			}
317 			/* Adjust other subdisks. */
318 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
319 				G_VINUM_DEBUG(1, "size of sd %s is to big, "
320 				    "taking off %jd bytes", s->name,
321 				    (intmax_t)remainder);
322 				gv_adjust_freespace(s2, (remainder * -1));
323 			}
324 		} else if (remainder > 0) {
325 			G_VINUM_DEBUG(1, "size of sd %s is to big, "
326 			    "taking off %jd bytes", s->name,
327 			    (intmax_t)remainder);
328 			gv_adjust_freespace(s, remainder);
329 		}
330 	}
331 
332 	/* Find the correct plex offset for this subdisk, if needed. */
333 	if (s->plex_offset == -1) {
334 		/*
335 		 * First set it to 0 to catch the case where we had a detached
336 		 * subdisk that didn't get any good offset.
337 		 */
338 		s->plex_offset = 0;
339 		if (p->sdcount) {
340 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
341 				if (gv_is_striped(p))
342 					s->plex_offset = p->sdcount *
343 					    p->stripesize;
344 				else
345 					s->plex_offset = s2->plex_offset +
346 					    s2->size;
347 			}
348 		}
349 	}
350 
351 	/* There are no subdisks for this plex yet, just insert it. */
352 	if (LIST_EMPTY(&p->subdisks)) {
353 		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
354 
355 	/* Insert in correct order, depending on plex_offset. */
356 	} else {
357 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
358 			if (s->plex_offset < s2->plex_offset) {
359 				LIST_INSERT_BEFORE(s2, s, in_plex);
360 				break;
361 			} else if (LIST_NEXT(s2, in_plex) == NULL) {
362 				LIST_INSERT_AFTER(s2, s, in_plex);
363 				break;
364 			}
365 		}
366 	}
367 
368 	s->plex_sc = p;
369         /* Adjust the size of our plex. We check if the plex misses a subdisk,
370 	 * so we don't make the plex smaller than it actually should be.
371 	 */
372 	psizeorig = p->size;
373 	p->size = gv_plex_size(p);
374 	/* Make sure the size is not changed. */
375 	if (p->sddetached > 0) {
376 		if (p->size < psizeorig) {
377 			p->size = psizeorig;
378 			/* We make sure wee need another subdisk. */
379 			if (p->sddetached == 1)
380 				p->sddetached++;
381 		}
382 		p->sddetached--;
383 	} else {
384 		if ((p->org == GV_PLEX_RAID5 ||
385 		    p->org == GV_PLEX_STRIPED) &&
386 		    !(p->flags & GV_PLEX_NEWBORN) &&
387 		    p->state == GV_PLEX_UP) {
388 			s->flags |= GV_SD_GROW;
389 		}
390 		p->sdcount++;
391 	}
392 
393 	return (0);
394 }
395 
396 void
397 gv_update_vol_size(struct gv_volume *v, off_t size)
398 {
399 	if (v == NULL)
400 		return;
401 	if (v->provider != NULL) {
402 		g_topology_lock();
403 		v->provider->mediasize = size;
404 		g_topology_unlock();
405 	}
406 	v->size = size;
407 }
408 
409 /* Return how many subdisks that constitute the original plex. */
410 int
411 gv_sdcount(struct gv_plex *p, int growing)
412 {
413 	struct gv_sd *s;
414 	int sdcount;
415 
416 	sdcount = p->sdcount;
417 	if (growing) {
418 		LIST_FOREACH(s, &p->subdisks, in_plex) {
419 			if (s->flags & GV_SD_GROW)
420 				sdcount--;
421 		}
422 	}
423 
424 	return (sdcount);
425 }
426 
427 /* Calculates the plex size. */
428 off_t
429 gv_plex_size(struct gv_plex *p)
430 {
431 	struct gv_sd *s;
432 	off_t size;
433 	int sdcount;
434 
435 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
436 
437 	/* Adjust the size of our plex. */
438 	size = 0;
439 	sdcount = gv_sdcount(p, 1);
440 	switch (p->org) {
441 	case GV_PLEX_CONCAT:
442 		LIST_FOREACH(s, &p->subdisks, in_plex)
443 			size += s->size;
444 		break;
445 	case GV_PLEX_STRIPED:
446 		s = LIST_FIRST(&p->subdisks);
447 		size = ((s != NULL) ? (sdcount * s->size) : 0);
448 		break;
449 	case GV_PLEX_RAID5:
450 		s = LIST_FIRST(&p->subdisks);
451 		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
452 		break;
453 	}
454 
455 	return (size);
456 }
457 
458 /* Returns the size of a volume. */
459 off_t
460 gv_vol_size(struct gv_volume *v)
461 {
462 	struct gv_plex *p;
463 	off_t minplexsize;
464 
465 	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
466 
467 	p = LIST_FIRST(&v->plexes);
468 	if (p == NULL)
469 		return (0);
470 
471 	minplexsize = p->size;
472 	LIST_FOREACH(p, &v->plexes, in_volume) {
473 		if (p->size < minplexsize) {
474 			minplexsize = p->size;
475 		}
476 	}
477 	return (minplexsize);
478 }
479 
480 void
481 gv_update_plex_config(struct gv_plex *p)
482 {
483 	struct gv_sd *s, *s2;
484 	off_t remainder;
485 	int required_sds, state;
486 
487 	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
488 
489 	/* The plex was added to an already running volume. */
490 	if (p->flags & GV_PLEX_ADDED)
491 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
492 
493 	switch (p->org) {
494 	case GV_PLEX_STRIPED:
495 		required_sds = 2;
496 		break;
497 	case GV_PLEX_RAID5:
498 		required_sds = 3;
499 		break;
500 	case GV_PLEX_CONCAT:
501 	default:
502 		required_sds = 0;
503 		break;
504 	}
505 
506 	if (required_sds) {
507 		if (p->sdcount < required_sds) {
508 			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
509 		}
510 
511 		/*
512 		 * The subdisks in striped plexes must all have the same size.
513 		 */
514 		s = LIST_FIRST(&p->subdisks);
515 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
516 			if (s->size != s2->size) {
517 				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
518 				    "(%jd) <> %s (%jd)", s->name, s->size,
519 				    s2->name, s2->size);
520 				gv_set_plex_state(p, GV_PLEX_DOWN,
521 				    GV_SETSTATE_FORCE);
522 			}
523 		}
524 
525 		LIST_FOREACH(s, &p->subdisks, in_plex) {
526 			/* Trim subdisk sizes to match the stripe size. */
527 			remainder = s->size % p->stripesize;
528 			if (remainder) {
529 				G_VINUM_DEBUG(1, "size of sd %s is not a "
530 				    "multiple of plex stripesize, taking off "
531 				    "%jd bytes", s->name, (intmax_t)remainder);
532 				gv_adjust_freespace(s, remainder);
533 			}
534 		}
535 	}
536 
537 	p->size = gv_plex_size(p);
538 	if (p->sdcount == 0)
539 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
540 	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
541 		LIST_FOREACH(s, &p->subdisks, in_plex)
542 			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
543 		/* If added to a volume, we want the plex to be down. */
544 		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
545 		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
546 		p->flags &= ~GV_PLEX_ADDED;
547 	} else if (p->flags & GV_PLEX_ADDED) {
548 		LIST_FOREACH(s, &p->subdisks, in_plex)
549 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
550 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
551 		p->flags &= ~GV_PLEX_ADDED;
552 	} else if (p->state == GV_PLEX_UP) {
553 		LIST_FOREACH(s, &p->subdisks, in_plex) {
554 			if (s->flags & GV_SD_GROW) {
555 				gv_set_plex_state(p, GV_PLEX_GROWABLE,
556 				    GV_SETSTATE_FORCE);
557 				break;
558 			}
559 		}
560 	}
561 	/* Our plex is grown up now. */
562 	p->flags &= ~GV_PLEX_NEWBORN;
563 }
564 
565 /*
566  * Give a subdisk to a drive, check and adjust several parameters, adjust
567  * freelist.
568  */
569 int
570 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
571 {
572 	struct gv_sd *s2;
573 	struct gv_freelist *fl, *fl2;
574 	off_t tmp;
575 	int i;
576 
577 	fl2 = NULL;
578 
579 	/* Shortcut for "referenced" drives. */
580 	if (d->flags & GV_DRIVE_REFERENCED) {
581 		s->drive_sc = d;
582 		return (0);
583 	}
584 
585 	/* Check if this subdisk was already given to this drive. */
586 	if (s->drive_sc != NULL) {
587 		if (s->drive_sc == d) {
588 			if (!(s->flags & GV_SD_TASTED)) {
589 				return (0);
590 			}
591 		} else {
592 			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
593 			    "(already on '%s')", s->name, d->name,
594 			    s->drive_sc->name);
595 			return (GV_ERR_ISATTACHED);
596 		}
597 	}
598 
599 	/* Preliminary checks. */
600 	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
601 		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
602 		    s->name);
603 		return (GV_ERR_NOSPACE);
604 	}
605 
606 	/* If no size was given for this subdisk, try to auto-size it... */
607 	if (s->size == -1) {
608 		/* Find the largest available slot. */
609 		LIST_FOREACH(fl, &d->freelist, freelist) {
610 			if (fl->size < s->size)
611 				continue;
612 			s->size = fl->size;
613 			s->drive_offset = fl->offset;
614 			fl2 = fl;
615 		}
616 
617 		/* No good slot found? */
618 		if (s->size == -1) {
619 			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
620 			    s->name, d->name);
621 			return (GV_ERR_BADSIZE);
622 		}
623 
624 	/*
625 	 * ... or check if we have a free slot that's large enough for the
626 	 * given size.
627 	 */
628 	} else {
629 		i = 0;
630 		LIST_FOREACH(fl, &d->freelist, freelist) {
631 			if (fl->size < s->size)
632 				continue;
633 			/* Assign drive offset, if not given. */
634 			if (s->drive_offset == -1)
635 				s->drive_offset = fl->offset;
636 			fl2 = fl;
637 			i++;
638 			break;
639 		}
640 
641 		/* Couldn't find a good free slot. */
642 		if (i == 0) {
643 			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
644 			    s->name, d->name);
645 			return (GV_ERR_NOSPACE);
646 		}
647 	}
648 
649 	/* No drive offset given, try to calculate it. */
650 	if (s->drive_offset == -1) {
651 
652 		/* Add offsets and sizes from other subdisks on this drive. */
653 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
654 			s->drive_offset = s2->drive_offset + s2->size;
655 		}
656 
657 		/*
658 		 * If there are no other subdisks yet, then set the default
659 		 * offset to GV_DATA_START.
660 		 */
661 		if (s->drive_offset == -1)
662 			s->drive_offset = GV_DATA_START;
663 
664 	/* Check if we have a free slot at the given drive offset. */
665 	} else {
666 		i = 0;
667 		LIST_FOREACH(fl, &d->freelist, freelist) {
668 			/* Yes, this subdisk fits. */
669 			if ((fl->offset <= s->drive_offset) &&
670 			    (fl->offset + fl->size >=
671 			    s->drive_offset + s->size)) {
672 				i++;
673 				fl2 = fl;
674 				break;
675 			}
676 		}
677 
678 		/* Couldn't find a good free slot. */
679 		if (i == 0) {
680 			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
681 			    "on '%s'", s->name, d->name);
682 			return (GV_ERR_NOSPACE);
683 		}
684 	}
685 
686 	/*
687 	 * Now that all parameters are checked and set up, we can give the
688 	 * subdisk to the drive and adjust the freelist.
689 	 */
690 
691 	/* First, adjust the freelist. */
692 	LIST_FOREACH(fl, &d->freelist, freelist) {
693 		/* Look for the free slot that we have found before. */
694 		if (fl != fl2)
695 			continue;
696 
697 		/* The subdisk starts at the beginning of the free slot. */
698 		if (fl->offset == s->drive_offset) {
699 			fl->offset += s->size;
700 			fl->size -= s->size;
701 
702 			/* The subdisk uses the whole slot, so remove it. */
703 			if (fl->size == 0) {
704 				d->freelist_entries--;
705 				LIST_REMOVE(fl, freelist);
706 			}
707 		/*
708 		 * The subdisk does not start at the beginning of the free
709 		 * slot.
710 		 */
711 		} else {
712 			tmp = fl->offset + fl->size;
713 			fl->size = s->drive_offset - fl->offset;
714 
715 			/*
716 			 * The subdisk didn't use the complete rest of the free
717 			 * slot, so we need to split it.
718 			 */
719 			if (s->drive_offset + s->size != tmp) {
720 				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
721 				fl2->offset = s->drive_offset + s->size;
722 				fl2->size = tmp - fl2->offset;
723 				LIST_INSERT_AFTER(fl, fl2, freelist);
724 				d->freelist_entries++;
725 			}
726 		}
727 		break;
728 	}
729 
730 	/*
731 	 * This is the first subdisk on this drive, just insert it into the
732 	 * list.
733 	 */
734 	if (LIST_EMPTY(&d->subdisks)) {
735 		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
736 
737 	/* There are other subdisks, so insert this one in correct order. */
738 	} else {
739 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
740 			if (s->drive_offset < s2->drive_offset) {
741 				LIST_INSERT_BEFORE(s2, s, from_drive);
742 				break;
743 			} else if (LIST_NEXT(s2, from_drive) == NULL) {
744 				LIST_INSERT_AFTER(s2, s, from_drive);
745 				break;
746 			}
747 		}
748 	}
749 
750 	d->sdcount++;
751 	d->avail -= s->size;
752 
753 	s->flags &= ~GV_SD_TASTED;
754 
755 	/* Link back from the subdisk to this drive. */
756 	s->drive_sc = d;
757 
758 	return (0);
759 }
760 
761 void
762 gv_free_sd(struct gv_sd *s)
763 {
764 	struct gv_drive *d;
765 	struct gv_freelist *fl, *fl2;
766 
767 	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
768 
769 	d = s->drive_sc;
770 	if (d == NULL)
771 		return;
772 
773 	/*
774 	 * First, find the free slot that's immediately before or after this
775 	 * subdisk.
776 	 */
777 	fl = NULL;
778 	LIST_FOREACH(fl, &d->freelist, freelist) {
779 		if (fl->offset == s->drive_offset + s->size)
780 			break;
781 		if (fl->offset + fl->size == s->drive_offset)
782 			break;
783 	}
784 
785 	/* If there is no free slot behind this subdisk, so create one. */
786 	if (fl == NULL) {
787 
788 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
789 		fl->size = s->size;
790 		fl->offset = s->drive_offset;
791 
792 		if (d->freelist_entries == 0) {
793 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
794 		} else {
795 			LIST_FOREACH(fl2, &d->freelist, freelist) {
796 				if (fl->offset < fl2->offset) {
797 					LIST_INSERT_BEFORE(fl2, fl, freelist);
798 					break;
799 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
800 					LIST_INSERT_AFTER(fl2, fl, freelist);
801 					break;
802 				}
803 			}
804 		}
805 
806 		d->freelist_entries++;
807 
808 	/* Expand the free slot we just found. */
809 	} else {
810 		fl->size += s->size;
811 		if (fl->offset > s->drive_offset)
812 			fl->offset = s->drive_offset;
813 	}
814 
815 	d->avail += s->size;
816 	d->sdcount--;
817 }
818 
819 void
820 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
821 {
822 	struct gv_drive *d;
823 	struct gv_freelist *fl, *fl2;
824 
825 	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
826 	d = s->drive_sc;
827 	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
828 
829 	/* First, find the free slot that's immediately after this subdisk. */
830 	fl = NULL;
831 	LIST_FOREACH(fl, &d->freelist, freelist) {
832 		if (fl->offset == s->drive_offset + s->size)
833 			break;
834 	}
835 
836 	/* If there is no free slot behind this subdisk, so create one. */
837 	if (fl == NULL) {
838 
839 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
840 		fl->size = remainder;
841 		fl->offset = s->drive_offset + s->size - remainder;
842 
843 		if (d->freelist_entries == 0) {
844 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
845 		} else {
846 			LIST_FOREACH(fl2, &d->freelist, freelist) {
847 				if (fl->offset < fl2->offset) {
848 					LIST_INSERT_BEFORE(fl2, fl, freelist);
849 					break;
850 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
851 					LIST_INSERT_AFTER(fl2, fl, freelist);
852 					break;
853 				}
854 			}
855 		}
856 
857 		d->freelist_entries++;
858 
859 	/* Expand the free slot we just found. */
860 	} else {
861 		fl->offset -= remainder;
862 		fl->size += remainder;
863 	}
864 
865 	s->size -= remainder;
866 	d->avail += remainder;
867 }
868 
869 /* Check if the given plex is a striped one. */
870 int
871 gv_is_striped(struct gv_plex *p)
872 {
873 	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
874 	switch(p->org) {
875 	case GV_PLEX_STRIPED:
876 	case GV_PLEX_RAID5:
877 		return (1);
878 	default:
879 		return (0);
880 	}
881 }
882 
883 /* Find a volume by name. */
884 struct gv_volume *
885 gv_find_vol(struct gv_softc *sc, char *name)
886 {
887 	struct gv_volume *v;
888 
889 	LIST_FOREACH(v, &sc->volumes, volume) {
890 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
891 			return (v);
892 	}
893 
894 	return (NULL);
895 }
896 
897 /* Find a plex by name. */
898 struct gv_plex *
899 gv_find_plex(struct gv_softc *sc, char *name)
900 {
901 	struct gv_plex *p;
902 
903 	LIST_FOREACH(p, &sc->plexes, plex) {
904 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
905 			return (p);
906 	}
907 
908 	return (NULL);
909 }
910 
911 /* Find a subdisk by name. */
912 struct gv_sd *
913 gv_find_sd(struct gv_softc *sc, char *name)
914 {
915 	struct gv_sd *s;
916 
917 	LIST_FOREACH(s, &sc->subdisks, sd) {
918 		if (!strncmp(s->name, name, GV_MAXSDNAME))
919 			return (s);
920 	}
921 
922 	return (NULL);
923 }
924 
925 /* Find a drive by name. */
926 struct gv_drive *
927 gv_find_drive(struct gv_softc *sc, char *name)
928 {
929 	struct gv_drive *d;
930 
931 	LIST_FOREACH(d, &sc->drives, drive) {
932 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
933 			return (d);
934 	}
935 
936 	return (NULL);
937 }
938 
939 /* Find a drive given a device. */
940 struct gv_drive *
941 gv_find_drive_device(struct gv_softc *sc, char *device)
942 {
943 	struct gv_drive *d;
944 
945 	LIST_FOREACH(d, &sc->drives, drive) {
946 		if(!strcmp(d->device, device))
947 			return (d);
948 	}
949 
950 	return (NULL);
951 }
952 
953 /* Check if any consumer of the given geom is open. */
954 int
955 gv_consumer_is_open(struct g_consumer *cp)
956 {
957 	if (cp == NULL)
958 		return (0);
959 
960 	if (cp->acr || cp->acw || cp->ace)
961 		return (1);
962 
963 	return (0);
964 }
965 
966 int
967 gv_provider_is_open(struct g_provider *pp)
968 {
969 	if (pp == NULL)
970 		return (0);
971 
972 	if (pp->acr || pp->acw || pp->ace)
973 		return (1);
974 
975 	return (0);
976 }
977 
978 /*
979  * Compare the modification dates of the drives.
980  * Return 1 if a > b, 0 otherwise.
981  */
982 int
983 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
984 {
985 	struct gv_drive *d2;
986 	struct timeval *a, *b;
987 
988 	KASSERT(!LIST_EMPTY(&sc->drives),
989 	    ("gv_is_drive_newer: empty drive list"));
990 
991 	a = &d->hdr->label.last_update;
992 	LIST_FOREACH(d2, &sc->drives, drive) {
993 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
994 		    (d2->hdr == NULL))
995 			continue;
996 		b = &d2->hdr->label.last_update;
997 		if (timevalcmp(a, b, >))
998 			return (1);
999 	}
1000 
1001 	return (0);
1002 }
1003 
1004 /* Return the type of object identified by string 'name'. */
1005 int
1006 gv_object_type(struct gv_softc *sc, char *name)
1007 {
1008 	struct gv_drive *d;
1009 	struct gv_plex *p;
1010 	struct gv_sd *s;
1011 	struct gv_volume *v;
1012 
1013 	LIST_FOREACH(v, &sc->volumes, volume) {
1014 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1015 			return (GV_TYPE_VOL);
1016 	}
1017 
1018 	LIST_FOREACH(p, &sc->plexes, plex) {
1019 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1020 			return (GV_TYPE_PLEX);
1021 	}
1022 
1023 	LIST_FOREACH(s, &sc->subdisks, sd) {
1024 		if (!strncmp(s->name, name, GV_MAXSDNAME))
1025 			return (GV_TYPE_SD);
1026 	}
1027 
1028 	LIST_FOREACH(d, &sc->drives, drive) {
1029 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1030 			return (GV_TYPE_DRIVE);
1031 	}
1032 
1033 	return (GV_ERR_NOTFOUND);
1034 }
1035 
1036 void
1037 gv_setup_objects(struct gv_softc *sc)
1038 {
1039 	struct g_provider *pp;
1040 	struct gv_volume *v;
1041 	struct gv_plex *p;
1042 	struct gv_sd *s;
1043 	struct gv_drive *d;
1044 
1045 	LIST_FOREACH(s, &sc->subdisks, sd) {
1046 		d = gv_find_drive(sc, s->drive);
1047 		if (d != NULL)
1048 			gv_sd_to_drive(s, d);
1049 		p = gv_find_plex(sc, s->plex);
1050 		if (p != NULL)
1051 			gv_sd_to_plex(s, p);
1052 		gv_update_sd_state(s);
1053 	}
1054 
1055 	LIST_FOREACH(p, &sc->plexes, plex) {
1056 		gv_update_plex_config(p);
1057 		v = gv_find_vol(sc, p->volume);
1058 		if (v != NULL && p->vol_sc != v) {
1059 			p->vol_sc = v;
1060 			v->plexcount++;
1061 			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1062 		}
1063 		gv_update_plex_config(p);
1064 	}
1065 
1066 	LIST_FOREACH(v, &sc->volumes, volume) {
1067 		v->size = gv_vol_size(v);
1068 		if (v->provider == NULL) {
1069 			g_topology_lock();
1070 			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1071 			pp->mediasize = v->size;
1072 			pp->sectorsize = 512;    /* XXX */
1073 			g_error_provider(pp, 0);
1074 			v->provider = pp;
1075 			pp->private = v;
1076 			g_topology_unlock();
1077 		} else if (v->provider->mediasize != v->size) {
1078 			g_topology_lock();
1079 			v->provider->mediasize = v->size;
1080 			g_topology_unlock();
1081 		}
1082 		v->flags &= ~GV_VOL_NEWBORN;
1083 		gv_update_vol_state(v);
1084 	}
1085 }
1086 
1087 void
1088 gv_cleanup(struct gv_softc *sc)
1089 {
1090 	struct gv_volume *v, *v2;
1091 	struct gv_plex *p, *p2;
1092 	struct gv_sd *s, *s2;
1093 	struct gv_drive *d, *d2;
1094 	struct gv_freelist *fl, *fl2;
1095 
1096 	mtx_lock(&sc->config_mtx);
1097 	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1098 		LIST_REMOVE(v, volume);
1099 		g_free(v->wqueue);
1100 		g_free(v);
1101 	}
1102 	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1103 		LIST_REMOVE(p, plex);
1104 		g_free(p->bqueue);
1105 		g_free(p->rqueue);
1106 		g_free(p->wqueue);
1107 		g_free(p);
1108 	}
1109 	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1110 		LIST_REMOVE(s, sd);
1111 		g_free(s);
1112 	}
1113 	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1114 		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1115 			LIST_REMOVE(fl, freelist);
1116 			g_free(fl);
1117 		}
1118 		LIST_REMOVE(d, drive);
1119 		g_free(d->hdr);
1120 		g_free(d);
1121 	}
1122 	mtx_destroy(&sc->config_mtx);
1123 }
1124 
1125 /* General 'attach' routine. */
1126 int
1127 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1128 {
1129 	struct gv_sd *s;
1130 	struct gv_softc *sc;
1131 
1132 	g_topology_assert();
1133 
1134 	sc = p->vinumconf;
1135 	KASSERT(sc != NULL, ("NULL sc"));
1136 
1137 	if (p->vol_sc != NULL) {
1138 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1139 		    p->name, p->volume);
1140 		return (GV_ERR_ISATTACHED);
1141 	}
1142 
1143 	/* Stale all subdisks of this plex. */
1144 	LIST_FOREACH(s, &p->subdisks, in_plex) {
1145 		if (s->state != GV_SD_STALE)
1146 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1147 	}
1148 	/* Attach to volume. Make sure volume is not up and running. */
1149 	if (gv_provider_is_open(v->provider)) {
1150 		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1151 		    p->name, v->name);
1152 		return (GV_ERR_ISBUSY);
1153 	}
1154 	p->vol_sc = v;
1155 	strlcpy(p->volume, v->name, sizeof(p->volume));
1156 	v->plexcount++;
1157 	if (rename) {
1158 		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1159 		    v->plexcount);
1160 	}
1161 	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1162 
1163 	/* Get plex up again. */
1164 	gv_update_vol_size(v, gv_vol_size(v));
1165 	gv_set_plex_state(p, GV_PLEX_UP, 0);
1166 	gv_save_config(p->vinumconf);
1167 	return (0);
1168 }
1169 
1170 int
1171 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1172 {
1173 	struct gv_sd *s2;
1174 	int error, sdcount;
1175 
1176 	g_topology_assert();
1177 
1178 	/* If subdisk is attached, don't do it. */
1179 	if (s->plex_sc != NULL) {
1180 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1181 		    s->name, s->plex);
1182 		return (GV_ERR_ISATTACHED);
1183 	}
1184 
1185 	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1186 	/* First check that this subdisk has a correct offset. If none other
1187 	 * starts at the same, and it's correct module stripesize, it is */
1188 	if (offset != -1 && offset % p->stripesize != 0)
1189 		return (GV_ERR_BADOFFSET);
1190 	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1191 		if (s2->plex_offset == offset)
1192 			return (GV_ERR_BADOFFSET);
1193 	}
1194 
1195 	/* Attach the subdisk to the plex at given offset. */
1196 	s->plex_offset = offset;
1197 	strlcpy(s->plex, p->name, sizeof(s->plex));
1198 
1199 	sdcount = p->sdcount;
1200 	error = gv_sd_to_plex(s, p);
1201 	if (error)
1202 		return (error);
1203 	gv_update_plex_config(p);
1204 
1205 	if (rename) {
1206 		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1207 		    p->sdcount);
1208 	}
1209 	if (p->vol_sc != NULL)
1210 		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1211 	gv_save_config(p->vinumconf);
1212 	/* We don't update the subdisk state since the user might have to
1213 	 * initiate a rebuild/sync first. */
1214 	return (0);
1215 }
1216 
1217 /* Detach a plex from a volume. */
1218 int
1219 gv_detach_plex(struct gv_plex *p, int flags)
1220 {
1221 	struct gv_volume *v;
1222 
1223 	g_topology_assert();
1224 	v = p->vol_sc;
1225 
1226 	if (v == NULL) {
1227 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1228 		    p->name);
1229 		return (0); /* Not an error. */
1230 	}
1231 
1232 	/*
1233 	 * Only proceed if forced or volume inactive.
1234 	 */
1235 	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1236 	    p->state == GV_PLEX_UP)) {
1237 		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1238 		    p->name, p->volume);
1239 		return (GV_ERR_ISBUSY);
1240 	}
1241 	v->plexcount--;
1242 	/* Make sure someone don't read us when gone. */
1243 	v->last_read_plex = NULL;
1244 	LIST_REMOVE(p, in_volume);
1245 	p->vol_sc = NULL;
1246 	memset(p->volume, 0, GV_MAXVOLNAME);
1247 	gv_update_vol_size(v, gv_vol_size(v));
1248 	gv_save_config(p->vinumconf);
1249 	return (0);
1250 }
1251 
1252 /* Detach a subdisk from a plex. */
1253 int
1254 gv_detach_sd(struct gv_sd *s, int flags)
1255 {
1256 	struct gv_plex *p;
1257 
1258 	g_topology_assert();
1259 	p = s->plex_sc;
1260 
1261 	if (p == NULL) {
1262 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1263 		    s->name);
1264 		return (0); /* Not an error. */
1265 	}
1266 
1267 	/*
1268 	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1269 	 * with this subdisk up.
1270 	 */
1271 	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1272 	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1273 	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1274 		    s->name, s->plex);
1275 		return (GV_ERR_ISBUSY);
1276 	}
1277 
1278 	LIST_REMOVE(s, in_plex);
1279 	s->plex_sc = NULL;
1280 	memset(s->plex, 0, GV_MAXPLEXNAME);
1281 	p->sddetached++;
1282 	gv_save_config(s->vinumconf);
1283 	return (0);
1284 }
1285