xref: /freebsd/sys/geom/vinum/geom_vinum_subr.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * Copyright (c) 2007, 2009 Ulf Lilleengen
6  * Copyright (c) 1997, 1998, 1999
7  *      Nan Yang Computer Services Limited.  All rights reserved.
8  *
9  *  Parts written by Greg Lehey
10  *
11  *  This software is distributed under the so-called ``Berkeley
12  *  License'':
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *      This product includes software developed by Nan Yang Computer
25  *      Services Limited.
26  * 4. Neither the name of the Company nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * This software is provided ``as is'', and any express or implied
31  * warranties, including, but not limited to, the implied warranties of
32  * merchantability and fitness for a particular purpose are disclaimed.
33  * In no event shall the company or contributors be liable for any
34  * direct, indirect, incidental, special, exemplary, or consequential
35  * damages (including, but not limited to, procurement of substitute
36  * goods or services; loss of use, data, or profits; or business
37  * interruption) however caused and on any theory of liability, whether
38  * in contract, strict liability, or tort (including negligence or
39  * otherwise) arising in any way out of the use of this software, even if
40  * advised of the possibility of such damage.
41  *
42  */
43 
44 #include <sys/param.h>
45 #include <sys/malloc.h>
46 #include <sys/sbuf.h>
47 #include <sys/systm.h>
48 
49 #include <geom/geom.h>
50 #include <geom/geom_dbg.h>
51 #include <geom/vinum/geom_vinum_var.h>
52 #include <geom/vinum/geom_vinum.h>
53 #include <geom/vinum/geom_vinum_share.h>
54 
55 int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
56 static off_t gv_plex_smallest_sd(struct gv_plex *);
57 
58 void
59 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
60 {
61 	char *aptr, *bptr, *cptr;
62 	struct gv_volume *v, *v2;
63 	struct gv_plex *p, *p2;
64 	struct gv_sd *s, *s2;
65 	int error, is_newer, tokens;
66 	char *token[GV_MAXARGS];
67 
68 	is_newer = gv_drive_is_newer(sc, d);
69 
70 	/* Until the end of the string *buf. */
71 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
72 		bptr = aptr;
73 		cptr = aptr;
74 
75 		/* Separate input lines. */
76 		while (*bptr != '\n')
77 			bptr++;
78 		*bptr = '\0';
79 		bptr++;
80 
81 		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
82 
83 		if (tokens <= 0)
84 			continue;
85 
86 		if (!strcmp(token[0], "volume")) {
87 			v = gv_new_volume(tokens, token);
88 			if (v == NULL) {
89 				G_VINUM_DEBUG(0, "config parse failed volume");
90 				break;
91 			}
92 
93 			v2 = gv_find_vol(sc, v->name);
94 			if (v2 != NULL) {
95 				if (is_newer) {
96 					v2->state = v->state;
97 					G_VINUM_DEBUG(2, "newer volume found!");
98 				}
99 				g_free(v);
100 				continue;
101 			}
102 
103 			gv_create_volume(sc, v);
104 
105 		} else if (!strcmp(token[0], "plex")) {
106 			p = gv_new_plex(tokens, token);
107 			if (p == NULL) {
108 				G_VINUM_DEBUG(0, "config parse failed plex");
109 				break;
110 			}
111 
112 			p2 = gv_find_plex(sc, p->name);
113 			if (p2 != NULL) {
114 				/* XXX */
115 				if (is_newer) {
116 					p2->state = p->state;
117 					G_VINUM_DEBUG(2, "newer plex found!");
118 				}
119 				g_free(p);
120 				continue;
121 			}
122 
123 			error = gv_create_plex(sc, p);
124 			if (error)
125 				continue;
126 			/*
127 			 * These flags were set in gv_create_plex() and are not
128 			 * needed here (on-disk config parsing).
129 			 */
130 			p->flags &= ~GV_PLEX_ADDED;
131 
132 		} else if (!strcmp(token[0], "sd")) {
133 			s = gv_new_sd(tokens, token);
134 
135 			if (s == NULL) {
136 				G_VINUM_DEBUG(0, "config parse failed subdisk");
137 				break;
138 			}
139 
140 			s2 = gv_find_sd(sc, s->name);
141 			if (s2 != NULL) {
142 				/* XXX */
143 				if (is_newer) {
144 					s2->state = s->state;
145 					G_VINUM_DEBUG(2, "newer subdisk found!");
146 				}
147 				g_free(s);
148 				continue;
149 			}
150 
151 			/*
152 			 * Signal that this subdisk was tasted, and could
153 			 * possibly reference a drive that isn't in our config
154 			 * yet.
155 			 */
156 			s->flags |= GV_SD_TASTED;
157 
158 			if (s->state == GV_SD_UP)
159 				s->flags |= GV_SD_CANGOUP;
160 
161 			error = gv_create_sd(sc, s);
162 			if (error)
163 				continue;
164 
165 			/*
166 			 * This flag was set in gv_create_sd() and is not
167 			 * needed here (on-disk config parsing).
168 			 */
169 			s->flags &= ~GV_SD_NEWBORN;
170 			s->flags &= ~GV_SD_GROW;
171 		}
172 	}
173 }
174 
175 /*
176  * Format the vinum configuration properly.  If ondisk is non-zero then the
177  * configuration is intended to be written to disk later.
178  */
179 void
180 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
181 {
182 	struct gv_drive *d;
183 	struct gv_sd *s;
184 	struct gv_plex *p;
185 	struct gv_volume *v;
186 
187 	/*
188 	 * We don't need the drive configuration if we're not writing the
189 	 * config to disk.
190 	 */
191 	if (!ondisk) {
192 		LIST_FOREACH(d, &sc->drives, drive) {
193 			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
194 			    d->name, d->device);
195 		}
196 	}
197 
198 	LIST_FOREACH(v, &sc->volumes, volume) {
199 		if (!ondisk)
200 			sbuf_printf(sb, "%s", prefix);
201 		sbuf_printf(sb, "volume %s", v->name);
202 		if (ondisk)
203 			sbuf_printf(sb, " state %s", gv_volstate(v->state));
204 		sbuf_printf(sb, "\n");
205 	}
206 
207 	LIST_FOREACH(p, &sc->plexes, plex) {
208 		if (!ondisk)
209 			sbuf_printf(sb, "%s", prefix);
210 		sbuf_printf(sb, "plex name %s org %s ", p->name,
211 		    gv_plexorg(p->org));
212 		if (gv_is_striped(p))
213 			sbuf_printf(sb, "%ds ", p->stripesize / 512);
214 		if (p->vol_sc != NULL)
215 			sbuf_printf(sb, "vol %s", p->volume);
216 		if (ondisk)
217 			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
218 		sbuf_printf(sb, "\n");
219 	}
220 
221 	LIST_FOREACH(s, &sc->subdisks, sd) {
222 		if (!ondisk)
223 			sbuf_printf(sb, "%s", prefix);
224 		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
225 		    "%jds", s->name, s->drive, s->size / 512,
226 		    s->drive_offset / 512);
227 		if (s->plex_sc != NULL) {
228 			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
229 			    s->plex_offset / 512);
230 		}
231 		if (ondisk)
232 			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
233 		sbuf_printf(sb, "\n");
234 	}
235 }
236 
237 static off_t
238 gv_plex_smallest_sd(struct gv_plex *p)
239 {
240 	struct gv_sd *s;
241 	off_t smallest;
242 
243 	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
244 
245 	s = LIST_FIRST(&p->subdisks);
246 	if (s == NULL)
247 		return (-1);
248 	smallest = s->size;
249 	LIST_FOREACH(s, &p->subdisks, in_plex) {
250 		if (s->size < smallest)
251 			smallest = s->size;
252 	}
253 	return (smallest);
254 }
255 
256 /* Walk over plexes in a volume and count how many are down. */
257 int
258 gv_plexdown(struct gv_volume *v)
259 {
260 	int plexdown;
261 	struct gv_plex *p;
262 
263 	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
264 
265 	plexdown = 0;
266 
267 	LIST_FOREACH(p, &v->plexes, plex) {
268 		if (p->state == GV_PLEX_DOWN)
269 			plexdown++;
270 	}
271 	return (plexdown);
272 }
273 
274 int
275 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
276 {
277 	struct gv_sd *s2;
278 	off_t psizeorig, remainder, smallest;
279 
280 	/* If this subdisk was already given to this plex, do nothing. */
281 	if (s->plex_sc == p)
282 		return (0);
283 
284 	/* Check correct size of this subdisk. */
285 	s2 = LIST_FIRST(&p->subdisks);
286 	/* Adjust the subdisk-size if necessary. */
287 	if (s2 != NULL && gv_is_striped(p)) {
288 		/* First adjust to the stripesize. */
289 		remainder = s->size % p->stripesize;
290 
291 		if (remainder) {
292 			G_VINUM_DEBUG(1, "size of sd %s is not a "
293 			    "multiple of plex stripesize, taking off "
294 			    "%jd bytes", s->name,
295 			    (intmax_t)remainder);
296 			gv_adjust_freespace(s, remainder);
297 		}
298 
299 		smallest = gv_plex_smallest_sd(p);
300 		/* Then take off extra if other subdisks are smaller. */
301 		remainder = s->size - smallest;
302 
303 		/*
304 		 * Don't allow a remainder below zero for running plexes, it's too
305 		 * painful, and if someone were to accidentally do this, the
306 		 * resulting array might be smaller than the original... not god
307 		 */
308 		if (remainder < 0) {
309 			if (!(p->flags & GV_PLEX_NEWBORN)) {
310 				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
311 				    s->name, p->name);
312 				return (GV_ERR_BADSIZE);
313 			}
314 			/* Adjust other subdisks. */
315 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
316 				G_VINUM_DEBUG(1, "size of sd %s is to big, "
317 				    "taking off %jd bytes", s->name,
318 				    (intmax_t)remainder);
319 				gv_adjust_freespace(s2, (remainder * -1));
320 			}
321 		} else if (remainder > 0) {
322 			G_VINUM_DEBUG(1, "size of sd %s is to big, "
323 			    "taking off %jd bytes", s->name,
324 			    (intmax_t)remainder);
325 			gv_adjust_freespace(s, remainder);
326 		}
327 	}
328 
329 	/* Find the correct plex offset for this subdisk, if needed. */
330 	if (s->plex_offset == -1) {
331 		/*
332 		 * First set it to 0 to catch the case where we had a detached
333 		 * subdisk that didn't get any good offset.
334 		 */
335 		s->plex_offset = 0;
336 		if (p->sdcount) {
337 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
338 				if (gv_is_striped(p))
339 					s->plex_offset = p->sdcount *
340 					    p->stripesize;
341 				else
342 					s->plex_offset = s2->plex_offset +
343 					    s2->size;
344 			}
345 		}
346 	}
347 
348 	/* There are no subdisks for this plex yet, just insert it. */
349 	if (LIST_EMPTY(&p->subdisks)) {
350 		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
351 
352 	/* Insert in correct order, depending on plex_offset. */
353 	} else {
354 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
355 			if (s->plex_offset < s2->plex_offset) {
356 				LIST_INSERT_BEFORE(s2, s, in_plex);
357 				break;
358 			} else if (LIST_NEXT(s2, in_plex) == NULL) {
359 				LIST_INSERT_AFTER(s2, s, in_plex);
360 				break;
361 			}
362 		}
363 	}
364 
365 	s->plex_sc = p;
366         /* Adjust the size of our plex. We check if the plex misses a subdisk,
367 	 * so we don't make the plex smaller than it actually should be.
368 	 */
369 	psizeorig = p->size;
370 	p->size = gv_plex_size(p);
371 	/* Make sure the size is not changed. */
372 	if (p->sddetached > 0) {
373 		if (p->size < psizeorig) {
374 			p->size = psizeorig;
375 			/* We make sure wee need another subdisk. */
376 			if (p->sddetached == 1)
377 				p->sddetached++;
378 		}
379 		p->sddetached--;
380 	} else {
381 		if ((p->org == GV_PLEX_RAID5 ||
382 		    p->org == GV_PLEX_STRIPED) &&
383 		    !(p->flags & GV_PLEX_NEWBORN) &&
384 		    p->state == GV_PLEX_UP) {
385 			s->flags |= GV_SD_GROW;
386 		}
387 		p->sdcount++;
388 	}
389 
390 	return (0);
391 }
392 
393 void
394 gv_update_vol_size(struct gv_volume *v, off_t size)
395 {
396 	if (v == NULL)
397 		return;
398 	if (v->provider != NULL) {
399 		g_topology_lock();
400 		v->provider->mediasize = size;
401 		g_topology_unlock();
402 	}
403 	v->size = size;
404 }
405 
406 /* Return how many subdisks that constitute the original plex. */
407 int
408 gv_sdcount(struct gv_plex *p, int growing)
409 {
410 	struct gv_sd *s;
411 	int sdcount;
412 
413 	sdcount = p->sdcount;
414 	if (growing) {
415 		LIST_FOREACH(s, &p->subdisks, in_plex) {
416 			if (s->flags & GV_SD_GROW)
417 				sdcount--;
418 		}
419 	}
420 
421 	return (sdcount);
422 }
423 
424 /* Calculates the plex size. */
425 off_t
426 gv_plex_size(struct gv_plex *p)
427 {
428 	struct gv_sd *s;
429 	off_t size;
430 	int sdcount;
431 
432 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
433 
434 	/* Adjust the size of our plex. */
435 	size = 0;
436 	sdcount = gv_sdcount(p, 1);
437 	switch (p->org) {
438 	case GV_PLEX_CONCAT:
439 		LIST_FOREACH(s, &p->subdisks, in_plex)
440 			size += s->size;
441 		break;
442 	case GV_PLEX_STRIPED:
443 		s = LIST_FIRST(&p->subdisks);
444 		size = ((s != NULL) ? (sdcount * s->size) : 0);
445 		break;
446 	case GV_PLEX_RAID5:
447 		s = LIST_FIRST(&p->subdisks);
448 		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
449 		break;
450 	}
451 
452 	return (size);
453 }
454 
455 /* Returns the size of a volume. */
456 off_t
457 gv_vol_size(struct gv_volume *v)
458 {
459 	struct gv_plex *p;
460 	off_t minplexsize;
461 
462 	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
463 
464 	p = LIST_FIRST(&v->plexes);
465 	if (p == NULL)
466 		return (0);
467 
468 	minplexsize = p->size;
469 	LIST_FOREACH(p, &v->plexes, in_volume) {
470 		if (p->size < minplexsize) {
471 			minplexsize = p->size;
472 		}
473 	}
474 	return (minplexsize);
475 }
476 
477 void
478 gv_update_plex_config(struct gv_plex *p)
479 {
480 	struct gv_sd *s, *s2;
481 	off_t remainder;
482 	int required_sds, state;
483 
484 	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
485 
486 	/* The plex was added to an already running volume. */
487 	if (p->flags & GV_PLEX_ADDED)
488 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
489 
490 	switch (p->org) {
491 	case GV_PLEX_STRIPED:
492 		required_sds = 2;
493 		break;
494 	case GV_PLEX_RAID5:
495 		required_sds = 3;
496 		break;
497 	case GV_PLEX_CONCAT:
498 	default:
499 		required_sds = 0;
500 		break;
501 	}
502 
503 	if (required_sds) {
504 		if (p->sdcount < required_sds) {
505 			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
506 		}
507 
508 		/*
509 		 * The subdisks in striped plexes must all have the same size.
510 		 */
511 		s = LIST_FIRST(&p->subdisks);
512 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
513 			if (s->size != s2->size) {
514 				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
515 				    "(%jd) <> %s (%jd)", s->name, s->size,
516 				    s2->name, s2->size);
517 				gv_set_plex_state(p, GV_PLEX_DOWN,
518 				    GV_SETSTATE_FORCE);
519 			}
520 		}
521 
522 		LIST_FOREACH(s, &p->subdisks, in_plex) {
523 			/* Trim subdisk sizes to match the stripe size. */
524 			remainder = s->size % p->stripesize;
525 			if (remainder) {
526 				G_VINUM_DEBUG(1, "size of sd %s is not a "
527 				    "multiple of plex stripesize, taking off "
528 				    "%jd bytes", s->name, (intmax_t)remainder);
529 				gv_adjust_freespace(s, remainder);
530 			}
531 		}
532 	}
533 
534 	p->size = gv_plex_size(p);
535 	if (p->sdcount == 0)
536 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
537 	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
538 		LIST_FOREACH(s, &p->subdisks, in_plex)
539 			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
540 		/* If added to a volume, we want the plex to be down. */
541 		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
542 		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
543 		p->flags &= ~GV_PLEX_ADDED;
544 	} else if (p->flags & GV_PLEX_ADDED) {
545 		LIST_FOREACH(s, &p->subdisks, in_plex)
546 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
547 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
548 		p->flags &= ~GV_PLEX_ADDED;
549 	} else if (p->state == GV_PLEX_UP) {
550 		LIST_FOREACH(s, &p->subdisks, in_plex) {
551 			if (s->flags & GV_SD_GROW) {
552 				gv_set_plex_state(p, GV_PLEX_GROWABLE,
553 				    GV_SETSTATE_FORCE);
554 				break;
555 			}
556 		}
557 	}
558 	/* Our plex is grown up now. */
559 	p->flags &= ~GV_PLEX_NEWBORN;
560 }
561 
562 /*
563  * Give a subdisk to a drive, check and adjust several parameters, adjust
564  * freelist.
565  */
566 int
567 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
568 {
569 	struct gv_sd *s2;
570 	struct gv_freelist *fl, *fl2;
571 	off_t tmp;
572 	int i;
573 
574 	fl2 = NULL;
575 
576 	/* Shortcut for "referenced" drives. */
577 	if (d->flags & GV_DRIVE_REFERENCED) {
578 		s->drive_sc = d;
579 		return (0);
580 	}
581 
582 	/* Check if this subdisk was already given to this drive. */
583 	if (s->drive_sc != NULL) {
584 		if (s->drive_sc == d) {
585 			if (!(s->flags & GV_SD_TASTED)) {
586 				return (0);
587 			}
588 		} else {
589 			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
590 			    "(already on '%s')", s->name, d->name,
591 			    s->drive_sc->name);
592 			return (GV_ERR_ISATTACHED);
593 		}
594 	}
595 
596 	/* Preliminary checks. */
597 	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
598 		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
599 		    s->name);
600 		return (GV_ERR_NOSPACE);
601 	}
602 
603 	/* If no size was given for this subdisk, try to auto-size it... */
604 	if (s->size == -1) {
605 		/* Find the largest available slot. */
606 		LIST_FOREACH(fl, &d->freelist, freelist) {
607 			if (fl->size < s->size)
608 				continue;
609 			s->size = fl->size;
610 			s->drive_offset = fl->offset;
611 			fl2 = fl;
612 		}
613 
614 		/* No good slot found? */
615 		if (s->size == -1) {
616 			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
617 			    s->name, d->name);
618 			return (GV_ERR_BADSIZE);
619 		}
620 
621 	/*
622 	 * ... or check if we have a free slot that's large enough for the
623 	 * given size.
624 	 */
625 	} else {
626 		i = 0;
627 		LIST_FOREACH(fl, &d->freelist, freelist) {
628 			if (fl->size < s->size)
629 				continue;
630 			/* Assign drive offset, if not given. */
631 			if (s->drive_offset == -1)
632 				s->drive_offset = fl->offset;
633 			fl2 = fl;
634 			i++;
635 			break;
636 		}
637 
638 		/* Couldn't find a good free slot. */
639 		if (i == 0) {
640 			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
641 			    s->name, d->name);
642 			return (GV_ERR_NOSPACE);
643 		}
644 	}
645 
646 	/* No drive offset given, try to calculate it. */
647 	if (s->drive_offset == -1) {
648 		/* Add offsets and sizes from other subdisks on this drive. */
649 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
650 			s->drive_offset = s2->drive_offset + s2->size;
651 		}
652 
653 		/*
654 		 * If there are no other subdisks yet, then set the default
655 		 * offset to GV_DATA_START.
656 		 */
657 		if (s->drive_offset == -1)
658 			s->drive_offset = GV_DATA_START;
659 
660 	/* Check if we have a free slot at the given drive offset. */
661 	} else {
662 		i = 0;
663 		LIST_FOREACH(fl, &d->freelist, freelist) {
664 			/* Yes, this subdisk fits. */
665 			if ((fl->offset <= s->drive_offset) &&
666 			    (fl->offset + fl->size >=
667 			    s->drive_offset + s->size)) {
668 				i++;
669 				fl2 = fl;
670 				break;
671 			}
672 		}
673 
674 		/* Couldn't find a good free slot. */
675 		if (i == 0) {
676 			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677 			    "on '%s'", s->name, d->name);
678 			return (GV_ERR_NOSPACE);
679 		}
680 	}
681 
682 	/*
683 	 * Now that all parameters are checked and set up, we can give the
684 	 * subdisk to the drive and adjust the freelist.
685 	 */
686 
687 	/* First, adjust the freelist. */
688 	LIST_FOREACH(fl, &d->freelist, freelist) {
689 		/* Look for the free slot that we have found before. */
690 		if (fl != fl2)
691 			continue;
692 
693 		/* The subdisk starts at the beginning of the free slot. */
694 		if (fl->offset == s->drive_offset) {
695 			fl->offset += s->size;
696 			fl->size -= s->size;
697 
698 			/* The subdisk uses the whole slot, so remove it. */
699 			if (fl->size == 0) {
700 				d->freelist_entries--;
701 				LIST_REMOVE(fl, freelist);
702 			}
703 		/*
704 		 * The subdisk does not start at the beginning of the free
705 		 * slot.
706 		 */
707 		} else {
708 			tmp = fl->offset + fl->size;
709 			fl->size = s->drive_offset - fl->offset;
710 
711 			/*
712 			 * The subdisk didn't use the complete rest of the free
713 			 * slot, so we need to split it.
714 			 */
715 			if (s->drive_offset + s->size != tmp) {
716 				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717 				fl2->offset = s->drive_offset + s->size;
718 				fl2->size = tmp - fl2->offset;
719 				LIST_INSERT_AFTER(fl, fl2, freelist);
720 				d->freelist_entries++;
721 			}
722 		}
723 		break;
724 	}
725 
726 	/*
727 	 * This is the first subdisk on this drive, just insert it into the
728 	 * list.
729 	 */
730 	if (LIST_EMPTY(&d->subdisks)) {
731 		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732 
733 	/* There are other subdisks, so insert this one in correct order. */
734 	} else {
735 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
736 			if (s->drive_offset < s2->drive_offset) {
737 				LIST_INSERT_BEFORE(s2, s, from_drive);
738 				break;
739 			} else if (LIST_NEXT(s2, from_drive) == NULL) {
740 				LIST_INSERT_AFTER(s2, s, from_drive);
741 				break;
742 			}
743 		}
744 	}
745 
746 	d->sdcount++;
747 	d->avail -= s->size;
748 
749 	s->flags &= ~GV_SD_TASTED;
750 
751 	/* Link back from the subdisk to this drive. */
752 	s->drive_sc = d;
753 
754 	return (0);
755 }
756 
757 void
758 gv_free_sd(struct gv_sd *s)
759 {
760 	struct gv_drive *d;
761 	struct gv_freelist *fl, *fl2;
762 
763 	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764 
765 	d = s->drive_sc;
766 	if (d == NULL)
767 		return;
768 
769 	/*
770 	 * First, find the free slot that's immediately before or after this
771 	 * subdisk.
772 	 */
773 	fl = NULL;
774 	LIST_FOREACH(fl, &d->freelist, freelist) {
775 		if (fl->offset == s->drive_offset + s->size)
776 			break;
777 		if (fl->offset + fl->size == s->drive_offset)
778 			break;
779 	}
780 
781 	/* If there is no free slot behind this subdisk, so create one. */
782 	if (fl == NULL) {
783 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
784 		fl->size = s->size;
785 		fl->offset = s->drive_offset;
786 
787 		if (d->freelist_entries == 0) {
788 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
789 		} else {
790 			LIST_FOREACH(fl2, &d->freelist, freelist) {
791 				if (fl->offset < fl2->offset) {
792 					LIST_INSERT_BEFORE(fl2, fl, freelist);
793 					break;
794 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
795 					LIST_INSERT_AFTER(fl2, fl, freelist);
796 					break;
797 				}
798 			}
799 		}
800 
801 		d->freelist_entries++;
802 
803 	/* Expand the free slot we just found. */
804 	} else {
805 		fl->size += s->size;
806 		if (fl->offset > s->drive_offset)
807 			fl->offset = s->drive_offset;
808 	}
809 
810 	d->avail += s->size;
811 	d->sdcount--;
812 }
813 
814 void
815 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
816 {
817 	struct gv_drive *d;
818 	struct gv_freelist *fl, *fl2;
819 
820 	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
821 	d = s->drive_sc;
822 	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
823 
824 	/* First, find the free slot that's immediately after this subdisk. */
825 	fl = NULL;
826 	LIST_FOREACH(fl, &d->freelist, freelist) {
827 		if (fl->offset == s->drive_offset + s->size)
828 			break;
829 	}
830 
831 	/* If there is no free slot behind this subdisk, so create one. */
832 	if (fl == NULL) {
833 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
834 		fl->size = remainder;
835 		fl->offset = s->drive_offset + s->size - remainder;
836 
837 		if (d->freelist_entries == 0) {
838 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
839 		} else {
840 			LIST_FOREACH(fl2, &d->freelist, freelist) {
841 				if (fl->offset < fl2->offset) {
842 					LIST_INSERT_BEFORE(fl2, fl, freelist);
843 					break;
844 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
845 					LIST_INSERT_AFTER(fl2, fl, freelist);
846 					break;
847 				}
848 			}
849 		}
850 
851 		d->freelist_entries++;
852 
853 	/* Expand the free slot we just found. */
854 	} else {
855 		fl->offset -= remainder;
856 		fl->size += remainder;
857 	}
858 
859 	s->size -= remainder;
860 	d->avail += remainder;
861 }
862 
863 /* Check if the given plex is a striped one. */
864 int
865 gv_is_striped(struct gv_plex *p)
866 {
867 	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
868 	switch(p->org) {
869 	case GV_PLEX_STRIPED:
870 	case GV_PLEX_RAID5:
871 		return (1);
872 	default:
873 		return (0);
874 	}
875 }
876 
877 /* Find a volume by name. */
878 struct gv_volume *
879 gv_find_vol(struct gv_softc *sc, char *name)
880 {
881 	struct gv_volume *v;
882 
883 	LIST_FOREACH(v, &sc->volumes, volume) {
884 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
885 			return (v);
886 	}
887 
888 	return (NULL);
889 }
890 
891 /* Find a plex by name. */
892 struct gv_plex *
893 gv_find_plex(struct gv_softc *sc, char *name)
894 {
895 	struct gv_plex *p;
896 
897 	LIST_FOREACH(p, &sc->plexes, plex) {
898 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
899 			return (p);
900 	}
901 
902 	return (NULL);
903 }
904 
905 /* Find a subdisk by name. */
906 struct gv_sd *
907 gv_find_sd(struct gv_softc *sc, char *name)
908 {
909 	struct gv_sd *s;
910 
911 	LIST_FOREACH(s, &sc->subdisks, sd) {
912 		if (!strncmp(s->name, name, GV_MAXSDNAME))
913 			return (s);
914 	}
915 
916 	return (NULL);
917 }
918 
919 /* Find a drive by name. */
920 struct gv_drive *
921 gv_find_drive(struct gv_softc *sc, char *name)
922 {
923 	struct gv_drive *d;
924 
925 	LIST_FOREACH(d, &sc->drives, drive) {
926 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
927 			return (d);
928 	}
929 
930 	return (NULL);
931 }
932 
933 /* Find a drive given a device. */
934 struct gv_drive *
935 gv_find_drive_device(struct gv_softc *sc, char *device)
936 {
937 	struct gv_drive *d;
938 
939 	LIST_FOREACH(d, &sc->drives, drive) {
940 		if(!strcmp(d->device, device))
941 			return (d);
942 	}
943 
944 	return (NULL);
945 }
946 
947 /* Check if any consumer of the given geom is open. */
948 int
949 gv_consumer_is_open(struct g_consumer *cp)
950 {
951 	if (cp == NULL)
952 		return (0);
953 
954 	if (cp->acr || cp->acw || cp->ace)
955 		return (1);
956 
957 	return (0);
958 }
959 
960 int
961 gv_provider_is_open(struct g_provider *pp)
962 {
963 	if (pp == NULL)
964 		return (0);
965 
966 	if (pp->acr || pp->acw || pp->ace)
967 		return (1);
968 
969 	return (0);
970 }
971 
972 /*
973  * Compare the modification dates of the drives.
974  * Return 1 if a > b, 0 otherwise.
975  */
976 int
977 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
978 {
979 	struct gv_drive *d2;
980 	struct timeval *a, *b;
981 
982 	KASSERT(!LIST_EMPTY(&sc->drives),
983 	    ("gv_is_drive_newer: empty drive list"));
984 
985 	a = &d->hdr->label.last_update;
986 	LIST_FOREACH(d2, &sc->drives, drive) {
987 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
988 		    (d2->hdr == NULL))
989 			continue;
990 		b = &d2->hdr->label.last_update;
991 		if (timevalcmp(a, b, >))
992 			return (1);
993 	}
994 
995 	return (0);
996 }
997 
998 /* Return the type of object identified by string 'name'. */
999 int
1000 gv_object_type(struct gv_softc *sc, char *name)
1001 {
1002 	struct gv_drive *d;
1003 	struct gv_plex *p;
1004 	struct gv_sd *s;
1005 	struct gv_volume *v;
1006 
1007 	LIST_FOREACH(v, &sc->volumes, volume) {
1008 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1009 			return (GV_TYPE_VOL);
1010 	}
1011 
1012 	LIST_FOREACH(p, &sc->plexes, plex) {
1013 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1014 			return (GV_TYPE_PLEX);
1015 	}
1016 
1017 	LIST_FOREACH(s, &sc->subdisks, sd) {
1018 		if (!strncmp(s->name, name, GV_MAXSDNAME))
1019 			return (GV_TYPE_SD);
1020 	}
1021 
1022 	LIST_FOREACH(d, &sc->drives, drive) {
1023 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1024 			return (GV_TYPE_DRIVE);
1025 	}
1026 
1027 	return (GV_ERR_NOTFOUND);
1028 }
1029 
1030 void
1031 gv_setup_objects(struct gv_softc *sc)
1032 {
1033 	struct g_provider *pp;
1034 	struct gv_volume *v;
1035 	struct gv_plex *p;
1036 	struct gv_sd *s;
1037 	struct gv_drive *d;
1038 
1039 	LIST_FOREACH(s, &sc->subdisks, sd) {
1040 		d = gv_find_drive(sc, s->drive);
1041 		if (d != NULL)
1042 			gv_sd_to_drive(s, d);
1043 		p = gv_find_plex(sc, s->plex);
1044 		if (p != NULL)
1045 			gv_sd_to_plex(s, p);
1046 		gv_update_sd_state(s);
1047 	}
1048 
1049 	LIST_FOREACH(p, &sc->plexes, plex) {
1050 		gv_update_plex_config(p);
1051 		v = gv_find_vol(sc, p->volume);
1052 		if (v != NULL && p->vol_sc != v) {
1053 			p->vol_sc = v;
1054 			v->plexcount++;
1055 			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1056 		}
1057 		gv_update_plex_config(p);
1058 	}
1059 
1060 	LIST_FOREACH(v, &sc->volumes, volume) {
1061 		v->size = gv_vol_size(v);
1062 		if (v->provider == NULL) {
1063 			g_topology_lock();
1064 			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1065 			pp->mediasize = v->size;
1066 			pp->sectorsize = 512;    /* XXX */
1067 			g_error_provider(pp, 0);
1068 			v->provider = pp;
1069 			pp->private = v;
1070 			g_topology_unlock();
1071 		} else if (v->provider->mediasize != v->size) {
1072 			g_topology_lock();
1073 			v->provider->mediasize = v->size;
1074 			g_topology_unlock();
1075 		}
1076 		v->flags &= ~GV_VOL_NEWBORN;
1077 		gv_update_vol_state(v);
1078 	}
1079 }
1080 
1081 void
1082 gv_cleanup(struct gv_softc *sc)
1083 {
1084 	struct gv_volume *v, *v2;
1085 	struct gv_plex *p, *p2;
1086 	struct gv_sd *s, *s2;
1087 	struct gv_drive *d, *d2;
1088 	struct gv_freelist *fl, *fl2;
1089 
1090 	mtx_lock(&sc->config_mtx);
1091 	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1092 		LIST_REMOVE(v, volume);
1093 		g_free(v->wqueue);
1094 		g_free(v);
1095 	}
1096 	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1097 		LIST_REMOVE(p, plex);
1098 		g_free(p->bqueue);
1099 		g_free(p->rqueue);
1100 		g_free(p->wqueue);
1101 		g_free(p);
1102 	}
1103 	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1104 		LIST_REMOVE(s, sd);
1105 		g_free(s);
1106 	}
1107 	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1108 		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1109 			LIST_REMOVE(fl, freelist);
1110 			g_free(fl);
1111 		}
1112 		LIST_REMOVE(d, drive);
1113 		g_free(d->hdr);
1114 		g_free(d);
1115 	}
1116 	mtx_destroy(&sc->config_mtx);
1117 }
1118 
1119 /* General 'attach' routine. */
1120 int
1121 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1122 {
1123 	struct gv_sd *s;
1124 	struct gv_softc *sc __diagused;
1125 
1126 	g_topology_assert();
1127 
1128 	sc = p->vinumconf;
1129 	KASSERT(sc != NULL, ("NULL sc"));
1130 
1131 	if (p->vol_sc != NULL) {
1132 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1133 		    p->name, p->volume);
1134 		return (GV_ERR_ISATTACHED);
1135 	}
1136 
1137 	/* Stale all subdisks of this plex. */
1138 	LIST_FOREACH(s, &p->subdisks, in_plex) {
1139 		if (s->state != GV_SD_STALE)
1140 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1141 	}
1142 	/* Attach to volume. Make sure volume is not up and running. */
1143 	if (gv_provider_is_open(v->provider)) {
1144 		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1145 		    p->name, v->name);
1146 		return (GV_ERR_ISBUSY);
1147 	}
1148 	p->vol_sc = v;
1149 	strlcpy(p->volume, v->name, sizeof(p->volume));
1150 	v->plexcount++;
1151 	if (rename) {
1152 		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1153 		    v->plexcount);
1154 	}
1155 	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1156 
1157 	/* Get plex up again. */
1158 	gv_update_vol_size(v, gv_vol_size(v));
1159 	gv_set_plex_state(p, GV_PLEX_UP, 0);
1160 	gv_save_config(p->vinumconf);
1161 	return (0);
1162 }
1163 
1164 int
1165 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1166 {
1167 	struct gv_sd *s2;
1168 	int error;
1169 
1170 	g_topology_assert();
1171 
1172 	/* If subdisk is attached, don't do it. */
1173 	if (s->plex_sc != NULL) {
1174 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1175 		    s->name, s->plex);
1176 		return (GV_ERR_ISATTACHED);
1177 	}
1178 
1179 	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1180 	/* First check that this subdisk has a correct offset. If none other
1181 	 * starts at the same, and it's correct module stripesize, it is */
1182 	if (offset != -1 && offset % p->stripesize != 0)
1183 		return (GV_ERR_BADOFFSET);
1184 	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1185 		if (s2->plex_offset == offset)
1186 			return (GV_ERR_BADOFFSET);
1187 	}
1188 
1189 	/* Attach the subdisk to the plex at given offset. */
1190 	s->plex_offset = offset;
1191 	strlcpy(s->plex, p->name, sizeof(s->plex));
1192 
1193 	error = gv_sd_to_plex(s, p);
1194 	if (error)
1195 		return (error);
1196 	gv_update_plex_config(p);
1197 
1198 	if (rename) {
1199 		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1200 		    p->sdcount);
1201 	}
1202 	if (p->vol_sc != NULL)
1203 		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1204 	gv_save_config(p->vinumconf);
1205 	/* We don't update the subdisk state since the user might have to
1206 	 * initiate a rebuild/sync first. */
1207 	return (0);
1208 }
1209 
1210 /* Detach a plex from a volume. */
1211 int
1212 gv_detach_plex(struct gv_plex *p, int flags)
1213 {
1214 	struct gv_volume *v;
1215 
1216 	g_topology_assert();
1217 	v = p->vol_sc;
1218 
1219 	if (v == NULL) {
1220 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1221 		    p->name);
1222 		return (0); /* Not an error. */
1223 	}
1224 
1225 	/*
1226 	 * Only proceed if forced or volume inactive.
1227 	 */
1228 	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1229 	    p->state == GV_PLEX_UP)) {
1230 		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1231 		    p->name, p->volume);
1232 		return (GV_ERR_ISBUSY);
1233 	}
1234 	v->plexcount--;
1235 	/* Make sure someone don't read us when gone. */
1236 	v->last_read_plex = NULL;
1237 	LIST_REMOVE(p, in_volume);
1238 	p->vol_sc = NULL;
1239 	memset(p->volume, 0, GV_MAXVOLNAME);
1240 	gv_update_vol_size(v, gv_vol_size(v));
1241 	gv_save_config(p->vinumconf);
1242 	return (0);
1243 }
1244 
1245 /* Detach a subdisk from a plex. */
1246 int
1247 gv_detach_sd(struct gv_sd *s, int flags)
1248 {
1249 	struct gv_plex *p;
1250 
1251 	g_topology_assert();
1252 	p = s->plex_sc;
1253 
1254 	if (p == NULL) {
1255 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1256 		    s->name);
1257 		return (0); /* Not an error. */
1258 	}
1259 
1260 	/*
1261 	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1262 	 * with this subdisk up.
1263 	 */
1264 	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1265 	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1266 	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1267 		    s->name, s->plex);
1268 		return (GV_ERR_ISBUSY);
1269 	}
1270 
1271 	LIST_REMOVE(s, in_plex);
1272 	s->plex_sc = NULL;
1273 	memset(s->plex, 0, GV_MAXPLEXNAME);
1274 	p->sddetached++;
1275 	gv_save_config(s->vinumconf);
1276 	return (0);
1277 }
1278