xref: /freebsd/sys/geom/vinum/geom_vinum_subr.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 2004, 2007 Lukas Ertl
3  * Copyright (c) 2007, 2009 Ulf Lilleengen
4  * Copyright (c) 1997, 1998, 1999
5  *      Nan Yang Computer Services Limited.  All rights reserved.
6  *
7  *  Parts written by Greg Lehey
8  *
9  *  This software is distributed under the so-called ``Berkeley
10  *  License'':
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Nan Yang Computer
23  *      Services Limited.
24  * 4. Neither the name of the Company nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * This software is provided ``as is'', and any express or implied
29  * warranties, including, but not limited to, the implied warranties of
30  * merchantability and fitness for a particular purpose are disclaimed.
31  * In no event shall the company or contributors be liable for any
32  * direct, indirect, incidental, special, exemplary, or consequential
33  * damages (including, but not limited to, procurement of substitute
34  * goods or services; loss of use, data, or profits; or business
35  * interruption) however caused and on any theory of liability, whether
36  * in contract, strict liability, or tort (including negligence or
37  * otherwise) arising in any way out of the use of this software, even if
38  * advised of the possibility of such damage.
39  *
40  */
41 
42 #include <sys/cdefs.h>
43 __FBSDID("$FreeBSD$");
44 
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/systm.h>
48 
49 #include <geom/geom.h>
50 #include <geom/vinum/geom_vinum_var.h>
51 #include <geom/vinum/geom_vinum.h>
52 #include <geom/vinum/geom_vinum_share.h>
53 
/*
 * Forward declarations for helpers that are defined further down in this
 * file but used before their definition (gv_drive_is_newer() by
 * gv_parse_config(), gv_plex_smallest_sd() by gv_sd_to_plex()).
 */
int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
static off_t gv_plex_smallest_sd(struct gv_plex *);
56 
57 void
58 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
59 {
60 	char *aptr, *bptr, *cptr;
61 	struct gv_volume *v, *v2;
62 	struct gv_plex *p, *p2;
63 	struct gv_sd *s, *s2;
64 	int error, is_newer, tokens;
65 	char *token[GV_MAXARGS];
66 
67 	is_newer = gv_drive_is_newer(sc, d);
68 
69 	/* Until the end of the string *buf. */
70 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
71 		bptr = aptr;
72 		cptr = aptr;
73 
74 		/* Seperate input lines. */
75 		while (*bptr != '\n')
76 			bptr++;
77 		*bptr = '\0';
78 		bptr++;
79 
80 		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
81 
82 		if (tokens <= 0)
83 			continue;
84 
85 		if (!strcmp(token[0], "volume")) {
86 			v = gv_new_volume(tokens, token);
87 			if (v == NULL) {
88 				G_VINUM_DEBUG(0, "config parse failed volume");
89 				break;
90 			}
91 
92 			v2 = gv_find_vol(sc, v->name);
93 			if (v2 != NULL) {
94 				if (is_newer) {
95 					v2->state = v->state;
96 					G_VINUM_DEBUG(2, "newer volume found!");
97 				}
98 				g_free(v);
99 				continue;
100 			}
101 
102 			gv_create_volume(sc, v);
103 
104 		} else if (!strcmp(token[0], "plex")) {
105 			p = gv_new_plex(tokens, token);
106 			if (p == NULL) {
107 				G_VINUM_DEBUG(0, "config parse failed plex");
108 				break;
109 			}
110 
111 			p2 = gv_find_plex(sc, p->name);
112 			if (p2 != NULL) {
113 				/* XXX */
114 				if (is_newer) {
115 					p2->state = p->state;
116 					G_VINUM_DEBUG(2, "newer plex found!");
117 				}
118 				g_free(p);
119 				continue;
120 			}
121 
122 			error = gv_create_plex(sc, p);
123 			if (error)
124 				continue;
125 			/*
126 			 * These flags were set in gv_create_plex() and are not
127 			 * needed here (on-disk config parsing).
128 			 */
129 			p->flags &= ~GV_PLEX_ADDED;
130 
131 		} else if (!strcmp(token[0], "sd")) {
132 			s = gv_new_sd(tokens, token);
133 
134 			if (s == NULL) {
135 				G_VINUM_DEBUG(0, "config parse failed subdisk");
136 				break;
137 			}
138 
139 			s2 = gv_find_sd(sc, s->name);
140 			if (s2 != NULL) {
141 				/* XXX */
142 				if (is_newer) {
143 					s2->state = s->state;
144 					G_VINUM_DEBUG(2, "newer subdisk found!");
145 				}
146 				g_free(s);
147 				continue;
148 			}
149 
150 			/*
151 			 * Signal that this subdisk was tasted, and could
152 			 * possibly reference a drive that isn't in our config
153 			 * yet.
154 			 */
155 			s->flags |= GV_SD_TASTED;
156 
157 			if (s->state == GV_SD_UP)
158 				s->flags |= GV_SD_CANGOUP;
159 
160 			error = gv_create_sd(sc, s);
161 			if (error)
162 				continue;
163 
164 			/*
165 			 * This flag was set in gv_create_sd() and is not
166 			 * needed here (on-disk config parsing).
167 			 */
168 			s->flags &= ~GV_SD_NEWBORN;
169 			s->flags &= ~GV_SD_GROW;
170 		}
171 	}
172 }
173 
174 /*
175  * Format the vinum configuration properly.  If ondisk is non-zero then the
176  * configuration is intended to be written to disk later.
177  */
178 void
179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
180 {
181 	struct gv_drive *d;
182 	struct gv_sd *s;
183 	struct gv_plex *p;
184 	struct gv_volume *v;
185 
186 	/*
187 	 * We don't need the drive configuration if we're not writing the
188 	 * config to disk.
189 	 */
190 	if (!ondisk) {
191 		LIST_FOREACH(d, &sc->drives, drive) {
192 			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
193 			    d->name, d->device);
194 		}
195 	}
196 
197 	LIST_FOREACH(v, &sc->volumes, volume) {
198 		if (!ondisk)
199 			sbuf_printf(sb, "%s", prefix);
200 		sbuf_printf(sb, "volume %s", v->name);
201 		if (ondisk)
202 			sbuf_printf(sb, " state %s", gv_volstate(v->state));
203 		sbuf_printf(sb, "\n");
204 	}
205 
206 	LIST_FOREACH(p, &sc->plexes, plex) {
207 		if (!ondisk)
208 			sbuf_printf(sb, "%s", prefix);
209 		sbuf_printf(sb, "plex name %s org %s ", p->name,
210 		    gv_plexorg(p->org));
211 		if (gv_is_striped(p))
212 			sbuf_printf(sb, "%ds ", p->stripesize / 512);
213 		if (p->vol_sc != NULL)
214 			sbuf_printf(sb, "vol %s", p->volume);
215 		if (ondisk)
216 			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
217 		sbuf_printf(sb, "\n");
218 	}
219 
220 	LIST_FOREACH(s, &sc->subdisks, sd) {
221 		if (!ondisk)
222 			sbuf_printf(sb, "%s", prefix);
223 		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
224 		    "%jds", s->name, s->drive, s->size / 512,
225 		    s->drive_offset / 512);
226 		if (s->plex_sc != NULL) {
227 			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
228 			    s->plex_offset / 512);
229 		}
230 		if (ondisk)
231 			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
232 		sbuf_printf(sb, "\n");
233 	}
234 }
235 
236 static off_t
237 gv_plex_smallest_sd(struct gv_plex *p)
238 {
239 	struct gv_sd *s;
240 	off_t smallest;
241 
242 	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
243 
244 	s = LIST_FIRST(&p->subdisks);
245 	if (s == NULL)
246 		return (-1);
247 	smallest = s->size;
248 	LIST_FOREACH(s, &p->subdisks, in_plex) {
249 		if (s->size < smallest)
250 			smallest = s->size;
251 	}
252 	return (smallest);
253 }
254 
255 /* Walk over plexes in a volume and count how many are down. */
256 int
257 gv_plexdown(struct gv_volume *v)
258 {
259 	int plexdown;
260 	struct gv_plex *p;
261 
262 	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
263 
264 	plexdown = 0;
265 
266 	LIST_FOREACH(p, &v->plexes, plex) {
267 		if (p->state == GV_PLEX_DOWN)
268 			plexdown++;
269 	}
270 	return (plexdown);
271 }
272 
273 int
274 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
275 {
276 	struct gv_sd *s2;
277 	off_t psizeorig, remainder, smallest;
278 
279 	/* If this subdisk was already given to this plex, do nothing. */
280 	if (s->plex_sc == p)
281 		return (0);
282 
283 	/* Check correct size of this subdisk. */
284 	s2 = LIST_FIRST(&p->subdisks);
285 	/* Adjust the subdisk-size if necessary. */
286 	if (s2 != NULL && gv_is_striped(p)) {
287 		/* First adjust to the stripesize. */
288 		remainder = s->size % p->stripesize;
289 
290 		if (remainder) {
291 			G_VINUM_DEBUG(1, "size of sd %s is not a "
292 			    "multiple of plex stripesize, taking off "
293 			    "%jd bytes", s->name,
294 			    (intmax_t)remainder);
295 			gv_adjust_freespace(s, remainder);
296 		}
297 
298 		smallest = gv_plex_smallest_sd(p);
299 		/* Then take off extra if other subdisks are smaller. */
300 		remainder = s->size - smallest;
301 
302 		/*
303 		 * Don't allow a remainder below zero for running plexes, it's too
304 		 * painful, and if someone were to accidentally do this, the
305 		 * resulting array might be smaller than the original... not god
306 		 */
307 		if (remainder < 0) {
308 			if (!(p->flags & GV_PLEX_NEWBORN)) {
309 				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
310 				    s->name, p->name);
311 				return (GV_ERR_BADSIZE);
312 			}
313 			/* Adjust other subdisks. */
314 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
315 				G_VINUM_DEBUG(1, "size of sd %s is to big, "
316 				    "taking off %jd bytes", s->name,
317 				    (intmax_t)remainder);
318 				gv_adjust_freespace(s2, (remainder * -1));
319 			}
320 		} else if (remainder > 0) {
321 			G_VINUM_DEBUG(1, "size of sd %s is to big, "
322 			    "taking off %jd bytes", s->name,
323 			    (intmax_t)remainder);
324 			gv_adjust_freespace(s, remainder);
325 		}
326 	}
327 
328 	/* Find the correct plex offset for this subdisk, if needed. */
329 	if (s->plex_offset == -1) {
330 		/*
331 		 * First set it to 0 to catch the case where we had a detached
332 		 * subdisk that didn't get any good offset.
333 		 */
334 		s->plex_offset = 0;
335 		if (p->sdcount) {
336 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
337 				if (gv_is_striped(p))
338 					s->plex_offset = p->sdcount *
339 					    p->stripesize;
340 				else
341 					s->plex_offset = s2->plex_offset +
342 					    s2->size;
343 			}
344 		}
345 	}
346 
347 	/* There are no subdisks for this plex yet, just insert it. */
348 	if (LIST_EMPTY(&p->subdisks)) {
349 		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
350 
351 	/* Insert in correct order, depending on plex_offset. */
352 	} else {
353 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
354 			if (s->plex_offset < s2->plex_offset) {
355 				LIST_INSERT_BEFORE(s2, s, in_plex);
356 				break;
357 			} else if (LIST_NEXT(s2, in_plex) == NULL) {
358 				LIST_INSERT_AFTER(s2, s, in_plex);
359 				break;
360 			}
361 		}
362 	}
363 
364 	s->plex_sc = p;
365         /* Adjust the size of our plex. We check if the plex misses a subdisk,
366 	 * so we don't make the plex smaller than it actually should be.
367 	 */
368 	psizeorig = p->size;
369 	p->size = gv_plex_size(p);
370 	/* Make sure the size is not changed. */
371 	if (p->sddetached > 0) {
372 		if (p->size < psizeorig) {
373 			p->size = psizeorig;
374 			/* We make sure wee need another subdisk. */
375 			if (p->sddetached == 1)
376 				p->sddetached++;
377 		}
378 		p->sddetached--;
379 	} else {
380 		if ((p->org == GV_PLEX_RAID5 ||
381 		    p->org == GV_PLEX_STRIPED) &&
382 		    !(p->flags & GV_PLEX_NEWBORN) &&
383 		    p->state == GV_PLEX_UP) {
384 			s->flags |= GV_SD_GROW;
385 		}
386 		p->sdcount++;
387 	}
388 
389 	return (0);
390 }
391 
392 void
393 gv_update_vol_size(struct gv_volume *v, off_t size)
394 {
395 	if (v == NULL)
396 		return;
397 	if (v->provider != NULL) {
398 		g_topology_lock();
399 		v->provider->mediasize = size;
400 		g_topology_unlock();
401 	}
402 	v->size = size;
403 }
404 
405 /* Return how many subdisks that constitute the original plex. */
406 int
407 gv_sdcount(struct gv_plex *p, int growing)
408 {
409 	struct gv_sd *s;
410 	int sdcount;
411 
412 	sdcount = p->sdcount;
413 	if (growing) {
414 		LIST_FOREACH(s, &p->subdisks, in_plex) {
415 			if (s->flags & GV_SD_GROW)
416 				sdcount--;
417 		}
418 	}
419 
420 	return (sdcount);
421 }
422 
423 /* Calculates the plex size. */
424 off_t
425 gv_plex_size(struct gv_plex *p)
426 {
427 	struct gv_sd *s;
428 	off_t size;
429 	int sdcount;
430 
431 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
432 
433 	/* Adjust the size of our plex. */
434 	size = 0;
435 	sdcount = gv_sdcount(p, 1);
436 	switch (p->org) {
437 	case GV_PLEX_CONCAT:
438 		LIST_FOREACH(s, &p->subdisks, in_plex)
439 			size += s->size;
440 		break;
441 	case GV_PLEX_STRIPED:
442 		s = LIST_FIRST(&p->subdisks);
443 		size = ((s != NULL) ? (sdcount * s->size) : 0);
444 		break;
445 	case GV_PLEX_RAID5:
446 		s = LIST_FIRST(&p->subdisks);
447 		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
448 		break;
449 	}
450 
451 	return (size);
452 }
453 
454 /* Returns the size of a volume. */
455 off_t
456 gv_vol_size(struct gv_volume *v)
457 {
458 	struct gv_plex *p;
459 	off_t minplexsize;
460 
461 	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
462 
463 	p = LIST_FIRST(&v->plexes);
464 	if (p == NULL)
465 		return (0);
466 
467 	minplexsize = p->size;
468 	LIST_FOREACH(p, &v->plexes, in_volume) {
469 		if (p->size < minplexsize) {
470 			minplexsize = p->size;
471 		}
472 	}
473 	return (minplexsize);
474 }
475 
476 void
477 gv_update_plex_config(struct gv_plex *p)
478 {
479 	struct gv_sd *s, *s2;
480 	off_t remainder;
481 	int required_sds, state;
482 
483 	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
484 
485 	/* The plex was added to an already running volume. */
486 	if (p->flags & GV_PLEX_ADDED)
487 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
488 
489 	switch (p->org) {
490 	case GV_PLEX_STRIPED:
491 		required_sds = 2;
492 		break;
493 	case GV_PLEX_RAID5:
494 		required_sds = 3;
495 		break;
496 	case GV_PLEX_CONCAT:
497 	default:
498 		required_sds = 0;
499 		break;
500 	}
501 
502 	if (required_sds) {
503 		if (p->sdcount < required_sds) {
504 			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
505 		}
506 
507 		/*
508 		 * The subdisks in striped plexes must all have the same size.
509 		 */
510 		s = LIST_FIRST(&p->subdisks);
511 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
512 			if (s->size != s2->size) {
513 				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
514 				    "(%jd) <> %s (%jd)", s->name, s->size,
515 				    s2->name, s2->size);
516 				gv_set_plex_state(p, GV_PLEX_DOWN,
517 				    GV_SETSTATE_FORCE);
518 			}
519 		}
520 
521 		LIST_FOREACH(s, &p->subdisks, in_plex) {
522 			/* Trim subdisk sizes to match the stripe size. */
523 			remainder = s->size % p->stripesize;
524 			if (remainder) {
525 				G_VINUM_DEBUG(1, "size of sd %s is not a "
526 				    "multiple of plex stripesize, taking off "
527 				    "%jd bytes", s->name, (intmax_t)remainder);
528 				gv_adjust_freespace(s, remainder);
529 			}
530 		}
531 	}
532 
533 	p->size = gv_plex_size(p);
534 	if (p->sdcount == 0)
535 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
536 	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
537 		LIST_FOREACH(s, &p->subdisks, in_plex)
538 			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
539 		/* If added to a volume, we want the plex to be down. */
540 		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
541 		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
542 		p->flags &= ~GV_PLEX_ADDED;
543 	} else if (p->flags & GV_PLEX_ADDED) {
544 		LIST_FOREACH(s, &p->subdisks, in_plex)
545 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
546 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
547 		p->flags &= ~GV_PLEX_ADDED;
548 	} else if (p->state == GV_PLEX_UP) {
549 		LIST_FOREACH(s, &p->subdisks, in_plex) {
550 			if (s->flags & GV_SD_GROW) {
551 				gv_set_plex_state(p, GV_PLEX_GROWABLE,
552 				    GV_SETSTATE_FORCE);
553 				break;
554 			}
555 		}
556 	}
557 	/* Our plex is grown up now. */
558 	p->flags &= ~GV_PLEX_NEWBORN;
559 }
560 
561 /*
562  * Give a subdisk to a drive, check and adjust several parameters, adjust
563  * freelist.
564  */
565 int
566 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
567 {
568 	struct gv_sd *s2;
569 	struct gv_freelist *fl, *fl2;
570 	off_t tmp;
571 	int i;
572 
573 	fl2 = NULL;
574 
575 	/* Shortcut for "referenced" drives. */
576 	if (d->flags & GV_DRIVE_REFERENCED) {
577 		s->drive_sc = d;
578 		return (0);
579 	}
580 
581 	/* Check if this subdisk was already given to this drive. */
582 	if (s->drive_sc != NULL) {
583 		if (s->drive_sc == d) {
584 			if (!(s->flags & GV_SD_TASTED)) {
585 				return (0);
586 			}
587 		} else {
588 			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
589 			    "(already on '%s')", s->name, d->name,
590 			    s->drive_sc->name);
591 			return (GV_ERR_ISATTACHED);
592 		}
593 	}
594 
595 	/* Preliminary checks. */
596 	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
597 		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
598 		    s->name);
599 		return (GV_ERR_NOSPACE);
600 	}
601 
602 	/* If no size was given for this subdisk, try to auto-size it... */
603 	if (s->size == -1) {
604 		/* Find the largest available slot. */
605 		LIST_FOREACH(fl, &d->freelist, freelist) {
606 			if (fl->size < s->size)
607 				continue;
608 			s->size = fl->size;
609 			s->drive_offset = fl->offset;
610 			fl2 = fl;
611 		}
612 
613 		/* No good slot found? */
614 		if (s->size == -1) {
615 			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
616 			    s->name, d->name);
617 			return (GV_ERR_BADSIZE);
618 		}
619 
620 	/*
621 	 * ... or check if we have a free slot that's large enough for the
622 	 * given size.
623 	 */
624 	} else {
625 		i = 0;
626 		LIST_FOREACH(fl, &d->freelist, freelist) {
627 			if (fl->size < s->size)
628 				continue;
629 			/* Assign drive offset, if not given. */
630 			if (s->drive_offset == -1)
631 				s->drive_offset = fl->offset;
632 			fl2 = fl;
633 			i++;
634 			break;
635 		}
636 
637 		/* Couldn't find a good free slot. */
638 		if (i == 0) {
639 			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
640 			    s->name, d->name);
641 			return (GV_ERR_NOSPACE);
642 		}
643 	}
644 
645 	/* No drive offset given, try to calculate it. */
646 	if (s->drive_offset == -1) {
647 
648 		/* Add offsets and sizes from other subdisks on this drive. */
649 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
650 			s->drive_offset = s2->drive_offset + s2->size;
651 		}
652 
653 		/*
654 		 * If there are no other subdisks yet, then set the default
655 		 * offset to GV_DATA_START.
656 		 */
657 		if (s->drive_offset == -1)
658 			s->drive_offset = GV_DATA_START;
659 
660 	/* Check if we have a free slot at the given drive offset. */
661 	} else {
662 		i = 0;
663 		LIST_FOREACH(fl, &d->freelist, freelist) {
664 			/* Yes, this subdisk fits. */
665 			if ((fl->offset <= s->drive_offset) &&
666 			    (fl->offset + fl->size >=
667 			    s->drive_offset + s->size)) {
668 				i++;
669 				fl2 = fl;
670 				break;
671 			}
672 		}
673 
674 		/* Couldn't find a good free slot. */
675 		if (i == 0) {
676 			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677 			    "on '%s'", s->name, d->name);
678 			return (GV_ERR_NOSPACE);
679 		}
680 	}
681 
682 	/*
683 	 * Now that all parameters are checked and set up, we can give the
684 	 * subdisk to the drive and adjust the freelist.
685 	 */
686 
687 	/* First, adjust the freelist. */
688 	LIST_FOREACH(fl, &d->freelist, freelist) {
689 		/* Look for the free slot that we have found before. */
690 		if (fl != fl2)
691 			continue;
692 
693 		/* The subdisk starts at the beginning of the free slot. */
694 		if (fl->offset == s->drive_offset) {
695 			fl->offset += s->size;
696 			fl->size -= s->size;
697 
698 			/* The subdisk uses the whole slot, so remove it. */
699 			if (fl->size == 0) {
700 				d->freelist_entries--;
701 				LIST_REMOVE(fl, freelist);
702 			}
703 		/*
704 		 * The subdisk does not start at the beginning of the free
705 		 * slot.
706 		 */
707 		} else {
708 			tmp = fl->offset + fl->size;
709 			fl->size = s->drive_offset - fl->offset;
710 
711 			/*
712 			 * The subdisk didn't use the complete rest of the free
713 			 * slot, so we need to split it.
714 			 */
715 			if (s->drive_offset + s->size != tmp) {
716 				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717 				fl2->offset = s->drive_offset + s->size;
718 				fl2->size = tmp - fl2->offset;
719 				LIST_INSERT_AFTER(fl, fl2, freelist);
720 				d->freelist_entries++;
721 			}
722 		}
723 		break;
724 	}
725 
726 	/*
727 	 * This is the first subdisk on this drive, just insert it into the
728 	 * list.
729 	 */
730 	if (LIST_EMPTY(&d->subdisks)) {
731 		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732 
733 	/* There are other subdisks, so insert this one in correct order. */
734 	} else {
735 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
736 			if (s->drive_offset < s2->drive_offset) {
737 				LIST_INSERT_BEFORE(s2, s, from_drive);
738 				break;
739 			} else if (LIST_NEXT(s2, from_drive) == NULL) {
740 				LIST_INSERT_AFTER(s2, s, from_drive);
741 				break;
742 			}
743 		}
744 	}
745 
746 	d->sdcount++;
747 	d->avail -= s->size;
748 
749 	s->flags &= ~GV_SD_TASTED;
750 
751 	/* Link back from the subdisk to this drive. */
752 	s->drive_sc = d;
753 
754 	return (0);
755 }
756 
757 void
758 gv_free_sd(struct gv_sd *s)
759 {
760 	struct gv_drive *d;
761 	struct gv_freelist *fl, *fl2;
762 
763 	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764 
765 	d = s->drive_sc;
766 	if (d == NULL)
767 		return;
768 
769 	/*
770 	 * First, find the free slot that's immediately before or after this
771 	 * subdisk.
772 	 */
773 	fl = NULL;
774 	LIST_FOREACH(fl, &d->freelist, freelist) {
775 		if (fl->offset == s->drive_offset + s->size)
776 			break;
777 		if (fl->offset + fl->size == s->drive_offset)
778 			break;
779 	}
780 
781 	/* If there is no free slot behind this subdisk, so create one. */
782 	if (fl == NULL) {
783 
784 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
785 		fl->size = s->size;
786 		fl->offset = s->drive_offset;
787 
788 		if (d->freelist_entries == 0) {
789 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
790 		} else {
791 			LIST_FOREACH(fl2, &d->freelist, freelist) {
792 				if (fl->offset < fl2->offset) {
793 					LIST_INSERT_BEFORE(fl2, fl, freelist);
794 					break;
795 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
796 					LIST_INSERT_AFTER(fl2, fl, freelist);
797 					break;
798 				}
799 			}
800 		}
801 
802 		d->freelist_entries++;
803 
804 	/* Expand the free slot we just found. */
805 	} else {
806 		fl->size += s->size;
807 		if (fl->offset > s->drive_offset)
808 			fl->offset = s->drive_offset;
809 	}
810 
811 	d->avail += s->size;
812 	d->sdcount--;
813 }
814 
815 void
816 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
817 {
818 	struct gv_drive *d;
819 	struct gv_freelist *fl, *fl2;
820 
821 	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
822 	d = s->drive_sc;
823 	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
824 
825 	/* First, find the free slot that's immediately after this subdisk. */
826 	fl = NULL;
827 	LIST_FOREACH(fl, &d->freelist, freelist) {
828 		if (fl->offset == s->drive_offset + s->size)
829 			break;
830 	}
831 
832 	/* If there is no free slot behind this subdisk, so create one. */
833 	if (fl == NULL) {
834 
835 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
836 		fl->size = remainder;
837 		fl->offset = s->drive_offset + s->size - remainder;
838 
839 		if (d->freelist_entries == 0) {
840 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
841 		} else {
842 			LIST_FOREACH(fl2, &d->freelist, freelist) {
843 				if (fl->offset < fl2->offset) {
844 					LIST_INSERT_BEFORE(fl2, fl, freelist);
845 					break;
846 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
847 					LIST_INSERT_AFTER(fl2, fl, freelist);
848 					break;
849 				}
850 			}
851 		}
852 
853 		d->freelist_entries++;
854 
855 	/* Expand the free slot we just found. */
856 	} else {
857 		fl->offset -= remainder;
858 		fl->size += remainder;
859 	}
860 
861 	s->size -= remainder;
862 	d->avail += remainder;
863 }
864 
865 /* Check if the given plex is a striped one. */
866 int
867 gv_is_striped(struct gv_plex *p)
868 {
869 	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
870 	switch(p->org) {
871 	case GV_PLEX_STRIPED:
872 	case GV_PLEX_RAID5:
873 		return (1);
874 	default:
875 		return (0);
876 	}
877 }
878 
879 /* Find a volume by name. */
880 struct gv_volume *
881 gv_find_vol(struct gv_softc *sc, char *name)
882 {
883 	struct gv_volume *v;
884 
885 	LIST_FOREACH(v, &sc->volumes, volume) {
886 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
887 			return (v);
888 	}
889 
890 	return (NULL);
891 }
892 
893 /* Find a plex by name. */
894 struct gv_plex *
895 gv_find_plex(struct gv_softc *sc, char *name)
896 {
897 	struct gv_plex *p;
898 
899 	LIST_FOREACH(p, &sc->plexes, plex) {
900 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
901 			return (p);
902 	}
903 
904 	return (NULL);
905 }
906 
907 /* Find a subdisk by name. */
908 struct gv_sd *
909 gv_find_sd(struct gv_softc *sc, char *name)
910 {
911 	struct gv_sd *s;
912 
913 	LIST_FOREACH(s, &sc->subdisks, sd) {
914 		if (!strncmp(s->name, name, GV_MAXSDNAME))
915 			return (s);
916 	}
917 
918 	return (NULL);
919 }
920 
921 /* Find a drive by name. */
922 struct gv_drive *
923 gv_find_drive(struct gv_softc *sc, char *name)
924 {
925 	struct gv_drive *d;
926 
927 	LIST_FOREACH(d, &sc->drives, drive) {
928 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
929 			return (d);
930 	}
931 
932 	return (NULL);
933 }
934 
935 /* Find a drive given a device. */
936 struct gv_drive *
937 gv_find_drive_device(struct gv_softc *sc, char *device)
938 {
939 	struct gv_drive *d;
940 
941 	LIST_FOREACH(d, &sc->drives, drive) {
942 		if(!strcmp(d->device, device))
943 			return (d);
944 	}
945 
946 	return (NULL);
947 }
948 
949 /* Check if any consumer of the given geom is open. */
950 int
951 gv_consumer_is_open(struct g_consumer *cp)
952 {
953 	if (cp == NULL)
954 		return (0);
955 
956 	if (cp->acr || cp->acw || cp->ace)
957 		return (1);
958 
959 	return (0);
960 }
961 
962 int
963 gv_provider_is_open(struct g_provider *pp)
964 {
965 	if (pp == NULL)
966 		return (0);
967 
968 	if (pp->acr || pp->acw || pp->ace)
969 		return (1);
970 
971 	return (0);
972 }
973 
974 /*
975  * Compare the modification dates of the drives.
976  * Return 1 if a > b, 0 otherwise.
977  */
978 int
979 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
980 {
981 	struct gv_drive *d2;
982 	struct timeval *a, *b;
983 
984 	KASSERT(!LIST_EMPTY(&sc->drives),
985 	    ("gv_is_drive_newer: empty drive list"));
986 
987 	a = &d->hdr->label.last_update;
988 	LIST_FOREACH(d2, &sc->drives, drive) {
989 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
990 		    (d2->hdr == NULL))
991 			continue;
992 		b = &d2->hdr->label.last_update;
993 		if (timevalcmp(a, b, >))
994 			return (1);
995 	}
996 
997 	return (0);
998 }
999 
1000 /* Return the type of object identified by string 'name'. */
1001 int
1002 gv_object_type(struct gv_softc *sc, char *name)
1003 {
1004 	struct gv_drive *d;
1005 	struct gv_plex *p;
1006 	struct gv_sd *s;
1007 	struct gv_volume *v;
1008 
1009 	LIST_FOREACH(v, &sc->volumes, volume) {
1010 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1011 			return (GV_TYPE_VOL);
1012 	}
1013 
1014 	LIST_FOREACH(p, &sc->plexes, plex) {
1015 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1016 			return (GV_TYPE_PLEX);
1017 	}
1018 
1019 	LIST_FOREACH(s, &sc->subdisks, sd) {
1020 		if (!strncmp(s->name, name, GV_MAXSDNAME))
1021 			return (GV_TYPE_SD);
1022 	}
1023 
1024 	LIST_FOREACH(d, &sc->drives, drive) {
1025 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1026 			return (GV_TYPE_DRIVE);
1027 	}
1028 
1029 	return (GV_ERR_NOTFOUND);
1030 }
1031 
1032 void
1033 gv_setup_objects(struct gv_softc *sc)
1034 {
1035 	struct g_provider *pp;
1036 	struct gv_volume *v;
1037 	struct gv_plex *p;
1038 	struct gv_sd *s;
1039 	struct gv_drive *d;
1040 
1041 	LIST_FOREACH(s, &sc->subdisks, sd) {
1042 		d = gv_find_drive(sc, s->drive);
1043 		if (d != NULL)
1044 			gv_sd_to_drive(s, d);
1045 		p = gv_find_plex(sc, s->plex);
1046 		if (p != NULL)
1047 			gv_sd_to_plex(s, p);
1048 		gv_update_sd_state(s);
1049 	}
1050 
1051 	LIST_FOREACH(p, &sc->plexes, plex) {
1052 		gv_update_plex_config(p);
1053 		v = gv_find_vol(sc, p->volume);
1054 		if (v != NULL && p->vol_sc != v) {
1055 			p->vol_sc = v;
1056 			v->plexcount++;
1057 			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1058 		}
1059 		gv_update_plex_config(p);
1060 	}
1061 
1062 	LIST_FOREACH(v, &sc->volumes, volume) {
1063 		v->size = gv_vol_size(v);
1064 		if (v->provider == NULL) {
1065 			g_topology_lock();
1066 			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1067 			pp->mediasize = v->size;
1068 			pp->sectorsize = 512;    /* XXX */
1069 			g_error_provider(pp, 0);
1070 			v->provider = pp;
1071 			pp->private = v;
1072 			g_topology_unlock();
1073 		} else if (v->provider->mediasize != v->size) {
1074 			g_topology_lock();
1075 			v->provider->mediasize = v->size;
1076 			g_topology_unlock();
1077 		}
1078 		v->flags &= ~GV_VOL_NEWBORN;
1079 		gv_update_vol_state(v);
1080 	}
1081 }
1082 
1083 void
1084 gv_cleanup(struct gv_softc *sc)
1085 {
1086 	struct gv_volume *v, *v2;
1087 	struct gv_plex *p, *p2;
1088 	struct gv_sd *s, *s2;
1089 	struct gv_drive *d, *d2;
1090 	struct gv_freelist *fl, *fl2;
1091 
1092 	mtx_lock(&sc->config_mtx);
1093 	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1094 		LIST_REMOVE(v, volume);
1095 		g_free(v->wqueue);
1096 		g_free(v);
1097 	}
1098 	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1099 		LIST_REMOVE(p, plex);
1100 		g_free(p->bqueue);
1101 		g_free(p->rqueue);
1102 		g_free(p->wqueue);
1103 		g_free(p);
1104 	}
1105 	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1106 		LIST_REMOVE(s, sd);
1107 		g_free(s);
1108 	}
1109 	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1110 		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1111 			LIST_REMOVE(fl, freelist);
1112 			g_free(fl);
1113 		}
1114 		LIST_REMOVE(d, drive);
1115 		g_free(d->hdr);
1116 		g_free(d);
1117 	}
1118 	mtx_destroy(&sc->config_mtx);
1119 }
1120 
1121 /* General 'attach' routine. */
1122 int
1123 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1124 {
1125 	struct gv_sd *s;
1126 	struct gv_softc *sc;
1127 
1128 	g_topology_assert();
1129 
1130 	sc = p->vinumconf;
1131 	KASSERT(sc != NULL, ("NULL sc"));
1132 
1133 	if (p->vol_sc != NULL) {
1134 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1135 		    p->name, p->volume);
1136 		return (GV_ERR_ISATTACHED);
1137 	}
1138 
1139 	/* Stale all subdisks of this plex. */
1140 	LIST_FOREACH(s, &p->subdisks, in_plex) {
1141 		if (s->state != GV_SD_STALE)
1142 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1143 	}
1144 	/* Attach to volume. Make sure volume is not up and running. */
1145 	if (gv_provider_is_open(v->provider)) {
1146 		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1147 		    p->name, v->name);
1148 		return (GV_ERR_ISBUSY);
1149 	}
1150 	p->vol_sc = v;
1151 	strlcpy(p->volume, v->name, sizeof(p->volume));
1152 	v->plexcount++;
1153 	if (rename) {
1154 		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1155 		    v->plexcount);
1156 	}
1157 	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1158 
1159 	/* Get plex up again. */
1160 	gv_update_vol_size(v, gv_vol_size(v));
1161 	gv_set_plex_state(p, GV_PLEX_UP, 0);
1162 	gv_save_config(p->vinumconf);
1163 	return (0);
1164 }
1165 
1166 int
1167 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1168 {
1169 	struct gv_sd *s2;
1170 	int error, sdcount;
1171 
1172 	g_topology_assert();
1173 
1174 	/* If subdisk is attached, don't do it. */
1175 	if (s->plex_sc != NULL) {
1176 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1177 		    s->name, s->plex);
1178 		return (GV_ERR_ISATTACHED);
1179 	}
1180 
1181 	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1182 	/* First check that this subdisk has a correct offset. If none other
1183 	 * starts at the same, and it's correct module stripesize, it is */
1184 	if (offset != -1 && offset % p->stripesize != 0)
1185 		return (GV_ERR_BADOFFSET);
1186 	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1187 		if (s2->plex_offset == offset)
1188 			return (GV_ERR_BADOFFSET);
1189 	}
1190 
1191 	/* Attach the subdisk to the plex at given offset. */
1192 	s->plex_offset = offset;
1193 	strlcpy(s->plex, p->name, sizeof(s->plex));
1194 
1195 	sdcount = p->sdcount;
1196 	error = gv_sd_to_plex(s, p);
1197 	if (error)
1198 		return (error);
1199 	gv_update_plex_config(p);
1200 
1201 	if (rename) {
1202 		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1203 		    p->sdcount);
1204 	}
1205 	if (p->vol_sc != NULL)
1206 		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1207 	gv_save_config(p->vinumconf);
1208 	/* We don't update the subdisk state since the user might have to
1209 	 * initiate a rebuild/sync first. */
1210 	return (0);
1211 }
1212 
1213 /* Detach a plex from a volume. */
1214 int
1215 gv_detach_plex(struct gv_plex *p, int flags)
1216 {
1217 	struct gv_volume *v;
1218 
1219 	g_topology_assert();
1220 	v = p->vol_sc;
1221 
1222 	if (v == NULL) {
1223 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1224 		    p->name);
1225 		return (0); /* Not an error. */
1226 	}
1227 
1228 	/*
1229 	 * Only proceed if forced or volume inactive.
1230 	 */
1231 	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1232 	    p->state == GV_PLEX_UP)) {
1233 		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1234 		    p->name, p->volume);
1235 		return (GV_ERR_ISBUSY);
1236 	}
1237 	v->plexcount--;
1238 	/* Make sure someone don't read us when gone. */
1239 	v->last_read_plex = NULL;
1240 	LIST_REMOVE(p, in_volume);
1241 	p->vol_sc = NULL;
1242 	memset(p->volume, 0, GV_MAXVOLNAME);
1243 	gv_update_vol_size(v, gv_vol_size(v));
1244 	gv_save_config(p->vinumconf);
1245 	return (0);
1246 }
1247 
1248 /* Detach a subdisk from a plex. */
1249 int
1250 gv_detach_sd(struct gv_sd *s, int flags)
1251 {
1252 	struct gv_plex *p;
1253 
1254 	g_topology_assert();
1255 	p = s->plex_sc;
1256 
1257 	if (p == NULL) {
1258 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1259 		    s->name);
1260 		return (0); /* Not an error. */
1261 	}
1262 
1263 	/*
1264 	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1265 	 * with this subdisk up.
1266 	 */
1267 	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1268 	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1269 	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1270 		    s->name, s->plex);
1271 		return (GV_ERR_ISBUSY);
1272 	}
1273 
1274 	LIST_REMOVE(s, in_plex);
1275 	s->plex_sc = NULL;
1276 	memset(s->plex, 0, GV_MAXPLEXNAME);
1277 	p->sddetached++;
1278 	gv_save_config(s->vinumconf);
1279 	return (0);
1280 }
1281