xref: /freebsd/sys/geom/vinum/geom_vinum_subr.c (revision 0dbdecfd4f23486da21345fe446a28a1c27db3f2)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * Copyright (c) 2007, 2009 Ulf Lilleengen
6  * Copyright (c) 1997, 1998, 1999
7  *      Nan Yang Computer Services Limited.  All rights reserved.
8  *
9  *  Parts written by Greg Lehey
10  *
11  *  This software is distributed under the so-called ``Berkeley
12  *  License'':
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *      This product includes software developed by Nan Yang Computer
25  *      Services Limited.
26  * 4. Neither the name of the Company nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * This software is provided ``as is'', and any express or implied
31  * warranties, including, but not limited to, the implied warranties of
32  * merchantability and fitness for a particular purpose are disclaimed.
33  * In no event shall the company or contributors be liable for any
34  * direct, indirect, incidental, special, exemplary, or consequential
35  * damages (including, but not limited to, procurement of substitute
36  * goods or services; loss of use, data, or profits; or business
37  * interruption) however caused and on any theory of liability, whether
38  * in contract, strict liability, or tort (including negligence or
39  * otherwise) arising in any way out of the use of this software, even if
40  * advised of the possibility of such damage.
41  *
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include <sys/param.h>
48 #include <sys/malloc.h>
49 #include <sys/sbuf.h>
50 #include <sys/systm.h>
51 
52 #include <geom/geom.h>
53 #include <geom/vinum/geom_vinum_var.h>
54 #include <geom/vinum/geom_vinum.h>
55 #include <geom/vinum/geom_vinum_share.h>
56 
57 int	gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
58 static off_t gv_plex_smallest_sd(struct gv_plex *);
59 
60 void
61 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
62 {
63 	char *aptr, *bptr, *cptr;
64 	struct gv_volume *v, *v2;
65 	struct gv_plex *p, *p2;
66 	struct gv_sd *s, *s2;
67 	int error, is_newer, tokens;
68 	char *token[GV_MAXARGS];
69 
70 	is_newer = gv_drive_is_newer(sc, d);
71 
72 	/* Until the end of the string *buf. */
73 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
74 		bptr = aptr;
75 		cptr = aptr;
76 
77 		/* Separate input lines. */
78 		while (*bptr != '\n')
79 			bptr++;
80 		*bptr = '\0';
81 		bptr++;
82 
83 		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
84 
85 		if (tokens <= 0)
86 			continue;
87 
88 		if (!strcmp(token[0], "volume")) {
89 			v = gv_new_volume(tokens, token);
90 			if (v == NULL) {
91 				G_VINUM_DEBUG(0, "config parse failed volume");
92 				break;
93 			}
94 
95 			v2 = gv_find_vol(sc, v->name);
96 			if (v2 != NULL) {
97 				if (is_newer) {
98 					v2->state = v->state;
99 					G_VINUM_DEBUG(2, "newer volume found!");
100 				}
101 				g_free(v);
102 				continue;
103 			}
104 
105 			gv_create_volume(sc, v);
106 
107 		} else if (!strcmp(token[0], "plex")) {
108 			p = gv_new_plex(tokens, token);
109 			if (p == NULL) {
110 				G_VINUM_DEBUG(0, "config parse failed plex");
111 				break;
112 			}
113 
114 			p2 = gv_find_plex(sc, p->name);
115 			if (p2 != NULL) {
116 				/* XXX */
117 				if (is_newer) {
118 					p2->state = p->state;
119 					G_VINUM_DEBUG(2, "newer plex found!");
120 				}
121 				g_free(p);
122 				continue;
123 			}
124 
125 			error = gv_create_plex(sc, p);
126 			if (error)
127 				continue;
128 			/*
129 			 * These flags were set in gv_create_plex() and are not
130 			 * needed here (on-disk config parsing).
131 			 */
132 			p->flags &= ~GV_PLEX_ADDED;
133 
134 		} else if (!strcmp(token[0], "sd")) {
135 			s = gv_new_sd(tokens, token);
136 
137 			if (s == NULL) {
138 				G_VINUM_DEBUG(0, "config parse failed subdisk");
139 				break;
140 			}
141 
142 			s2 = gv_find_sd(sc, s->name);
143 			if (s2 != NULL) {
144 				/* XXX */
145 				if (is_newer) {
146 					s2->state = s->state;
147 					G_VINUM_DEBUG(2, "newer subdisk found!");
148 				}
149 				g_free(s);
150 				continue;
151 			}
152 
153 			/*
154 			 * Signal that this subdisk was tasted, and could
155 			 * possibly reference a drive that isn't in our config
156 			 * yet.
157 			 */
158 			s->flags |= GV_SD_TASTED;
159 
160 			if (s->state == GV_SD_UP)
161 				s->flags |= GV_SD_CANGOUP;
162 
163 			error = gv_create_sd(sc, s);
164 			if (error)
165 				continue;
166 
167 			/*
168 			 * This flag was set in gv_create_sd() and is not
169 			 * needed here (on-disk config parsing).
170 			 */
171 			s->flags &= ~GV_SD_NEWBORN;
172 			s->flags &= ~GV_SD_GROW;
173 		}
174 	}
175 }
176 
177 /*
178  * Format the vinum configuration properly.  If ondisk is non-zero then the
179  * configuration is intended to be written to disk later.
180  */
181 void
182 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
183 {
184 	struct gv_drive *d;
185 	struct gv_sd *s;
186 	struct gv_plex *p;
187 	struct gv_volume *v;
188 
189 	/*
190 	 * We don't need the drive configuration if we're not writing the
191 	 * config to disk.
192 	 */
193 	if (!ondisk) {
194 		LIST_FOREACH(d, &sc->drives, drive) {
195 			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
196 			    d->name, d->device);
197 		}
198 	}
199 
200 	LIST_FOREACH(v, &sc->volumes, volume) {
201 		if (!ondisk)
202 			sbuf_printf(sb, "%s", prefix);
203 		sbuf_printf(sb, "volume %s", v->name);
204 		if (ondisk)
205 			sbuf_printf(sb, " state %s", gv_volstate(v->state));
206 		sbuf_printf(sb, "\n");
207 	}
208 
209 	LIST_FOREACH(p, &sc->plexes, plex) {
210 		if (!ondisk)
211 			sbuf_printf(sb, "%s", prefix);
212 		sbuf_printf(sb, "plex name %s org %s ", p->name,
213 		    gv_plexorg(p->org));
214 		if (gv_is_striped(p))
215 			sbuf_printf(sb, "%ds ", p->stripesize / 512);
216 		if (p->vol_sc != NULL)
217 			sbuf_printf(sb, "vol %s", p->volume);
218 		if (ondisk)
219 			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
220 		sbuf_printf(sb, "\n");
221 	}
222 
223 	LIST_FOREACH(s, &sc->subdisks, sd) {
224 		if (!ondisk)
225 			sbuf_printf(sb, "%s", prefix);
226 		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
227 		    "%jds", s->name, s->drive, s->size / 512,
228 		    s->drive_offset / 512);
229 		if (s->plex_sc != NULL) {
230 			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
231 			    s->plex_offset / 512);
232 		}
233 		if (ondisk)
234 			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
235 		sbuf_printf(sb, "\n");
236 	}
237 }
238 
239 static off_t
240 gv_plex_smallest_sd(struct gv_plex *p)
241 {
242 	struct gv_sd *s;
243 	off_t smallest;
244 
245 	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
246 
247 	s = LIST_FIRST(&p->subdisks);
248 	if (s == NULL)
249 		return (-1);
250 	smallest = s->size;
251 	LIST_FOREACH(s, &p->subdisks, in_plex) {
252 		if (s->size < smallest)
253 			smallest = s->size;
254 	}
255 	return (smallest);
256 }
257 
258 /* Walk over plexes in a volume and count how many are down. */
259 int
260 gv_plexdown(struct gv_volume *v)
261 {
262 	int plexdown;
263 	struct gv_plex *p;
264 
265 	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
266 
267 	plexdown = 0;
268 
269 	LIST_FOREACH(p, &v->plexes, plex) {
270 		if (p->state == GV_PLEX_DOWN)
271 			plexdown++;
272 	}
273 	return (plexdown);
274 }
275 
276 int
277 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
278 {
279 	struct gv_sd *s2;
280 	off_t psizeorig, remainder, smallest;
281 
282 	/* If this subdisk was already given to this plex, do nothing. */
283 	if (s->plex_sc == p)
284 		return (0);
285 
286 	/* Check correct size of this subdisk. */
287 	s2 = LIST_FIRST(&p->subdisks);
288 	/* Adjust the subdisk-size if necessary. */
289 	if (s2 != NULL && gv_is_striped(p)) {
290 		/* First adjust to the stripesize. */
291 		remainder = s->size % p->stripesize;
292 
293 		if (remainder) {
294 			G_VINUM_DEBUG(1, "size of sd %s is not a "
295 			    "multiple of plex stripesize, taking off "
296 			    "%jd bytes", s->name,
297 			    (intmax_t)remainder);
298 			gv_adjust_freespace(s, remainder);
299 		}
300 
301 		smallest = gv_plex_smallest_sd(p);
302 		/* Then take off extra if other subdisks are smaller. */
303 		remainder = s->size - smallest;
304 
305 		/*
306 		 * Don't allow a remainder below zero for running plexes, it's too
307 		 * painful, and if someone were to accidentally do this, the
308 		 * resulting array might be smaller than the original... not god
309 		 */
310 		if (remainder < 0) {
311 			if (!(p->flags & GV_PLEX_NEWBORN)) {
312 				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
313 				    s->name, p->name);
314 				return (GV_ERR_BADSIZE);
315 			}
316 			/* Adjust other subdisks. */
317 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
318 				G_VINUM_DEBUG(1, "size of sd %s is to big, "
319 				    "taking off %jd bytes", s->name,
320 				    (intmax_t)remainder);
321 				gv_adjust_freespace(s2, (remainder * -1));
322 			}
323 		} else if (remainder > 0) {
324 			G_VINUM_DEBUG(1, "size of sd %s is to big, "
325 			    "taking off %jd bytes", s->name,
326 			    (intmax_t)remainder);
327 			gv_adjust_freespace(s, remainder);
328 		}
329 	}
330 
331 	/* Find the correct plex offset for this subdisk, if needed. */
332 	if (s->plex_offset == -1) {
333 		/*
334 		 * First set it to 0 to catch the case where we had a detached
335 		 * subdisk that didn't get any good offset.
336 		 */
337 		s->plex_offset = 0;
338 		if (p->sdcount) {
339 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
340 				if (gv_is_striped(p))
341 					s->plex_offset = p->sdcount *
342 					    p->stripesize;
343 				else
344 					s->plex_offset = s2->plex_offset +
345 					    s2->size;
346 			}
347 		}
348 	}
349 
350 	/* There are no subdisks for this plex yet, just insert it. */
351 	if (LIST_EMPTY(&p->subdisks)) {
352 		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
353 
354 	/* Insert in correct order, depending on plex_offset. */
355 	} else {
356 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
357 			if (s->plex_offset < s2->plex_offset) {
358 				LIST_INSERT_BEFORE(s2, s, in_plex);
359 				break;
360 			} else if (LIST_NEXT(s2, in_plex) == NULL) {
361 				LIST_INSERT_AFTER(s2, s, in_plex);
362 				break;
363 			}
364 		}
365 	}
366 
367 	s->plex_sc = p;
368         /* Adjust the size of our plex. We check if the plex misses a subdisk,
369 	 * so we don't make the plex smaller than it actually should be.
370 	 */
371 	psizeorig = p->size;
372 	p->size = gv_plex_size(p);
373 	/* Make sure the size is not changed. */
374 	if (p->sddetached > 0) {
375 		if (p->size < psizeorig) {
376 			p->size = psizeorig;
377 			/* We make sure wee need another subdisk. */
378 			if (p->sddetached == 1)
379 				p->sddetached++;
380 		}
381 		p->sddetached--;
382 	} else {
383 		if ((p->org == GV_PLEX_RAID5 ||
384 		    p->org == GV_PLEX_STRIPED) &&
385 		    !(p->flags & GV_PLEX_NEWBORN) &&
386 		    p->state == GV_PLEX_UP) {
387 			s->flags |= GV_SD_GROW;
388 		}
389 		p->sdcount++;
390 	}
391 
392 	return (0);
393 }
394 
395 void
396 gv_update_vol_size(struct gv_volume *v, off_t size)
397 {
398 	if (v == NULL)
399 		return;
400 	if (v->provider != NULL) {
401 		g_topology_lock();
402 		v->provider->mediasize = size;
403 		g_topology_unlock();
404 	}
405 	v->size = size;
406 }
407 
408 /* Return how many subdisks that constitute the original plex. */
409 int
410 gv_sdcount(struct gv_plex *p, int growing)
411 {
412 	struct gv_sd *s;
413 	int sdcount;
414 
415 	sdcount = p->sdcount;
416 	if (growing) {
417 		LIST_FOREACH(s, &p->subdisks, in_plex) {
418 			if (s->flags & GV_SD_GROW)
419 				sdcount--;
420 		}
421 	}
422 
423 	return (sdcount);
424 }
425 
426 /* Calculates the plex size. */
427 off_t
428 gv_plex_size(struct gv_plex *p)
429 {
430 	struct gv_sd *s;
431 	off_t size;
432 	int sdcount;
433 
434 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
435 
436 	/* Adjust the size of our plex. */
437 	size = 0;
438 	sdcount = gv_sdcount(p, 1);
439 	switch (p->org) {
440 	case GV_PLEX_CONCAT:
441 		LIST_FOREACH(s, &p->subdisks, in_plex)
442 			size += s->size;
443 		break;
444 	case GV_PLEX_STRIPED:
445 		s = LIST_FIRST(&p->subdisks);
446 		size = ((s != NULL) ? (sdcount * s->size) : 0);
447 		break;
448 	case GV_PLEX_RAID5:
449 		s = LIST_FIRST(&p->subdisks);
450 		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
451 		break;
452 	}
453 
454 	return (size);
455 }
456 
457 /* Returns the size of a volume. */
458 off_t
459 gv_vol_size(struct gv_volume *v)
460 {
461 	struct gv_plex *p;
462 	off_t minplexsize;
463 
464 	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
465 
466 	p = LIST_FIRST(&v->plexes);
467 	if (p == NULL)
468 		return (0);
469 
470 	minplexsize = p->size;
471 	LIST_FOREACH(p, &v->plexes, in_volume) {
472 		if (p->size < minplexsize) {
473 			minplexsize = p->size;
474 		}
475 	}
476 	return (minplexsize);
477 }
478 
479 void
480 gv_update_plex_config(struct gv_plex *p)
481 {
482 	struct gv_sd *s, *s2;
483 	off_t remainder;
484 	int required_sds, state;
485 
486 	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
487 
488 	/* The plex was added to an already running volume. */
489 	if (p->flags & GV_PLEX_ADDED)
490 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
491 
492 	switch (p->org) {
493 	case GV_PLEX_STRIPED:
494 		required_sds = 2;
495 		break;
496 	case GV_PLEX_RAID5:
497 		required_sds = 3;
498 		break;
499 	case GV_PLEX_CONCAT:
500 	default:
501 		required_sds = 0;
502 		break;
503 	}
504 
505 	if (required_sds) {
506 		if (p->sdcount < required_sds) {
507 			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
508 		}
509 
510 		/*
511 		 * The subdisks in striped plexes must all have the same size.
512 		 */
513 		s = LIST_FIRST(&p->subdisks);
514 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
515 			if (s->size != s2->size) {
516 				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
517 				    "(%jd) <> %s (%jd)", s->name, s->size,
518 				    s2->name, s2->size);
519 				gv_set_plex_state(p, GV_PLEX_DOWN,
520 				    GV_SETSTATE_FORCE);
521 			}
522 		}
523 
524 		LIST_FOREACH(s, &p->subdisks, in_plex) {
525 			/* Trim subdisk sizes to match the stripe size. */
526 			remainder = s->size % p->stripesize;
527 			if (remainder) {
528 				G_VINUM_DEBUG(1, "size of sd %s is not a "
529 				    "multiple of plex stripesize, taking off "
530 				    "%jd bytes", s->name, (intmax_t)remainder);
531 				gv_adjust_freespace(s, remainder);
532 			}
533 		}
534 	}
535 
536 	p->size = gv_plex_size(p);
537 	if (p->sdcount == 0)
538 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
539 	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
540 		LIST_FOREACH(s, &p->subdisks, in_plex)
541 			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
542 		/* If added to a volume, we want the plex to be down. */
543 		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
544 		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
545 		p->flags &= ~GV_PLEX_ADDED;
546 	} else if (p->flags & GV_PLEX_ADDED) {
547 		LIST_FOREACH(s, &p->subdisks, in_plex)
548 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
549 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
550 		p->flags &= ~GV_PLEX_ADDED;
551 	} else if (p->state == GV_PLEX_UP) {
552 		LIST_FOREACH(s, &p->subdisks, in_plex) {
553 			if (s->flags & GV_SD_GROW) {
554 				gv_set_plex_state(p, GV_PLEX_GROWABLE,
555 				    GV_SETSTATE_FORCE);
556 				break;
557 			}
558 		}
559 	}
560 	/* Our plex is grown up now. */
561 	p->flags &= ~GV_PLEX_NEWBORN;
562 }
563 
564 /*
565  * Give a subdisk to a drive, check and adjust several parameters, adjust
566  * freelist.
567  */
568 int
569 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
570 {
571 	struct gv_sd *s2;
572 	struct gv_freelist *fl, *fl2;
573 	off_t tmp;
574 	int i;
575 
576 	fl2 = NULL;
577 
578 	/* Shortcut for "referenced" drives. */
579 	if (d->flags & GV_DRIVE_REFERENCED) {
580 		s->drive_sc = d;
581 		return (0);
582 	}
583 
584 	/* Check if this subdisk was already given to this drive. */
585 	if (s->drive_sc != NULL) {
586 		if (s->drive_sc == d) {
587 			if (!(s->flags & GV_SD_TASTED)) {
588 				return (0);
589 			}
590 		} else {
591 			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
592 			    "(already on '%s')", s->name, d->name,
593 			    s->drive_sc->name);
594 			return (GV_ERR_ISATTACHED);
595 		}
596 	}
597 
598 	/* Preliminary checks. */
599 	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
600 		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
601 		    s->name);
602 		return (GV_ERR_NOSPACE);
603 	}
604 
605 	/* If no size was given for this subdisk, try to auto-size it... */
606 	if (s->size == -1) {
607 		/* Find the largest available slot. */
608 		LIST_FOREACH(fl, &d->freelist, freelist) {
609 			if (fl->size < s->size)
610 				continue;
611 			s->size = fl->size;
612 			s->drive_offset = fl->offset;
613 			fl2 = fl;
614 		}
615 
616 		/* No good slot found? */
617 		if (s->size == -1) {
618 			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
619 			    s->name, d->name);
620 			return (GV_ERR_BADSIZE);
621 		}
622 
623 	/*
624 	 * ... or check if we have a free slot that's large enough for the
625 	 * given size.
626 	 */
627 	} else {
628 		i = 0;
629 		LIST_FOREACH(fl, &d->freelist, freelist) {
630 			if (fl->size < s->size)
631 				continue;
632 			/* Assign drive offset, if not given. */
633 			if (s->drive_offset == -1)
634 				s->drive_offset = fl->offset;
635 			fl2 = fl;
636 			i++;
637 			break;
638 		}
639 
640 		/* Couldn't find a good free slot. */
641 		if (i == 0) {
642 			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
643 			    s->name, d->name);
644 			return (GV_ERR_NOSPACE);
645 		}
646 	}
647 
648 	/* No drive offset given, try to calculate it. */
649 	if (s->drive_offset == -1) {
650 
651 		/* Add offsets and sizes from other subdisks on this drive. */
652 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
653 			s->drive_offset = s2->drive_offset + s2->size;
654 		}
655 
656 		/*
657 		 * If there are no other subdisks yet, then set the default
658 		 * offset to GV_DATA_START.
659 		 */
660 		if (s->drive_offset == -1)
661 			s->drive_offset = GV_DATA_START;
662 
663 	/* Check if we have a free slot at the given drive offset. */
664 	} else {
665 		i = 0;
666 		LIST_FOREACH(fl, &d->freelist, freelist) {
667 			/* Yes, this subdisk fits. */
668 			if ((fl->offset <= s->drive_offset) &&
669 			    (fl->offset + fl->size >=
670 			    s->drive_offset + s->size)) {
671 				i++;
672 				fl2 = fl;
673 				break;
674 			}
675 		}
676 
677 		/* Couldn't find a good free slot. */
678 		if (i == 0) {
679 			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
680 			    "on '%s'", s->name, d->name);
681 			return (GV_ERR_NOSPACE);
682 		}
683 	}
684 
685 	/*
686 	 * Now that all parameters are checked and set up, we can give the
687 	 * subdisk to the drive and adjust the freelist.
688 	 */
689 
690 	/* First, adjust the freelist. */
691 	LIST_FOREACH(fl, &d->freelist, freelist) {
692 		/* Look for the free slot that we have found before. */
693 		if (fl != fl2)
694 			continue;
695 
696 		/* The subdisk starts at the beginning of the free slot. */
697 		if (fl->offset == s->drive_offset) {
698 			fl->offset += s->size;
699 			fl->size -= s->size;
700 
701 			/* The subdisk uses the whole slot, so remove it. */
702 			if (fl->size == 0) {
703 				d->freelist_entries--;
704 				LIST_REMOVE(fl, freelist);
705 			}
706 		/*
707 		 * The subdisk does not start at the beginning of the free
708 		 * slot.
709 		 */
710 		} else {
711 			tmp = fl->offset + fl->size;
712 			fl->size = s->drive_offset - fl->offset;
713 
714 			/*
715 			 * The subdisk didn't use the complete rest of the free
716 			 * slot, so we need to split it.
717 			 */
718 			if (s->drive_offset + s->size != tmp) {
719 				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
720 				fl2->offset = s->drive_offset + s->size;
721 				fl2->size = tmp - fl2->offset;
722 				LIST_INSERT_AFTER(fl, fl2, freelist);
723 				d->freelist_entries++;
724 			}
725 		}
726 		break;
727 	}
728 
729 	/*
730 	 * This is the first subdisk on this drive, just insert it into the
731 	 * list.
732 	 */
733 	if (LIST_EMPTY(&d->subdisks)) {
734 		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
735 
736 	/* There are other subdisks, so insert this one in correct order. */
737 	} else {
738 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
739 			if (s->drive_offset < s2->drive_offset) {
740 				LIST_INSERT_BEFORE(s2, s, from_drive);
741 				break;
742 			} else if (LIST_NEXT(s2, from_drive) == NULL) {
743 				LIST_INSERT_AFTER(s2, s, from_drive);
744 				break;
745 			}
746 		}
747 	}
748 
749 	d->sdcount++;
750 	d->avail -= s->size;
751 
752 	s->flags &= ~GV_SD_TASTED;
753 
754 	/* Link back from the subdisk to this drive. */
755 	s->drive_sc = d;
756 
757 	return (0);
758 }
759 
760 void
761 gv_free_sd(struct gv_sd *s)
762 {
763 	struct gv_drive *d;
764 	struct gv_freelist *fl, *fl2;
765 
766 	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
767 
768 	d = s->drive_sc;
769 	if (d == NULL)
770 		return;
771 
772 	/*
773 	 * First, find the free slot that's immediately before or after this
774 	 * subdisk.
775 	 */
776 	fl = NULL;
777 	LIST_FOREACH(fl, &d->freelist, freelist) {
778 		if (fl->offset == s->drive_offset + s->size)
779 			break;
780 		if (fl->offset + fl->size == s->drive_offset)
781 			break;
782 	}
783 
784 	/* If there is no free slot behind this subdisk, so create one. */
785 	if (fl == NULL) {
786 
787 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
788 		fl->size = s->size;
789 		fl->offset = s->drive_offset;
790 
791 		if (d->freelist_entries == 0) {
792 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
793 		} else {
794 			LIST_FOREACH(fl2, &d->freelist, freelist) {
795 				if (fl->offset < fl2->offset) {
796 					LIST_INSERT_BEFORE(fl2, fl, freelist);
797 					break;
798 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
799 					LIST_INSERT_AFTER(fl2, fl, freelist);
800 					break;
801 				}
802 			}
803 		}
804 
805 		d->freelist_entries++;
806 
807 	/* Expand the free slot we just found. */
808 	} else {
809 		fl->size += s->size;
810 		if (fl->offset > s->drive_offset)
811 			fl->offset = s->drive_offset;
812 	}
813 
814 	d->avail += s->size;
815 	d->sdcount--;
816 }
817 
818 void
819 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
820 {
821 	struct gv_drive *d;
822 	struct gv_freelist *fl, *fl2;
823 
824 	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
825 	d = s->drive_sc;
826 	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
827 
828 	/* First, find the free slot that's immediately after this subdisk. */
829 	fl = NULL;
830 	LIST_FOREACH(fl, &d->freelist, freelist) {
831 		if (fl->offset == s->drive_offset + s->size)
832 			break;
833 	}
834 
835 	/* If there is no free slot behind this subdisk, so create one. */
836 	if (fl == NULL) {
837 
838 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
839 		fl->size = remainder;
840 		fl->offset = s->drive_offset + s->size - remainder;
841 
842 		if (d->freelist_entries == 0) {
843 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
844 		} else {
845 			LIST_FOREACH(fl2, &d->freelist, freelist) {
846 				if (fl->offset < fl2->offset) {
847 					LIST_INSERT_BEFORE(fl2, fl, freelist);
848 					break;
849 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
850 					LIST_INSERT_AFTER(fl2, fl, freelist);
851 					break;
852 				}
853 			}
854 		}
855 
856 		d->freelist_entries++;
857 
858 	/* Expand the free slot we just found. */
859 	} else {
860 		fl->offset -= remainder;
861 		fl->size += remainder;
862 	}
863 
864 	s->size -= remainder;
865 	d->avail += remainder;
866 }
867 
868 /* Check if the given plex is a striped one. */
869 int
870 gv_is_striped(struct gv_plex *p)
871 {
872 	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
873 	switch(p->org) {
874 	case GV_PLEX_STRIPED:
875 	case GV_PLEX_RAID5:
876 		return (1);
877 	default:
878 		return (0);
879 	}
880 }
881 
882 /* Find a volume by name. */
883 struct gv_volume *
884 gv_find_vol(struct gv_softc *sc, char *name)
885 {
886 	struct gv_volume *v;
887 
888 	LIST_FOREACH(v, &sc->volumes, volume) {
889 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
890 			return (v);
891 	}
892 
893 	return (NULL);
894 }
895 
896 /* Find a plex by name. */
897 struct gv_plex *
898 gv_find_plex(struct gv_softc *sc, char *name)
899 {
900 	struct gv_plex *p;
901 
902 	LIST_FOREACH(p, &sc->plexes, plex) {
903 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
904 			return (p);
905 	}
906 
907 	return (NULL);
908 }
909 
910 /* Find a subdisk by name. */
911 struct gv_sd *
912 gv_find_sd(struct gv_softc *sc, char *name)
913 {
914 	struct gv_sd *s;
915 
916 	LIST_FOREACH(s, &sc->subdisks, sd) {
917 		if (!strncmp(s->name, name, GV_MAXSDNAME))
918 			return (s);
919 	}
920 
921 	return (NULL);
922 }
923 
924 /* Find a drive by name. */
925 struct gv_drive *
926 gv_find_drive(struct gv_softc *sc, char *name)
927 {
928 	struct gv_drive *d;
929 
930 	LIST_FOREACH(d, &sc->drives, drive) {
931 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
932 			return (d);
933 	}
934 
935 	return (NULL);
936 }
937 
938 /* Find a drive given a device. */
939 struct gv_drive *
940 gv_find_drive_device(struct gv_softc *sc, char *device)
941 {
942 	struct gv_drive *d;
943 
944 	LIST_FOREACH(d, &sc->drives, drive) {
945 		if(!strcmp(d->device, device))
946 			return (d);
947 	}
948 
949 	return (NULL);
950 }
951 
952 /* Check if any consumer of the given geom is open. */
953 int
954 gv_consumer_is_open(struct g_consumer *cp)
955 {
956 	if (cp == NULL)
957 		return (0);
958 
959 	if (cp->acr || cp->acw || cp->ace)
960 		return (1);
961 
962 	return (0);
963 }
964 
965 int
966 gv_provider_is_open(struct g_provider *pp)
967 {
968 	if (pp == NULL)
969 		return (0);
970 
971 	if (pp->acr || pp->acw || pp->ace)
972 		return (1);
973 
974 	return (0);
975 }
976 
977 /*
978  * Compare the modification dates of the drives.
979  * Return 1 if a > b, 0 otherwise.
980  */
981 int
982 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
983 {
984 	struct gv_drive *d2;
985 	struct timeval *a, *b;
986 
987 	KASSERT(!LIST_EMPTY(&sc->drives),
988 	    ("gv_is_drive_newer: empty drive list"));
989 
990 	a = &d->hdr->label.last_update;
991 	LIST_FOREACH(d2, &sc->drives, drive) {
992 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
993 		    (d2->hdr == NULL))
994 			continue;
995 		b = &d2->hdr->label.last_update;
996 		if (timevalcmp(a, b, >))
997 			return (1);
998 	}
999 
1000 	return (0);
1001 }
1002 
1003 /* Return the type of object identified by string 'name'. */
1004 int
1005 gv_object_type(struct gv_softc *sc, char *name)
1006 {
1007 	struct gv_drive *d;
1008 	struct gv_plex *p;
1009 	struct gv_sd *s;
1010 	struct gv_volume *v;
1011 
1012 	LIST_FOREACH(v, &sc->volumes, volume) {
1013 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1014 			return (GV_TYPE_VOL);
1015 	}
1016 
1017 	LIST_FOREACH(p, &sc->plexes, plex) {
1018 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1019 			return (GV_TYPE_PLEX);
1020 	}
1021 
1022 	LIST_FOREACH(s, &sc->subdisks, sd) {
1023 		if (!strncmp(s->name, name, GV_MAXSDNAME))
1024 			return (GV_TYPE_SD);
1025 	}
1026 
1027 	LIST_FOREACH(d, &sc->drives, drive) {
1028 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1029 			return (GV_TYPE_DRIVE);
1030 	}
1031 
1032 	return (GV_ERR_NOTFOUND);
1033 }
1034 
1035 void
1036 gv_setup_objects(struct gv_softc *sc)
1037 {
1038 	struct g_provider *pp;
1039 	struct gv_volume *v;
1040 	struct gv_plex *p;
1041 	struct gv_sd *s;
1042 	struct gv_drive *d;
1043 
1044 	LIST_FOREACH(s, &sc->subdisks, sd) {
1045 		d = gv_find_drive(sc, s->drive);
1046 		if (d != NULL)
1047 			gv_sd_to_drive(s, d);
1048 		p = gv_find_plex(sc, s->plex);
1049 		if (p != NULL)
1050 			gv_sd_to_plex(s, p);
1051 		gv_update_sd_state(s);
1052 	}
1053 
1054 	LIST_FOREACH(p, &sc->plexes, plex) {
1055 		gv_update_plex_config(p);
1056 		v = gv_find_vol(sc, p->volume);
1057 		if (v != NULL && p->vol_sc != v) {
1058 			p->vol_sc = v;
1059 			v->plexcount++;
1060 			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1061 		}
1062 		gv_update_plex_config(p);
1063 	}
1064 
1065 	LIST_FOREACH(v, &sc->volumes, volume) {
1066 		v->size = gv_vol_size(v);
1067 		if (v->provider == NULL) {
1068 			g_topology_lock();
1069 			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1070 			pp->mediasize = v->size;
1071 			pp->sectorsize = 512;    /* XXX */
1072 			g_error_provider(pp, 0);
1073 			v->provider = pp;
1074 			pp->private = v;
1075 			g_topology_unlock();
1076 		} else if (v->provider->mediasize != v->size) {
1077 			g_topology_lock();
1078 			v->provider->mediasize = v->size;
1079 			g_topology_unlock();
1080 		}
1081 		v->flags &= ~GV_VOL_NEWBORN;
1082 		gv_update_vol_state(v);
1083 	}
1084 }
1085 
1086 void
1087 gv_cleanup(struct gv_softc *sc)
1088 {
1089 	struct gv_volume *v, *v2;
1090 	struct gv_plex *p, *p2;
1091 	struct gv_sd *s, *s2;
1092 	struct gv_drive *d, *d2;
1093 	struct gv_freelist *fl, *fl2;
1094 
1095 	mtx_lock(&sc->config_mtx);
1096 	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1097 		LIST_REMOVE(v, volume);
1098 		g_free(v->wqueue);
1099 		g_free(v);
1100 	}
1101 	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1102 		LIST_REMOVE(p, plex);
1103 		g_free(p->bqueue);
1104 		g_free(p->rqueue);
1105 		g_free(p->wqueue);
1106 		g_free(p);
1107 	}
1108 	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1109 		LIST_REMOVE(s, sd);
1110 		g_free(s);
1111 	}
1112 	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1113 		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1114 			LIST_REMOVE(fl, freelist);
1115 			g_free(fl);
1116 		}
1117 		LIST_REMOVE(d, drive);
1118 		g_free(d->hdr);
1119 		g_free(d);
1120 	}
1121 	mtx_destroy(&sc->config_mtx);
1122 }
1123 
1124 /* General 'attach' routine. */
1125 int
1126 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1127 {
1128 	struct gv_sd *s;
1129 	struct gv_softc *sc;
1130 
1131 	g_topology_assert();
1132 
1133 	sc = p->vinumconf;
1134 	KASSERT(sc != NULL, ("NULL sc"));
1135 
1136 	if (p->vol_sc != NULL) {
1137 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1138 		    p->name, p->volume);
1139 		return (GV_ERR_ISATTACHED);
1140 	}
1141 
1142 	/* Stale all subdisks of this plex. */
1143 	LIST_FOREACH(s, &p->subdisks, in_plex) {
1144 		if (s->state != GV_SD_STALE)
1145 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1146 	}
1147 	/* Attach to volume. Make sure volume is not up and running. */
1148 	if (gv_provider_is_open(v->provider)) {
1149 		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1150 		    p->name, v->name);
1151 		return (GV_ERR_ISBUSY);
1152 	}
1153 	p->vol_sc = v;
1154 	strlcpy(p->volume, v->name, sizeof(p->volume));
1155 	v->plexcount++;
1156 	if (rename) {
1157 		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1158 		    v->plexcount);
1159 	}
1160 	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1161 
1162 	/* Get plex up again. */
1163 	gv_update_vol_size(v, gv_vol_size(v));
1164 	gv_set_plex_state(p, GV_PLEX_UP, 0);
1165 	gv_save_config(p->vinumconf);
1166 	return (0);
1167 }
1168 
1169 int
1170 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1171 {
1172 	struct gv_sd *s2;
1173 	int error, sdcount;
1174 
1175 	g_topology_assert();
1176 
1177 	/* If subdisk is attached, don't do it. */
1178 	if (s->plex_sc != NULL) {
1179 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1180 		    s->name, s->plex);
1181 		return (GV_ERR_ISATTACHED);
1182 	}
1183 
1184 	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1185 	/* First check that this subdisk has a correct offset. If none other
1186 	 * starts at the same, and it's correct module stripesize, it is */
1187 	if (offset != -1 && offset % p->stripesize != 0)
1188 		return (GV_ERR_BADOFFSET);
1189 	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1190 		if (s2->plex_offset == offset)
1191 			return (GV_ERR_BADOFFSET);
1192 	}
1193 
1194 	/* Attach the subdisk to the plex at given offset. */
1195 	s->plex_offset = offset;
1196 	strlcpy(s->plex, p->name, sizeof(s->plex));
1197 
1198 	sdcount = p->sdcount;
1199 	error = gv_sd_to_plex(s, p);
1200 	if (error)
1201 		return (error);
1202 	gv_update_plex_config(p);
1203 
1204 	if (rename) {
1205 		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1206 		    p->sdcount);
1207 	}
1208 	if (p->vol_sc != NULL)
1209 		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1210 	gv_save_config(p->vinumconf);
1211 	/* We don't update the subdisk state since the user might have to
1212 	 * initiate a rebuild/sync first. */
1213 	return (0);
1214 }
1215 
1216 /* Detach a plex from a volume. */
1217 int
1218 gv_detach_plex(struct gv_plex *p, int flags)
1219 {
1220 	struct gv_volume *v;
1221 
1222 	g_topology_assert();
1223 	v = p->vol_sc;
1224 
1225 	if (v == NULL) {
1226 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1227 		    p->name);
1228 		return (0); /* Not an error. */
1229 	}
1230 
1231 	/*
1232 	 * Only proceed if forced or volume inactive.
1233 	 */
1234 	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1235 	    p->state == GV_PLEX_UP)) {
1236 		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1237 		    p->name, p->volume);
1238 		return (GV_ERR_ISBUSY);
1239 	}
1240 	v->plexcount--;
1241 	/* Make sure someone don't read us when gone. */
1242 	v->last_read_plex = NULL;
1243 	LIST_REMOVE(p, in_volume);
1244 	p->vol_sc = NULL;
1245 	memset(p->volume, 0, GV_MAXVOLNAME);
1246 	gv_update_vol_size(v, gv_vol_size(v));
1247 	gv_save_config(p->vinumconf);
1248 	return (0);
1249 }
1250 
1251 /* Detach a subdisk from a plex. */
1252 int
1253 gv_detach_sd(struct gv_sd *s, int flags)
1254 {
1255 	struct gv_plex *p;
1256 
1257 	g_topology_assert();
1258 	p = s->plex_sc;
1259 
1260 	if (p == NULL) {
1261 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1262 		    s->name);
1263 		return (0); /* Not an error. */
1264 	}
1265 
1266 	/*
1267 	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1268 	 * with this subdisk up.
1269 	 */
1270 	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1271 	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1272 	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1273 		    s->name, s->plex);
1274 		return (GV_ERR_ISBUSY);
1275 	}
1276 
1277 	LIST_REMOVE(s, in_plex);
1278 	s->plex_sc = NULL;
1279 	memset(s->plex, 0, GV_MAXPLEXNAME);
1280 	p->sddetached++;
1281 	gv_save_config(s->vinumconf);
1282 	return (0);
1283 }
1284