xref: /freebsd/sys/geom/vinum/geom_vinum_subr.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2004, 2007 Lukas Ertl
5  * Copyright (c) 2007, 2009 Ulf Lilleengen
6  * Copyright (c) 1997, 1998, 1999
7  *      Nan Yang Computer Services Limited.  All rights reserved.
8  *
9  *  Parts written by Greg Lehey
10  *
11  *  This software is distributed under the so-called ``Berkeley
12  *  License'':
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. All advertising materials mentioning features or use of this software
23  *    must display the following acknowledgement:
24  *      This product includes software developed by Nan Yang Computer
25  *      Services Limited.
26  * 4. Neither the name of the Company nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * This software is provided ``as is'', and any express or implied
31  * warranties, including, but not limited to, the implied warranties of
32  * merchantability and fitness for a particular purpose are disclaimed.
33  * In no event shall the company or contributors be liable for any
34  * direct, indirect, incidental, special, exemplary, or consequential
35  * damages (including, but not limited to, procurement of substitute
36  * goods or services; loss of use, data, or profits; or business
37  * interruption) however caused and on any theory of liability, whether
38  * in contract, strict liability, or tort (including negligence or
39  * otherwise) arising in any way out of the use of this software, even if
40  * advised of the possibility of such damage.
41  *
42  */
43 
44 #include <sys/cdefs.h>
45 #include <sys/param.h>
46 #include <sys/malloc.h>
47 #include <sys/sbuf.h>
48 #include <sys/systm.h>
49 
50 #include <geom/geom.h>
51 #include <geom/geom_dbg.h>
52 #include <geom/vinum/geom_vinum_var.h>
53 #include <geom/vinum/geom_vinum.h>
54 #include <geom/vinum/geom_vinum_share.h>
55 
/* Forward declarations for helpers that are defined further down. */
int	gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d);
static off_t gv_plex_smallest_sd(struct gv_plex *p);
58 
59 void
60 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
61 {
62 	char *aptr, *bptr, *cptr;
63 	struct gv_volume *v, *v2;
64 	struct gv_plex *p, *p2;
65 	struct gv_sd *s, *s2;
66 	int error, is_newer, tokens;
67 	char *token[GV_MAXARGS];
68 
69 	is_newer = gv_drive_is_newer(sc, d);
70 
71 	/* Until the end of the string *buf. */
72 	for (aptr = buf; *aptr != '\0'; aptr = bptr) {
73 		bptr = aptr;
74 		cptr = aptr;
75 
76 		/* Separate input lines. */
77 		while (*bptr != '\n')
78 			bptr++;
79 		*bptr = '\0';
80 		bptr++;
81 
82 		tokens = gv_tokenize(cptr, token, GV_MAXARGS);
83 
84 		if (tokens <= 0)
85 			continue;
86 
87 		if (!strcmp(token[0], "volume")) {
88 			v = gv_new_volume(tokens, token);
89 			if (v == NULL) {
90 				G_VINUM_DEBUG(0, "config parse failed volume");
91 				break;
92 			}
93 
94 			v2 = gv_find_vol(sc, v->name);
95 			if (v2 != NULL) {
96 				if (is_newer) {
97 					v2->state = v->state;
98 					G_VINUM_DEBUG(2, "newer volume found!");
99 				}
100 				g_free(v);
101 				continue;
102 			}
103 
104 			gv_create_volume(sc, v);
105 
106 		} else if (!strcmp(token[0], "plex")) {
107 			p = gv_new_plex(tokens, token);
108 			if (p == NULL) {
109 				G_VINUM_DEBUG(0, "config parse failed plex");
110 				break;
111 			}
112 
113 			p2 = gv_find_plex(sc, p->name);
114 			if (p2 != NULL) {
115 				/* XXX */
116 				if (is_newer) {
117 					p2->state = p->state;
118 					G_VINUM_DEBUG(2, "newer plex found!");
119 				}
120 				g_free(p);
121 				continue;
122 			}
123 
124 			error = gv_create_plex(sc, p);
125 			if (error)
126 				continue;
127 			/*
128 			 * These flags were set in gv_create_plex() and are not
129 			 * needed here (on-disk config parsing).
130 			 */
131 			p->flags &= ~GV_PLEX_ADDED;
132 
133 		} else if (!strcmp(token[0], "sd")) {
134 			s = gv_new_sd(tokens, token);
135 
136 			if (s == NULL) {
137 				G_VINUM_DEBUG(0, "config parse failed subdisk");
138 				break;
139 			}
140 
141 			s2 = gv_find_sd(sc, s->name);
142 			if (s2 != NULL) {
143 				/* XXX */
144 				if (is_newer) {
145 					s2->state = s->state;
146 					G_VINUM_DEBUG(2, "newer subdisk found!");
147 				}
148 				g_free(s);
149 				continue;
150 			}
151 
152 			/*
153 			 * Signal that this subdisk was tasted, and could
154 			 * possibly reference a drive that isn't in our config
155 			 * yet.
156 			 */
157 			s->flags |= GV_SD_TASTED;
158 
159 			if (s->state == GV_SD_UP)
160 				s->flags |= GV_SD_CANGOUP;
161 
162 			error = gv_create_sd(sc, s);
163 			if (error)
164 				continue;
165 
166 			/*
167 			 * This flag was set in gv_create_sd() and is not
168 			 * needed here (on-disk config parsing).
169 			 */
170 			s->flags &= ~GV_SD_NEWBORN;
171 			s->flags &= ~GV_SD_GROW;
172 		}
173 	}
174 }
175 
176 /*
177  * Format the vinum configuration properly.  If ondisk is non-zero then the
178  * configuration is intended to be written to disk later.
179  */
180 void
181 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
182 {
183 	struct gv_drive *d;
184 	struct gv_sd *s;
185 	struct gv_plex *p;
186 	struct gv_volume *v;
187 
188 	/*
189 	 * We don't need the drive configuration if we're not writing the
190 	 * config to disk.
191 	 */
192 	if (!ondisk) {
193 		LIST_FOREACH(d, &sc->drives, drive) {
194 			sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
195 			    d->name, d->device);
196 		}
197 	}
198 
199 	LIST_FOREACH(v, &sc->volumes, volume) {
200 		if (!ondisk)
201 			sbuf_printf(sb, "%s", prefix);
202 		sbuf_printf(sb, "volume %s", v->name);
203 		if (ondisk)
204 			sbuf_printf(sb, " state %s", gv_volstate(v->state));
205 		sbuf_printf(sb, "\n");
206 	}
207 
208 	LIST_FOREACH(p, &sc->plexes, plex) {
209 		if (!ondisk)
210 			sbuf_printf(sb, "%s", prefix);
211 		sbuf_printf(sb, "plex name %s org %s ", p->name,
212 		    gv_plexorg(p->org));
213 		if (gv_is_striped(p))
214 			sbuf_printf(sb, "%ds ", p->stripesize / 512);
215 		if (p->vol_sc != NULL)
216 			sbuf_printf(sb, "vol %s", p->volume);
217 		if (ondisk)
218 			sbuf_printf(sb, " state %s", gv_plexstate(p->state));
219 		sbuf_printf(sb, "\n");
220 	}
221 
222 	LIST_FOREACH(s, &sc->subdisks, sd) {
223 		if (!ondisk)
224 			sbuf_printf(sb, "%s", prefix);
225 		sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
226 		    "%jds", s->name, s->drive, s->size / 512,
227 		    s->drive_offset / 512);
228 		if (s->plex_sc != NULL) {
229 			sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
230 			    s->plex_offset / 512);
231 		}
232 		if (ondisk)
233 			sbuf_printf(sb, " state %s", gv_sdstate(s->state));
234 		sbuf_printf(sb, "\n");
235 	}
236 }
237 
238 static off_t
239 gv_plex_smallest_sd(struct gv_plex *p)
240 {
241 	struct gv_sd *s;
242 	off_t smallest;
243 
244 	KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
245 
246 	s = LIST_FIRST(&p->subdisks);
247 	if (s == NULL)
248 		return (-1);
249 	smallest = s->size;
250 	LIST_FOREACH(s, &p->subdisks, in_plex) {
251 		if (s->size < smallest)
252 			smallest = s->size;
253 	}
254 	return (smallest);
255 }
256 
257 /* Walk over plexes in a volume and count how many are down. */
258 int
259 gv_plexdown(struct gv_volume *v)
260 {
261 	int plexdown;
262 	struct gv_plex *p;
263 
264 	KASSERT(v != NULL, ("gv_plexdown: NULL v"));
265 
266 	plexdown = 0;
267 
268 	LIST_FOREACH(p, &v->plexes, plex) {
269 		if (p->state == GV_PLEX_DOWN)
270 			plexdown++;
271 	}
272 	return (plexdown);
273 }
274 
275 int
276 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
277 {
278 	struct gv_sd *s2;
279 	off_t psizeorig, remainder, smallest;
280 
281 	/* If this subdisk was already given to this plex, do nothing. */
282 	if (s->plex_sc == p)
283 		return (0);
284 
285 	/* Check correct size of this subdisk. */
286 	s2 = LIST_FIRST(&p->subdisks);
287 	/* Adjust the subdisk-size if necessary. */
288 	if (s2 != NULL && gv_is_striped(p)) {
289 		/* First adjust to the stripesize. */
290 		remainder = s->size % p->stripesize;
291 
292 		if (remainder) {
293 			G_VINUM_DEBUG(1, "size of sd %s is not a "
294 			    "multiple of plex stripesize, taking off "
295 			    "%jd bytes", s->name,
296 			    (intmax_t)remainder);
297 			gv_adjust_freespace(s, remainder);
298 		}
299 
300 		smallest = gv_plex_smallest_sd(p);
301 		/* Then take off extra if other subdisks are smaller. */
302 		remainder = s->size - smallest;
303 
304 		/*
305 		 * Don't allow a remainder below zero for running plexes, it's too
306 		 * painful, and if someone were to accidentally do this, the
307 		 * resulting array might be smaller than the original... not god
308 		 */
309 		if (remainder < 0) {
310 			if (!(p->flags & GV_PLEX_NEWBORN)) {
311 				G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
312 				    s->name, p->name);
313 				return (GV_ERR_BADSIZE);
314 			}
315 			/* Adjust other subdisks. */
316 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
317 				G_VINUM_DEBUG(1, "size of sd %s is to big, "
318 				    "taking off %jd bytes", s->name,
319 				    (intmax_t)remainder);
320 				gv_adjust_freespace(s2, (remainder * -1));
321 			}
322 		} else if (remainder > 0) {
323 			G_VINUM_DEBUG(1, "size of sd %s is to big, "
324 			    "taking off %jd bytes", s->name,
325 			    (intmax_t)remainder);
326 			gv_adjust_freespace(s, remainder);
327 		}
328 	}
329 
330 	/* Find the correct plex offset for this subdisk, if needed. */
331 	if (s->plex_offset == -1) {
332 		/*
333 		 * First set it to 0 to catch the case where we had a detached
334 		 * subdisk that didn't get any good offset.
335 		 */
336 		s->plex_offset = 0;
337 		if (p->sdcount) {
338 			LIST_FOREACH(s2, &p->subdisks, in_plex) {
339 				if (gv_is_striped(p))
340 					s->plex_offset = p->sdcount *
341 					    p->stripesize;
342 				else
343 					s->plex_offset = s2->plex_offset +
344 					    s2->size;
345 			}
346 		}
347 	}
348 
349 	/* There are no subdisks for this plex yet, just insert it. */
350 	if (LIST_EMPTY(&p->subdisks)) {
351 		LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
352 
353 	/* Insert in correct order, depending on plex_offset. */
354 	} else {
355 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
356 			if (s->plex_offset < s2->plex_offset) {
357 				LIST_INSERT_BEFORE(s2, s, in_plex);
358 				break;
359 			} else if (LIST_NEXT(s2, in_plex) == NULL) {
360 				LIST_INSERT_AFTER(s2, s, in_plex);
361 				break;
362 			}
363 		}
364 	}
365 
366 	s->plex_sc = p;
367         /* Adjust the size of our plex. We check if the plex misses a subdisk,
368 	 * so we don't make the plex smaller than it actually should be.
369 	 */
370 	psizeorig = p->size;
371 	p->size = gv_plex_size(p);
372 	/* Make sure the size is not changed. */
373 	if (p->sddetached > 0) {
374 		if (p->size < psizeorig) {
375 			p->size = psizeorig;
376 			/* We make sure wee need another subdisk. */
377 			if (p->sddetached == 1)
378 				p->sddetached++;
379 		}
380 		p->sddetached--;
381 	} else {
382 		if ((p->org == GV_PLEX_RAID5 ||
383 		    p->org == GV_PLEX_STRIPED) &&
384 		    !(p->flags & GV_PLEX_NEWBORN) &&
385 		    p->state == GV_PLEX_UP) {
386 			s->flags |= GV_SD_GROW;
387 		}
388 		p->sdcount++;
389 	}
390 
391 	return (0);
392 }
393 
394 void
395 gv_update_vol_size(struct gv_volume *v, off_t size)
396 {
397 	if (v == NULL)
398 		return;
399 	if (v->provider != NULL) {
400 		g_topology_lock();
401 		v->provider->mediasize = size;
402 		g_topology_unlock();
403 	}
404 	v->size = size;
405 }
406 
407 /* Return how many subdisks that constitute the original plex. */
408 int
409 gv_sdcount(struct gv_plex *p, int growing)
410 {
411 	struct gv_sd *s;
412 	int sdcount;
413 
414 	sdcount = p->sdcount;
415 	if (growing) {
416 		LIST_FOREACH(s, &p->subdisks, in_plex) {
417 			if (s->flags & GV_SD_GROW)
418 				sdcount--;
419 		}
420 	}
421 
422 	return (sdcount);
423 }
424 
425 /* Calculates the plex size. */
426 off_t
427 gv_plex_size(struct gv_plex *p)
428 {
429 	struct gv_sd *s;
430 	off_t size;
431 	int sdcount;
432 
433 	KASSERT(p != NULL, ("gv_plex_size: NULL p"));
434 
435 	/* Adjust the size of our plex. */
436 	size = 0;
437 	sdcount = gv_sdcount(p, 1);
438 	switch (p->org) {
439 	case GV_PLEX_CONCAT:
440 		LIST_FOREACH(s, &p->subdisks, in_plex)
441 			size += s->size;
442 		break;
443 	case GV_PLEX_STRIPED:
444 		s = LIST_FIRST(&p->subdisks);
445 		size = ((s != NULL) ? (sdcount * s->size) : 0);
446 		break;
447 	case GV_PLEX_RAID5:
448 		s = LIST_FIRST(&p->subdisks);
449 		size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
450 		break;
451 	}
452 
453 	return (size);
454 }
455 
456 /* Returns the size of a volume. */
457 off_t
458 gv_vol_size(struct gv_volume *v)
459 {
460 	struct gv_plex *p;
461 	off_t minplexsize;
462 
463 	KASSERT(v != NULL, ("gv_vol_size: NULL v"));
464 
465 	p = LIST_FIRST(&v->plexes);
466 	if (p == NULL)
467 		return (0);
468 
469 	minplexsize = p->size;
470 	LIST_FOREACH(p, &v->plexes, in_volume) {
471 		if (p->size < minplexsize) {
472 			minplexsize = p->size;
473 		}
474 	}
475 	return (minplexsize);
476 }
477 
478 void
479 gv_update_plex_config(struct gv_plex *p)
480 {
481 	struct gv_sd *s, *s2;
482 	off_t remainder;
483 	int required_sds, state;
484 
485 	KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
486 
487 	/* The plex was added to an already running volume. */
488 	if (p->flags & GV_PLEX_ADDED)
489 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
490 
491 	switch (p->org) {
492 	case GV_PLEX_STRIPED:
493 		required_sds = 2;
494 		break;
495 	case GV_PLEX_RAID5:
496 		required_sds = 3;
497 		break;
498 	case GV_PLEX_CONCAT:
499 	default:
500 		required_sds = 0;
501 		break;
502 	}
503 
504 	if (required_sds) {
505 		if (p->sdcount < required_sds) {
506 			gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
507 		}
508 
509 		/*
510 		 * The subdisks in striped plexes must all have the same size.
511 		 */
512 		s = LIST_FIRST(&p->subdisks);
513 		LIST_FOREACH(s2, &p->subdisks, in_plex) {
514 			if (s->size != s2->size) {
515 				G_VINUM_DEBUG(0, "subdisk size mismatch %s"
516 				    "(%jd) <> %s (%jd)", s->name, s->size,
517 				    s2->name, s2->size);
518 				gv_set_plex_state(p, GV_PLEX_DOWN,
519 				    GV_SETSTATE_FORCE);
520 			}
521 		}
522 
523 		LIST_FOREACH(s, &p->subdisks, in_plex) {
524 			/* Trim subdisk sizes to match the stripe size. */
525 			remainder = s->size % p->stripesize;
526 			if (remainder) {
527 				G_VINUM_DEBUG(1, "size of sd %s is not a "
528 				    "multiple of plex stripesize, taking off "
529 				    "%jd bytes", s->name, (intmax_t)remainder);
530 				gv_adjust_freespace(s, remainder);
531 			}
532 		}
533 	}
534 
535 	p->size = gv_plex_size(p);
536 	if (p->sdcount == 0)
537 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
538 	else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
539 		LIST_FOREACH(s, &p->subdisks, in_plex)
540 			gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
541 		/* If added to a volume, we want the plex to be down. */
542 		state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
543 		gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
544 		p->flags &= ~GV_PLEX_ADDED;
545 	} else if (p->flags & GV_PLEX_ADDED) {
546 		LIST_FOREACH(s, &p->subdisks, in_plex)
547 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
548 		gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
549 		p->flags &= ~GV_PLEX_ADDED;
550 	} else if (p->state == GV_PLEX_UP) {
551 		LIST_FOREACH(s, &p->subdisks, in_plex) {
552 			if (s->flags & GV_SD_GROW) {
553 				gv_set_plex_state(p, GV_PLEX_GROWABLE,
554 				    GV_SETSTATE_FORCE);
555 				break;
556 			}
557 		}
558 	}
559 	/* Our plex is grown up now. */
560 	p->flags &= ~GV_PLEX_NEWBORN;
561 }
562 
563 /*
564  * Give a subdisk to a drive, check and adjust several parameters, adjust
565  * freelist.
566  */
567 int
568 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
569 {
570 	struct gv_sd *s2;
571 	struct gv_freelist *fl, *fl2;
572 	off_t tmp;
573 	int i;
574 
575 	fl2 = NULL;
576 
577 	/* Shortcut for "referenced" drives. */
578 	if (d->flags & GV_DRIVE_REFERENCED) {
579 		s->drive_sc = d;
580 		return (0);
581 	}
582 
583 	/* Check if this subdisk was already given to this drive. */
584 	if (s->drive_sc != NULL) {
585 		if (s->drive_sc == d) {
586 			if (!(s->flags & GV_SD_TASTED)) {
587 				return (0);
588 			}
589 		} else {
590 			G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
591 			    "(already on '%s')", s->name, d->name,
592 			    s->drive_sc->name);
593 			return (GV_ERR_ISATTACHED);
594 		}
595 	}
596 
597 	/* Preliminary checks. */
598 	if ((s->size > d->avail) || (d->freelist_entries == 0)) {
599 		G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
600 		    s->name);
601 		return (GV_ERR_NOSPACE);
602 	}
603 
604 	/* If no size was given for this subdisk, try to auto-size it... */
605 	if (s->size == -1) {
606 		/* Find the largest available slot. */
607 		LIST_FOREACH(fl, &d->freelist, freelist) {
608 			if (fl->size < s->size)
609 				continue;
610 			s->size = fl->size;
611 			s->drive_offset = fl->offset;
612 			fl2 = fl;
613 		}
614 
615 		/* No good slot found? */
616 		if (s->size == -1) {
617 			G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
618 			    s->name, d->name);
619 			return (GV_ERR_BADSIZE);
620 		}
621 
622 	/*
623 	 * ... or check if we have a free slot that's large enough for the
624 	 * given size.
625 	 */
626 	} else {
627 		i = 0;
628 		LIST_FOREACH(fl, &d->freelist, freelist) {
629 			if (fl->size < s->size)
630 				continue;
631 			/* Assign drive offset, if not given. */
632 			if (s->drive_offset == -1)
633 				s->drive_offset = fl->offset;
634 			fl2 = fl;
635 			i++;
636 			break;
637 		}
638 
639 		/* Couldn't find a good free slot. */
640 		if (i == 0) {
641 			G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
642 			    s->name, d->name);
643 			return (GV_ERR_NOSPACE);
644 		}
645 	}
646 
647 	/* No drive offset given, try to calculate it. */
648 	if (s->drive_offset == -1) {
649 		/* Add offsets and sizes from other subdisks on this drive. */
650 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
651 			s->drive_offset = s2->drive_offset + s2->size;
652 		}
653 
654 		/*
655 		 * If there are no other subdisks yet, then set the default
656 		 * offset to GV_DATA_START.
657 		 */
658 		if (s->drive_offset == -1)
659 			s->drive_offset = GV_DATA_START;
660 
661 	/* Check if we have a free slot at the given drive offset. */
662 	} else {
663 		i = 0;
664 		LIST_FOREACH(fl, &d->freelist, freelist) {
665 			/* Yes, this subdisk fits. */
666 			if ((fl->offset <= s->drive_offset) &&
667 			    (fl->offset + fl->size >=
668 			    s->drive_offset + s->size)) {
669 				i++;
670 				fl2 = fl;
671 				break;
672 			}
673 		}
674 
675 		/* Couldn't find a good free slot. */
676 		if (i == 0) {
677 			G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
678 			    "on '%s'", s->name, d->name);
679 			return (GV_ERR_NOSPACE);
680 		}
681 	}
682 
683 	/*
684 	 * Now that all parameters are checked and set up, we can give the
685 	 * subdisk to the drive and adjust the freelist.
686 	 */
687 
688 	/* First, adjust the freelist. */
689 	LIST_FOREACH(fl, &d->freelist, freelist) {
690 		/* Look for the free slot that we have found before. */
691 		if (fl != fl2)
692 			continue;
693 
694 		/* The subdisk starts at the beginning of the free slot. */
695 		if (fl->offset == s->drive_offset) {
696 			fl->offset += s->size;
697 			fl->size -= s->size;
698 
699 			/* The subdisk uses the whole slot, so remove it. */
700 			if (fl->size == 0) {
701 				d->freelist_entries--;
702 				LIST_REMOVE(fl, freelist);
703 			}
704 		/*
705 		 * The subdisk does not start at the beginning of the free
706 		 * slot.
707 		 */
708 		} else {
709 			tmp = fl->offset + fl->size;
710 			fl->size = s->drive_offset - fl->offset;
711 
712 			/*
713 			 * The subdisk didn't use the complete rest of the free
714 			 * slot, so we need to split it.
715 			 */
716 			if (s->drive_offset + s->size != tmp) {
717 				fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
718 				fl2->offset = s->drive_offset + s->size;
719 				fl2->size = tmp - fl2->offset;
720 				LIST_INSERT_AFTER(fl, fl2, freelist);
721 				d->freelist_entries++;
722 			}
723 		}
724 		break;
725 	}
726 
727 	/*
728 	 * This is the first subdisk on this drive, just insert it into the
729 	 * list.
730 	 */
731 	if (LIST_EMPTY(&d->subdisks)) {
732 		LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
733 
734 	/* There are other subdisks, so insert this one in correct order. */
735 	} else {
736 		LIST_FOREACH(s2, &d->subdisks, from_drive) {
737 			if (s->drive_offset < s2->drive_offset) {
738 				LIST_INSERT_BEFORE(s2, s, from_drive);
739 				break;
740 			} else if (LIST_NEXT(s2, from_drive) == NULL) {
741 				LIST_INSERT_AFTER(s2, s, from_drive);
742 				break;
743 			}
744 		}
745 	}
746 
747 	d->sdcount++;
748 	d->avail -= s->size;
749 
750 	s->flags &= ~GV_SD_TASTED;
751 
752 	/* Link back from the subdisk to this drive. */
753 	s->drive_sc = d;
754 
755 	return (0);
756 }
757 
758 void
759 gv_free_sd(struct gv_sd *s)
760 {
761 	struct gv_drive *d;
762 	struct gv_freelist *fl, *fl2;
763 
764 	KASSERT(s != NULL, ("gv_free_sd: NULL s"));
765 
766 	d = s->drive_sc;
767 	if (d == NULL)
768 		return;
769 
770 	/*
771 	 * First, find the free slot that's immediately before or after this
772 	 * subdisk.
773 	 */
774 	fl = NULL;
775 	LIST_FOREACH(fl, &d->freelist, freelist) {
776 		if (fl->offset == s->drive_offset + s->size)
777 			break;
778 		if (fl->offset + fl->size == s->drive_offset)
779 			break;
780 	}
781 
782 	/* If there is no free slot behind this subdisk, so create one. */
783 	if (fl == NULL) {
784 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
785 		fl->size = s->size;
786 		fl->offset = s->drive_offset;
787 
788 		if (d->freelist_entries == 0) {
789 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
790 		} else {
791 			LIST_FOREACH(fl2, &d->freelist, freelist) {
792 				if (fl->offset < fl2->offset) {
793 					LIST_INSERT_BEFORE(fl2, fl, freelist);
794 					break;
795 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
796 					LIST_INSERT_AFTER(fl2, fl, freelist);
797 					break;
798 				}
799 			}
800 		}
801 
802 		d->freelist_entries++;
803 
804 	/* Expand the free slot we just found. */
805 	} else {
806 		fl->size += s->size;
807 		if (fl->offset > s->drive_offset)
808 			fl->offset = s->drive_offset;
809 	}
810 
811 	d->avail += s->size;
812 	d->sdcount--;
813 }
814 
815 void
816 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
817 {
818 	struct gv_drive *d;
819 	struct gv_freelist *fl, *fl2;
820 
821 	KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
822 	d = s->drive_sc;
823 	KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
824 
825 	/* First, find the free slot that's immediately after this subdisk. */
826 	fl = NULL;
827 	LIST_FOREACH(fl, &d->freelist, freelist) {
828 		if (fl->offset == s->drive_offset + s->size)
829 			break;
830 	}
831 
832 	/* If there is no free slot behind this subdisk, so create one. */
833 	if (fl == NULL) {
834 		fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
835 		fl->size = remainder;
836 		fl->offset = s->drive_offset + s->size - remainder;
837 
838 		if (d->freelist_entries == 0) {
839 			LIST_INSERT_HEAD(&d->freelist, fl, freelist);
840 		} else {
841 			LIST_FOREACH(fl2, &d->freelist, freelist) {
842 				if (fl->offset < fl2->offset) {
843 					LIST_INSERT_BEFORE(fl2, fl, freelist);
844 					break;
845 				} else if (LIST_NEXT(fl2, freelist) == NULL) {
846 					LIST_INSERT_AFTER(fl2, fl, freelist);
847 					break;
848 				}
849 			}
850 		}
851 
852 		d->freelist_entries++;
853 
854 	/* Expand the free slot we just found. */
855 	} else {
856 		fl->offset -= remainder;
857 		fl->size += remainder;
858 	}
859 
860 	s->size -= remainder;
861 	d->avail += remainder;
862 }
863 
864 /* Check if the given plex is a striped one. */
865 int
866 gv_is_striped(struct gv_plex *p)
867 {
868 	KASSERT(p != NULL, ("gv_is_striped: NULL p"));
869 	switch(p->org) {
870 	case GV_PLEX_STRIPED:
871 	case GV_PLEX_RAID5:
872 		return (1);
873 	default:
874 		return (0);
875 	}
876 }
877 
878 /* Find a volume by name. */
879 struct gv_volume *
880 gv_find_vol(struct gv_softc *sc, char *name)
881 {
882 	struct gv_volume *v;
883 
884 	LIST_FOREACH(v, &sc->volumes, volume) {
885 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
886 			return (v);
887 	}
888 
889 	return (NULL);
890 }
891 
892 /* Find a plex by name. */
893 struct gv_plex *
894 gv_find_plex(struct gv_softc *sc, char *name)
895 {
896 	struct gv_plex *p;
897 
898 	LIST_FOREACH(p, &sc->plexes, plex) {
899 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
900 			return (p);
901 	}
902 
903 	return (NULL);
904 }
905 
906 /* Find a subdisk by name. */
907 struct gv_sd *
908 gv_find_sd(struct gv_softc *sc, char *name)
909 {
910 	struct gv_sd *s;
911 
912 	LIST_FOREACH(s, &sc->subdisks, sd) {
913 		if (!strncmp(s->name, name, GV_MAXSDNAME))
914 			return (s);
915 	}
916 
917 	return (NULL);
918 }
919 
920 /* Find a drive by name. */
921 struct gv_drive *
922 gv_find_drive(struct gv_softc *sc, char *name)
923 {
924 	struct gv_drive *d;
925 
926 	LIST_FOREACH(d, &sc->drives, drive) {
927 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
928 			return (d);
929 	}
930 
931 	return (NULL);
932 }
933 
934 /* Find a drive given a device. */
935 struct gv_drive *
936 gv_find_drive_device(struct gv_softc *sc, char *device)
937 {
938 	struct gv_drive *d;
939 
940 	LIST_FOREACH(d, &sc->drives, drive) {
941 		if(!strcmp(d->device, device))
942 			return (d);
943 	}
944 
945 	return (NULL);
946 }
947 
948 /* Check if any consumer of the given geom is open. */
949 int
950 gv_consumer_is_open(struct g_consumer *cp)
951 {
952 	if (cp == NULL)
953 		return (0);
954 
955 	if (cp->acr || cp->acw || cp->ace)
956 		return (1);
957 
958 	return (0);
959 }
960 
961 int
962 gv_provider_is_open(struct g_provider *pp)
963 {
964 	if (pp == NULL)
965 		return (0);
966 
967 	if (pp->acr || pp->acw || pp->ace)
968 		return (1);
969 
970 	return (0);
971 }
972 
973 /*
974  * Compare the modification dates of the drives.
975  * Return 1 if a > b, 0 otherwise.
976  */
977 int
978 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
979 {
980 	struct gv_drive *d2;
981 	struct timeval *a, *b;
982 
983 	KASSERT(!LIST_EMPTY(&sc->drives),
984 	    ("gv_is_drive_newer: empty drive list"));
985 
986 	a = &d->hdr->label.last_update;
987 	LIST_FOREACH(d2, &sc->drives, drive) {
988 		if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
989 		    (d2->hdr == NULL))
990 			continue;
991 		b = &d2->hdr->label.last_update;
992 		if (timevalcmp(a, b, >))
993 			return (1);
994 	}
995 
996 	return (0);
997 }
998 
999 /* Return the type of object identified by string 'name'. */
1000 int
1001 gv_object_type(struct gv_softc *sc, char *name)
1002 {
1003 	struct gv_drive *d;
1004 	struct gv_plex *p;
1005 	struct gv_sd *s;
1006 	struct gv_volume *v;
1007 
1008 	LIST_FOREACH(v, &sc->volumes, volume) {
1009 		if (!strncmp(v->name, name, GV_MAXVOLNAME))
1010 			return (GV_TYPE_VOL);
1011 	}
1012 
1013 	LIST_FOREACH(p, &sc->plexes, plex) {
1014 		if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1015 			return (GV_TYPE_PLEX);
1016 	}
1017 
1018 	LIST_FOREACH(s, &sc->subdisks, sd) {
1019 		if (!strncmp(s->name, name, GV_MAXSDNAME))
1020 			return (GV_TYPE_SD);
1021 	}
1022 
1023 	LIST_FOREACH(d, &sc->drives, drive) {
1024 		if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1025 			return (GV_TYPE_DRIVE);
1026 	}
1027 
1028 	return (GV_ERR_NOTFOUND);
1029 }
1030 
1031 void
1032 gv_setup_objects(struct gv_softc *sc)
1033 {
1034 	struct g_provider *pp;
1035 	struct gv_volume *v;
1036 	struct gv_plex *p;
1037 	struct gv_sd *s;
1038 	struct gv_drive *d;
1039 
1040 	LIST_FOREACH(s, &sc->subdisks, sd) {
1041 		d = gv_find_drive(sc, s->drive);
1042 		if (d != NULL)
1043 			gv_sd_to_drive(s, d);
1044 		p = gv_find_plex(sc, s->plex);
1045 		if (p != NULL)
1046 			gv_sd_to_plex(s, p);
1047 		gv_update_sd_state(s);
1048 	}
1049 
1050 	LIST_FOREACH(p, &sc->plexes, plex) {
1051 		gv_update_plex_config(p);
1052 		v = gv_find_vol(sc, p->volume);
1053 		if (v != NULL && p->vol_sc != v) {
1054 			p->vol_sc = v;
1055 			v->plexcount++;
1056 			LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1057 		}
1058 		gv_update_plex_config(p);
1059 	}
1060 
1061 	LIST_FOREACH(v, &sc->volumes, volume) {
1062 		v->size = gv_vol_size(v);
1063 		if (v->provider == NULL) {
1064 			g_topology_lock();
1065 			pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1066 			pp->mediasize = v->size;
1067 			pp->sectorsize = 512;    /* XXX */
1068 			g_error_provider(pp, 0);
1069 			v->provider = pp;
1070 			pp->private = v;
1071 			g_topology_unlock();
1072 		} else if (v->provider->mediasize != v->size) {
1073 			g_topology_lock();
1074 			v->provider->mediasize = v->size;
1075 			g_topology_unlock();
1076 		}
1077 		v->flags &= ~GV_VOL_NEWBORN;
1078 		gv_update_vol_state(v);
1079 	}
1080 }
1081 
1082 void
1083 gv_cleanup(struct gv_softc *sc)
1084 {
1085 	struct gv_volume *v, *v2;
1086 	struct gv_plex *p, *p2;
1087 	struct gv_sd *s, *s2;
1088 	struct gv_drive *d, *d2;
1089 	struct gv_freelist *fl, *fl2;
1090 
1091 	mtx_lock(&sc->config_mtx);
1092 	LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1093 		LIST_REMOVE(v, volume);
1094 		g_free(v->wqueue);
1095 		g_free(v);
1096 	}
1097 	LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1098 		LIST_REMOVE(p, plex);
1099 		g_free(p->bqueue);
1100 		g_free(p->rqueue);
1101 		g_free(p->wqueue);
1102 		g_free(p);
1103 	}
1104 	LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1105 		LIST_REMOVE(s, sd);
1106 		g_free(s);
1107 	}
1108 	LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1109 		LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1110 			LIST_REMOVE(fl, freelist);
1111 			g_free(fl);
1112 		}
1113 		LIST_REMOVE(d, drive);
1114 		g_free(d->hdr);
1115 		g_free(d);
1116 	}
1117 	mtx_destroy(&sc->config_mtx);
1118 }
1119 
1120 /* General 'attach' routine. */
1121 int
1122 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1123 {
1124 	struct gv_sd *s;
1125 	struct gv_softc *sc __diagused;
1126 
1127 	g_topology_assert();
1128 
1129 	sc = p->vinumconf;
1130 	KASSERT(sc != NULL, ("NULL sc"));
1131 
1132 	if (p->vol_sc != NULL) {
1133 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1134 		    p->name, p->volume);
1135 		return (GV_ERR_ISATTACHED);
1136 	}
1137 
1138 	/* Stale all subdisks of this plex. */
1139 	LIST_FOREACH(s, &p->subdisks, in_plex) {
1140 		if (s->state != GV_SD_STALE)
1141 			gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1142 	}
1143 	/* Attach to volume. Make sure volume is not up and running. */
1144 	if (gv_provider_is_open(v->provider)) {
1145 		G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1146 		    p->name, v->name);
1147 		return (GV_ERR_ISBUSY);
1148 	}
1149 	p->vol_sc = v;
1150 	strlcpy(p->volume, v->name, sizeof(p->volume));
1151 	v->plexcount++;
1152 	if (rename) {
1153 		snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1154 		    v->plexcount);
1155 	}
1156 	LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1157 
1158 	/* Get plex up again. */
1159 	gv_update_vol_size(v, gv_vol_size(v));
1160 	gv_set_plex_state(p, GV_PLEX_UP, 0);
1161 	gv_save_config(p->vinumconf);
1162 	return (0);
1163 }
1164 
1165 int
1166 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1167 {
1168 	struct gv_sd *s2;
1169 	int error;
1170 
1171 	g_topology_assert();
1172 
1173 	/* If subdisk is attached, don't do it. */
1174 	if (s->plex_sc != NULL) {
1175 		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1176 		    s->name, s->plex);
1177 		return (GV_ERR_ISATTACHED);
1178 	}
1179 
1180 	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1181 	/* First check that this subdisk has a correct offset. If none other
1182 	 * starts at the same, and it's correct module stripesize, it is */
1183 	if (offset != -1 && offset % p->stripesize != 0)
1184 		return (GV_ERR_BADOFFSET);
1185 	LIST_FOREACH(s2, &p->subdisks, in_plex) {
1186 		if (s2->plex_offset == offset)
1187 			return (GV_ERR_BADOFFSET);
1188 	}
1189 
1190 	/* Attach the subdisk to the plex at given offset. */
1191 	s->plex_offset = offset;
1192 	strlcpy(s->plex, p->name, sizeof(s->plex));
1193 
1194 	error = gv_sd_to_plex(s, p);
1195 	if (error)
1196 		return (error);
1197 	gv_update_plex_config(p);
1198 
1199 	if (rename) {
1200 		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1201 		    p->sdcount);
1202 	}
1203 	if (p->vol_sc != NULL)
1204 		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1205 	gv_save_config(p->vinumconf);
1206 	/* We don't update the subdisk state since the user might have to
1207 	 * initiate a rebuild/sync first. */
1208 	return (0);
1209 }
1210 
1211 /* Detach a plex from a volume. */
1212 int
1213 gv_detach_plex(struct gv_plex *p, int flags)
1214 {
1215 	struct gv_volume *v;
1216 
1217 	g_topology_assert();
1218 	v = p->vol_sc;
1219 
1220 	if (v == NULL) {
1221 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1222 		    p->name);
1223 		return (0); /* Not an error. */
1224 	}
1225 
1226 	/*
1227 	 * Only proceed if forced or volume inactive.
1228 	 */
1229 	if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1230 	    p->state == GV_PLEX_UP)) {
1231 		G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1232 		    p->name, p->volume);
1233 		return (GV_ERR_ISBUSY);
1234 	}
1235 	v->plexcount--;
1236 	/* Make sure someone don't read us when gone. */
1237 	v->last_read_plex = NULL;
1238 	LIST_REMOVE(p, in_volume);
1239 	p->vol_sc = NULL;
1240 	memset(p->volume, 0, GV_MAXVOLNAME);
1241 	gv_update_vol_size(v, gv_vol_size(v));
1242 	gv_save_config(p->vinumconf);
1243 	return (0);
1244 }
1245 
1246 /* Detach a subdisk from a plex. */
1247 int
1248 gv_detach_sd(struct gv_sd *s, int flags)
1249 {
1250 	struct gv_plex *p;
1251 
1252 	g_topology_assert();
1253 	p = s->plex_sc;
1254 
1255 	if (p == NULL) {
1256 		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1257 		    s->name);
1258 		return (0); /* Not an error. */
1259 	}
1260 
1261 	/*
1262 	 * Don't proceed if we're not forcing, and the plex is up, or degraded
1263 	 * with this subdisk up.
1264 	 */
1265 	if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1266 	    ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1267 	    	G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1268 		    s->name, s->plex);
1269 		return (GV_ERR_ISBUSY);
1270 	}
1271 
1272 	LIST_REMOVE(s, in_plex);
1273 	s->plex_sc = NULL;
1274 	memset(s->plex, 0, GV_MAXPLEXNAME);
1275 	p->sddetached++;
1276 	gv_save_config(s->vinumconf);
1277 	return (0);
1278 }
1279