1 /*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 2004, 2007 Lukas Ertl
5 * Copyright (c) 2007, 2009 Ulf Lilleengen
6 * Copyright (c) 1997, 1998, 1999
7 * Nan Yang Computer Services Limited. All rights reserved.
8 *
9 * Parts written by Greg Lehey
10 *
11 * This software is distributed under the so-called ``Berkeley
12 * License'':
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by Nan Yang Computer
25 * Services Limited.
26 * 4. Neither the name of the Company nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * This software is provided ``as is'', and any express or implied
31 * warranties, including, but not limited to, the implied warranties of
32 * merchantability and fitness for a particular purpose are disclaimed.
33 * In no event shall the company or contributors be liable for any
34 * direct, indirect, incidental, special, exemplary, or consequential
35 * damages (including, but not limited to, procurement of substitute
36 * goods or services; loss of use, data, or profits; or business
37 * interruption) however caused and on any theory of liability, whether
38 * in contract, strict liability, or tort (including negligence or
39 * otherwise) arising in any way out of the use of this software, even if
40 * advised of the possibility of such damage.
41 *
42 */
43
44 #include <sys/param.h>
45 #include <sys/malloc.h>
46 #include <sys/sbuf.h>
47 #include <sys/systm.h>
48
49 #include <geom/geom.h>
50 #include <geom/geom_dbg.h>
51 #include <geom/vinum/geom_vinum_var.h>
52 #include <geom/vinum/geom_vinum.h>
53 #include <geom/vinum/geom_vinum_share.h>
54
/*
 * Forward declarations.  gv_drive_is_newer() is called by gv_parse_config()
 * below but only defined further down in this file; gv_plex_smallest_sd()
 * is a file-local helper.
 */
int gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
static off_t gv_plex_smallest_sd(struct gv_plex *);
57
58 void
gv_parse_config(struct gv_softc * sc,char * buf,struct gv_drive * d)59 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
60 {
61 char *aptr, *bptr, *cptr;
62 struct gv_volume *v, *v2;
63 struct gv_plex *p, *p2;
64 struct gv_sd *s, *s2;
65 int error, is_newer, tokens;
66 char *token[GV_MAXARGS];
67
68 is_newer = gv_drive_is_newer(sc, d);
69
70 /* Until the end of the string *buf. */
71 for (aptr = buf; *aptr != '\0'; aptr = bptr) {
72 bptr = aptr;
73 cptr = aptr;
74
75 /* Separate input lines. */
76 while (*bptr != '\n')
77 bptr++;
78 *bptr = '\0';
79 bptr++;
80
81 tokens = gv_tokenize(cptr, token, GV_MAXARGS);
82
83 if (tokens <= 0)
84 continue;
85
86 if (!strcmp(token[0], "volume")) {
87 v = gv_new_volume(tokens, token);
88 if (v == NULL) {
89 G_VINUM_DEBUG(0, "config parse failed volume");
90 break;
91 }
92
93 v2 = gv_find_vol(sc, v->name);
94 if (v2 != NULL) {
95 if (is_newer) {
96 v2->state = v->state;
97 G_VINUM_DEBUG(2, "newer volume found!");
98 }
99 g_free(v);
100 continue;
101 }
102
103 gv_create_volume(sc, v);
104
105 } else if (!strcmp(token[0], "plex")) {
106 p = gv_new_plex(tokens, token);
107 if (p == NULL) {
108 G_VINUM_DEBUG(0, "config parse failed plex");
109 break;
110 }
111
112 p2 = gv_find_plex(sc, p->name);
113 if (p2 != NULL) {
114 /* XXX */
115 if (is_newer) {
116 p2->state = p->state;
117 G_VINUM_DEBUG(2, "newer plex found!");
118 }
119 g_free(p);
120 continue;
121 }
122
123 error = gv_create_plex(sc, p);
124 if (error)
125 continue;
126 /*
127 * These flags were set in gv_create_plex() and are not
128 * needed here (on-disk config parsing).
129 */
130 p->flags &= ~GV_PLEX_ADDED;
131
132 } else if (!strcmp(token[0], "sd")) {
133 s = gv_new_sd(tokens, token);
134
135 if (s == NULL) {
136 G_VINUM_DEBUG(0, "config parse failed subdisk");
137 break;
138 }
139
140 s2 = gv_find_sd(sc, s->name);
141 if (s2 != NULL) {
142 /* XXX */
143 if (is_newer) {
144 s2->state = s->state;
145 G_VINUM_DEBUG(2, "newer subdisk found!");
146 }
147 g_free(s);
148 continue;
149 }
150
151 /*
152 * Signal that this subdisk was tasted, and could
153 * possibly reference a drive that isn't in our config
154 * yet.
155 */
156 s->flags |= GV_SD_TASTED;
157
158 if (s->state == GV_SD_UP)
159 s->flags |= GV_SD_CANGOUP;
160
161 error = gv_create_sd(sc, s);
162 if (error)
163 continue;
164
165 /*
166 * This flag was set in gv_create_sd() and is not
167 * needed here (on-disk config parsing).
168 */
169 s->flags &= ~GV_SD_NEWBORN;
170 s->flags &= ~GV_SD_GROW;
171 }
172 }
173 }
174
175 /*
176 * Format the vinum configuration properly. If ondisk is non-zero then the
177 * configuration is intended to be written to disk later.
178 */
179 void
gv_format_config(struct gv_softc * sc,struct sbuf * sb,int ondisk,char * prefix)180 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
181 {
182 struct gv_drive *d;
183 struct gv_sd *s;
184 struct gv_plex *p;
185 struct gv_volume *v;
186
187 /*
188 * We don't need the drive configuration if we're not writing the
189 * config to disk.
190 */
191 if (!ondisk) {
192 LIST_FOREACH(d, &sc->drives, drive) {
193 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix,
194 d->name, d->device);
195 }
196 }
197
198 LIST_FOREACH(v, &sc->volumes, volume) {
199 if (!ondisk)
200 sbuf_printf(sb, "%s", prefix);
201 sbuf_printf(sb, "volume %s", v->name);
202 if (ondisk)
203 sbuf_printf(sb, " state %s", gv_volstate(v->state));
204 sbuf_printf(sb, "\n");
205 }
206
207 LIST_FOREACH(p, &sc->plexes, plex) {
208 if (!ondisk)
209 sbuf_printf(sb, "%s", prefix);
210 sbuf_printf(sb, "plex name %s org %s ", p->name,
211 gv_plexorg(p->org));
212 if (gv_is_striped(p))
213 sbuf_printf(sb, "%ds ", p->stripesize / 512);
214 if (p->vol_sc != NULL)
215 sbuf_printf(sb, "vol %s", p->volume);
216 if (ondisk)
217 sbuf_printf(sb, " state %s", gv_plexstate(p->state));
218 sbuf_printf(sb, "\n");
219 }
220
221 LIST_FOREACH(s, &sc->subdisks, sd) {
222 if (!ondisk)
223 sbuf_printf(sb, "%s", prefix);
224 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset "
225 "%jds", s->name, s->drive, s->size / 512,
226 s->drive_offset / 512);
227 if (s->plex_sc != NULL) {
228 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex,
229 s->plex_offset / 512);
230 }
231 if (ondisk)
232 sbuf_printf(sb, " state %s", gv_sdstate(s->state));
233 sbuf_printf(sb, "\n");
234 }
235 }
236
237 static off_t
gv_plex_smallest_sd(struct gv_plex * p)238 gv_plex_smallest_sd(struct gv_plex *p)
239 {
240 struct gv_sd *s;
241 off_t smallest;
242
243 KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
244
245 s = LIST_FIRST(&p->subdisks);
246 if (s == NULL)
247 return (-1);
248 smallest = s->size;
249 LIST_FOREACH(s, &p->subdisks, in_plex) {
250 if (s->size < smallest)
251 smallest = s->size;
252 }
253 return (smallest);
254 }
255
256 /* Walk over plexes in a volume and count how many are down. */
257 int
gv_plexdown(struct gv_volume * v)258 gv_plexdown(struct gv_volume *v)
259 {
260 int plexdown;
261 struct gv_plex *p;
262
263 KASSERT(v != NULL, ("gv_plexdown: NULL v"));
264
265 plexdown = 0;
266
267 LIST_FOREACH(p, &v->plexes, plex) {
268 if (p->state == GV_PLEX_DOWN)
269 plexdown++;
270 }
271 return (plexdown);
272 }
273
274 int
gv_sd_to_plex(struct gv_sd * s,struct gv_plex * p)275 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
276 {
277 struct gv_sd *s2;
278 off_t psizeorig, remainder, smallest;
279
280 /* If this subdisk was already given to this plex, do nothing. */
281 if (s->plex_sc == p)
282 return (0);
283
284 /* Check correct size of this subdisk. */
285 s2 = LIST_FIRST(&p->subdisks);
286 /* Adjust the subdisk-size if necessary. */
287 if (s2 != NULL && gv_is_striped(p)) {
288 /* First adjust to the stripesize. */
289 remainder = s->size % p->stripesize;
290
291 if (remainder) {
292 G_VINUM_DEBUG(1, "size of sd %s is not a "
293 "multiple of plex stripesize, taking off "
294 "%jd bytes", s->name,
295 (intmax_t)remainder);
296 gv_adjust_freespace(s, remainder);
297 }
298
299 smallest = gv_plex_smallest_sd(p);
300 /* Then take off extra if other subdisks are smaller. */
301 remainder = s->size - smallest;
302
303 /*
304 * Don't allow a remainder below zero for running plexes, it's too
305 * painful, and if someone were to accidentally do this, the
306 * resulting array might be smaller than the original... not god
307 */
308 if (remainder < 0) {
309 if (!(p->flags & GV_PLEX_NEWBORN)) {
310 G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
311 s->name, p->name);
312 return (GV_ERR_BADSIZE);
313 }
314 /* Adjust other subdisks. */
315 LIST_FOREACH(s2, &p->subdisks, in_plex) {
316 G_VINUM_DEBUG(1, "size of sd %s is to big, "
317 "taking off %jd bytes", s->name,
318 (intmax_t)remainder);
319 gv_adjust_freespace(s2, (remainder * -1));
320 }
321 } else if (remainder > 0) {
322 G_VINUM_DEBUG(1, "size of sd %s is to big, "
323 "taking off %jd bytes", s->name,
324 (intmax_t)remainder);
325 gv_adjust_freespace(s, remainder);
326 }
327 }
328
329 /* Find the correct plex offset for this subdisk, if needed. */
330 if (s->plex_offset == -1) {
331 /*
332 * First set it to 0 to catch the case where we had a detached
333 * subdisk that didn't get any good offset.
334 */
335 s->plex_offset = 0;
336 if (p->sdcount) {
337 LIST_FOREACH(s2, &p->subdisks, in_plex) {
338 if (gv_is_striped(p))
339 s->plex_offset = p->sdcount *
340 p->stripesize;
341 else
342 s->plex_offset = s2->plex_offset +
343 s2->size;
344 }
345 }
346 }
347
348 /* There are no subdisks for this plex yet, just insert it. */
349 if (LIST_EMPTY(&p->subdisks)) {
350 LIST_INSERT_HEAD(&p->subdisks, s, in_plex);
351
352 /* Insert in correct order, depending on plex_offset. */
353 } else {
354 LIST_FOREACH(s2, &p->subdisks, in_plex) {
355 if (s->plex_offset < s2->plex_offset) {
356 LIST_INSERT_BEFORE(s2, s, in_plex);
357 break;
358 } else if (LIST_NEXT(s2, in_plex) == NULL) {
359 LIST_INSERT_AFTER(s2, s, in_plex);
360 break;
361 }
362 }
363 }
364
365 s->plex_sc = p;
366 /* Adjust the size of our plex. We check if the plex misses a subdisk,
367 * so we don't make the plex smaller than it actually should be.
368 */
369 psizeorig = p->size;
370 p->size = gv_plex_size(p);
371 /* Make sure the size is not changed. */
372 if (p->sddetached > 0) {
373 if (p->size < psizeorig) {
374 p->size = psizeorig;
375 /* We make sure wee need another subdisk. */
376 if (p->sddetached == 1)
377 p->sddetached++;
378 }
379 p->sddetached--;
380 } else {
381 if ((p->org == GV_PLEX_RAID5 ||
382 p->org == GV_PLEX_STRIPED) &&
383 !(p->flags & GV_PLEX_NEWBORN) &&
384 p->state == GV_PLEX_UP) {
385 s->flags |= GV_SD_GROW;
386 }
387 p->sdcount++;
388 }
389
390 return (0);
391 }
392
393 void
gv_update_vol_size(struct gv_volume * v,off_t size)394 gv_update_vol_size(struct gv_volume *v, off_t size)
395 {
396 if (v == NULL)
397 return;
398 if (v->provider != NULL) {
399 g_topology_lock();
400 v->provider->mediasize = size;
401 g_topology_unlock();
402 }
403 v->size = size;
404 }
405
406 /* Return how many subdisks that constitute the original plex. */
407 int
gv_sdcount(struct gv_plex * p,int growing)408 gv_sdcount(struct gv_plex *p, int growing)
409 {
410 struct gv_sd *s;
411 int sdcount;
412
413 sdcount = p->sdcount;
414 if (growing) {
415 LIST_FOREACH(s, &p->subdisks, in_plex) {
416 if (s->flags & GV_SD_GROW)
417 sdcount--;
418 }
419 }
420
421 return (sdcount);
422 }
423
424 /* Calculates the plex size. */
425 off_t
gv_plex_size(struct gv_plex * p)426 gv_plex_size(struct gv_plex *p)
427 {
428 struct gv_sd *s;
429 off_t size;
430 int sdcount;
431
432 KASSERT(p != NULL, ("gv_plex_size: NULL p"));
433
434 /* Adjust the size of our plex. */
435 size = 0;
436 sdcount = gv_sdcount(p, 1);
437 switch (p->org) {
438 case GV_PLEX_CONCAT:
439 LIST_FOREACH(s, &p->subdisks, in_plex)
440 size += s->size;
441 break;
442 case GV_PLEX_STRIPED:
443 s = LIST_FIRST(&p->subdisks);
444 size = ((s != NULL) ? (sdcount * s->size) : 0);
445 break;
446 case GV_PLEX_RAID5:
447 s = LIST_FIRST(&p->subdisks);
448 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
449 break;
450 }
451
452 return (size);
453 }
454
455 /* Returns the size of a volume. */
456 off_t
gv_vol_size(struct gv_volume * v)457 gv_vol_size(struct gv_volume *v)
458 {
459 struct gv_plex *p;
460 off_t minplexsize;
461
462 KASSERT(v != NULL, ("gv_vol_size: NULL v"));
463
464 p = LIST_FIRST(&v->plexes);
465 if (p == NULL)
466 return (0);
467
468 minplexsize = p->size;
469 LIST_FOREACH(p, &v->plexes, in_volume) {
470 if (p->size < minplexsize) {
471 minplexsize = p->size;
472 }
473 }
474 return (minplexsize);
475 }
476
477 void
gv_update_plex_config(struct gv_plex * p)478 gv_update_plex_config(struct gv_plex *p)
479 {
480 struct gv_sd *s, *s2;
481 off_t remainder;
482 int required_sds, state;
483
484 KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
485
486 /* The plex was added to an already running volume. */
487 if (p->flags & GV_PLEX_ADDED)
488 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
489
490 switch (p->org) {
491 case GV_PLEX_STRIPED:
492 required_sds = 2;
493 break;
494 case GV_PLEX_RAID5:
495 required_sds = 3;
496 break;
497 case GV_PLEX_CONCAT:
498 default:
499 required_sds = 0;
500 break;
501 }
502
503 if (required_sds) {
504 if (p->sdcount < required_sds) {
505 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
506 }
507
508 /*
509 * The subdisks in striped plexes must all have the same size.
510 */
511 s = LIST_FIRST(&p->subdisks);
512 LIST_FOREACH(s2, &p->subdisks, in_plex) {
513 if (s->size != s2->size) {
514 G_VINUM_DEBUG(0, "subdisk size mismatch %s"
515 "(%jd) <> %s (%jd)", s->name, s->size,
516 s2->name, s2->size);
517 gv_set_plex_state(p, GV_PLEX_DOWN,
518 GV_SETSTATE_FORCE);
519 }
520 }
521
522 LIST_FOREACH(s, &p->subdisks, in_plex) {
523 /* Trim subdisk sizes to match the stripe size. */
524 remainder = s->size % p->stripesize;
525 if (remainder) {
526 G_VINUM_DEBUG(1, "size of sd %s is not a "
527 "multiple of plex stripesize, taking off "
528 "%jd bytes", s->name, (intmax_t)remainder);
529 gv_adjust_freespace(s, remainder);
530 }
531 }
532 }
533
534 p->size = gv_plex_size(p);
535 if (p->sdcount == 0)
536 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
537 else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
538 LIST_FOREACH(s, &p->subdisks, in_plex)
539 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
540 /* If added to a volume, we want the plex to be down. */
541 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
542 gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
543 p->flags &= ~GV_PLEX_ADDED;
544 } else if (p->flags & GV_PLEX_ADDED) {
545 LIST_FOREACH(s, &p->subdisks, in_plex)
546 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
547 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
548 p->flags &= ~GV_PLEX_ADDED;
549 } else if (p->state == GV_PLEX_UP) {
550 LIST_FOREACH(s, &p->subdisks, in_plex) {
551 if (s->flags & GV_SD_GROW) {
552 gv_set_plex_state(p, GV_PLEX_GROWABLE,
553 GV_SETSTATE_FORCE);
554 break;
555 }
556 }
557 }
558 /* Our plex is grown up now. */
559 p->flags &= ~GV_PLEX_NEWBORN;
560 }
561
562 /*
563 * Give a subdisk to a drive, check and adjust several parameters, adjust
564 * freelist.
565 */
566 int
gv_sd_to_drive(struct gv_sd * s,struct gv_drive * d)567 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
568 {
569 struct gv_sd *s2;
570 struct gv_freelist *fl, *fl2;
571 off_t tmp;
572 int i;
573
574 fl2 = NULL;
575
576 /* Shortcut for "referenced" drives. */
577 if (d->flags & GV_DRIVE_REFERENCED) {
578 s->drive_sc = d;
579 return (0);
580 }
581
582 /* Check if this subdisk was already given to this drive. */
583 if (s->drive_sc != NULL) {
584 if (s->drive_sc == d) {
585 if (!(s->flags & GV_SD_TASTED)) {
586 return (0);
587 }
588 } else {
589 G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' "
590 "(already on '%s')", s->name, d->name,
591 s->drive_sc->name);
592 return (GV_ERR_ISATTACHED);
593 }
594 }
595
596 /* Preliminary checks. */
597 if ((s->size > d->avail) || (d->freelist_entries == 0)) {
598 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
599 s->name);
600 return (GV_ERR_NOSPACE);
601 }
602
603 /* If no size was given for this subdisk, try to auto-size it... */
604 if (s->size == -1) {
605 /* Find the largest available slot. */
606 LIST_FOREACH(fl, &d->freelist, freelist) {
607 if (fl->size < s->size)
608 continue;
609 s->size = fl->size;
610 s->drive_offset = fl->offset;
611 fl2 = fl;
612 }
613
614 /* No good slot found? */
615 if (s->size == -1) {
616 G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'",
617 s->name, d->name);
618 return (GV_ERR_BADSIZE);
619 }
620
621 /*
622 * ... or check if we have a free slot that's large enough for the
623 * given size.
624 */
625 } else {
626 i = 0;
627 LIST_FOREACH(fl, &d->freelist, freelist) {
628 if (fl->size < s->size)
629 continue;
630 /* Assign drive offset, if not given. */
631 if (s->drive_offset == -1)
632 s->drive_offset = fl->offset;
633 fl2 = fl;
634 i++;
635 break;
636 }
637
638 /* Couldn't find a good free slot. */
639 if (i == 0) {
640 G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
641 s->name, d->name);
642 return (GV_ERR_NOSPACE);
643 }
644 }
645
646 /* No drive offset given, try to calculate it. */
647 if (s->drive_offset == -1) {
648 /* Add offsets and sizes from other subdisks on this drive. */
649 LIST_FOREACH(s2, &d->subdisks, from_drive) {
650 s->drive_offset = s2->drive_offset + s2->size;
651 }
652
653 /*
654 * If there are no other subdisks yet, then set the default
655 * offset to GV_DATA_START.
656 */
657 if (s->drive_offset == -1)
658 s->drive_offset = GV_DATA_START;
659
660 /* Check if we have a free slot at the given drive offset. */
661 } else {
662 i = 0;
663 LIST_FOREACH(fl, &d->freelist, freelist) {
664 /* Yes, this subdisk fits. */
665 if ((fl->offset <= s->drive_offset) &&
666 (fl->offset + fl->size >=
667 s->drive_offset + s->size)) {
668 i++;
669 fl2 = fl;
670 break;
671 }
672 }
673
674 /* Couldn't find a good free slot. */
675 if (i == 0) {
676 G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
677 "on '%s'", s->name, d->name);
678 return (GV_ERR_NOSPACE);
679 }
680 }
681
682 /*
683 * Now that all parameters are checked and set up, we can give the
684 * subdisk to the drive and adjust the freelist.
685 */
686
687 /* First, adjust the freelist. */
688 LIST_FOREACH(fl, &d->freelist, freelist) {
689 /* Look for the free slot that we have found before. */
690 if (fl != fl2)
691 continue;
692
693 /* The subdisk starts at the beginning of the free slot. */
694 if (fl->offset == s->drive_offset) {
695 fl->offset += s->size;
696 fl->size -= s->size;
697
698 /* The subdisk uses the whole slot, so remove it. */
699 if (fl->size == 0) {
700 d->freelist_entries--;
701 LIST_REMOVE(fl, freelist);
702 }
703 /*
704 * The subdisk does not start at the beginning of the free
705 * slot.
706 */
707 } else {
708 tmp = fl->offset + fl->size;
709 fl->size = s->drive_offset - fl->offset;
710
711 /*
712 * The subdisk didn't use the complete rest of the free
713 * slot, so we need to split it.
714 */
715 if (s->drive_offset + s->size != tmp) {
716 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
717 fl2->offset = s->drive_offset + s->size;
718 fl2->size = tmp - fl2->offset;
719 LIST_INSERT_AFTER(fl, fl2, freelist);
720 d->freelist_entries++;
721 }
722 }
723 break;
724 }
725
726 /*
727 * This is the first subdisk on this drive, just insert it into the
728 * list.
729 */
730 if (LIST_EMPTY(&d->subdisks)) {
731 LIST_INSERT_HEAD(&d->subdisks, s, from_drive);
732
733 /* There are other subdisks, so insert this one in correct order. */
734 } else {
735 LIST_FOREACH(s2, &d->subdisks, from_drive) {
736 if (s->drive_offset < s2->drive_offset) {
737 LIST_INSERT_BEFORE(s2, s, from_drive);
738 break;
739 } else if (LIST_NEXT(s2, from_drive) == NULL) {
740 LIST_INSERT_AFTER(s2, s, from_drive);
741 break;
742 }
743 }
744 }
745
746 d->sdcount++;
747 d->avail -= s->size;
748
749 s->flags &= ~GV_SD_TASTED;
750
751 /* Link back from the subdisk to this drive. */
752 s->drive_sc = d;
753
754 return (0);
755 }
756
757 void
gv_free_sd(struct gv_sd * s)758 gv_free_sd(struct gv_sd *s)
759 {
760 struct gv_drive *d;
761 struct gv_freelist *fl, *fl2;
762
763 KASSERT(s != NULL, ("gv_free_sd: NULL s"));
764
765 d = s->drive_sc;
766 if (d == NULL)
767 return;
768
769 /*
770 * First, find the free slot that's immediately before or after this
771 * subdisk.
772 */
773 fl = NULL;
774 LIST_FOREACH(fl, &d->freelist, freelist) {
775 if (fl->offset == s->drive_offset + s->size)
776 break;
777 if (fl->offset + fl->size == s->drive_offset)
778 break;
779 }
780
781 /* If there is no free slot behind this subdisk, so create one. */
782 if (fl == NULL) {
783 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
784 fl->size = s->size;
785 fl->offset = s->drive_offset;
786
787 if (d->freelist_entries == 0) {
788 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
789 } else {
790 LIST_FOREACH(fl2, &d->freelist, freelist) {
791 if (fl->offset < fl2->offset) {
792 LIST_INSERT_BEFORE(fl2, fl, freelist);
793 break;
794 } else if (LIST_NEXT(fl2, freelist) == NULL) {
795 LIST_INSERT_AFTER(fl2, fl, freelist);
796 break;
797 }
798 }
799 }
800
801 d->freelist_entries++;
802
803 /* Expand the free slot we just found. */
804 } else {
805 fl->size += s->size;
806 if (fl->offset > s->drive_offset)
807 fl->offset = s->drive_offset;
808 }
809
810 d->avail += s->size;
811 d->sdcount--;
812 }
813
814 void
gv_adjust_freespace(struct gv_sd * s,off_t remainder)815 gv_adjust_freespace(struct gv_sd *s, off_t remainder)
816 {
817 struct gv_drive *d;
818 struct gv_freelist *fl, *fl2;
819
820 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s"));
821 d = s->drive_sc;
822 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d"));
823
824 /* First, find the free slot that's immediately after this subdisk. */
825 fl = NULL;
826 LIST_FOREACH(fl, &d->freelist, freelist) {
827 if (fl->offset == s->drive_offset + s->size)
828 break;
829 }
830
831 /* If there is no free slot behind this subdisk, so create one. */
832 if (fl == NULL) {
833 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
834 fl->size = remainder;
835 fl->offset = s->drive_offset + s->size - remainder;
836
837 if (d->freelist_entries == 0) {
838 LIST_INSERT_HEAD(&d->freelist, fl, freelist);
839 } else {
840 LIST_FOREACH(fl2, &d->freelist, freelist) {
841 if (fl->offset < fl2->offset) {
842 LIST_INSERT_BEFORE(fl2, fl, freelist);
843 break;
844 } else if (LIST_NEXT(fl2, freelist) == NULL) {
845 LIST_INSERT_AFTER(fl2, fl, freelist);
846 break;
847 }
848 }
849 }
850
851 d->freelist_entries++;
852
853 /* Expand the free slot we just found. */
854 } else {
855 fl->offset -= remainder;
856 fl->size += remainder;
857 }
858
859 s->size -= remainder;
860 d->avail += remainder;
861 }
862
863 /* Check if the given plex is a striped one. */
864 int
gv_is_striped(struct gv_plex * p)865 gv_is_striped(struct gv_plex *p)
866 {
867 KASSERT(p != NULL, ("gv_is_striped: NULL p"));
868 switch(p->org) {
869 case GV_PLEX_STRIPED:
870 case GV_PLEX_RAID5:
871 return (1);
872 default:
873 return (0);
874 }
875 }
876
877 /* Find a volume by name. */
878 struct gv_volume *
gv_find_vol(struct gv_softc * sc,char * name)879 gv_find_vol(struct gv_softc *sc, char *name)
880 {
881 struct gv_volume *v;
882
883 LIST_FOREACH(v, &sc->volumes, volume) {
884 if (!strncmp(v->name, name, GV_MAXVOLNAME))
885 return (v);
886 }
887
888 return (NULL);
889 }
890
891 /* Find a plex by name. */
892 struct gv_plex *
gv_find_plex(struct gv_softc * sc,char * name)893 gv_find_plex(struct gv_softc *sc, char *name)
894 {
895 struct gv_plex *p;
896
897 LIST_FOREACH(p, &sc->plexes, plex) {
898 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
899 return (p);
900 }
901
902 return (NULL);
903 }
904
905 /* Find a subdisk by name. */
906 struct gv_sd *
gv_find_sd(struct gv_softc * sc,char * name)907 gv_find_sd(struct gv_softc *sc, char *name)
908 {
909 struct gv_sd *s;
910
911 LIST_FOREACH(s, &sc->subdisks, sd) {
912 if (!strncmp(s->name, name, GV_MAXSDNAME))
913 return (s);
914 }
915
916 return (NULL);
917 }
918
919 /* Find a drive by name. */
920 struct gv_drive *
gv_find_drive(struct gv_softc * sc,char * name)921 gv_find_drive(struct gv_softc *sc, char *name)
922 {
923 struct gv_drive *d;
924
925 LIST_FOREACH(d, &sc->drives, drive) {
926 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
927 return (d);
928 }
929
930 return (NULL);
931 }
932
933 /* Find a drive given a device. */
934 struct gv_drive *
gv_find_drive_device(struct gv_softc * sc,char * device)935 gv_find_drive_device(struct gv_softc *sc, char *device)
936 {
937 struct gv_drive *d;
938
939 LIST_FOREACH(d, &sc->drives, drive) {
940 if(!strcmp(d->device, device))
941 return (d);
942 }
943
944 return (NULL);
945 }
946
947 /* Check if any consumer of the given geom is open. */
948 int
gv_consumer_is_open(struct g_consumer * cp)949 gv_consumer_is_open(struct g_consumer *cp)
950 {
951 if (cp == NULL)
952 return (0);
953
954 if (cp->acr || cp->acw || cp->ace)
955 return (1);
956
957 return (0);
958 }
959
960 int
gv_provider_is_open(struct g_provider * pp)961 gv_provider_is_open(struct g_provider *pp)
962 {
963 if (pp == NULL)
964 return (0);
965
966 if (pp->acr || pp->acw || pp->ace)
967 return (1);
968
969 return (0);
970 }
971
972 /*
973 * Compare the modification dates of the drives.
974 * Return 1 if a > b, 0 otherwise.
975 */
976 int
gv_drive_is_newer(struct gv_softc * sc,struct gv_drive * d)977 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
978 {
979 struct gv_drive *d2;
980 struct timeval *a, *b;
981
982 KASSERT(!LIST_EMPTY(&sc->drives),
983 ("gv_is_drive_newer: empty drive list"));
984
985 a = &d->hdr->label.last_update;
986 LIST_FOREACH(d2, &sc->drives, drive) {
987 if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
988 (d2->hdr == NULL))
989 continue;
990 b = &d2->hdr->label.last_update;
991 if (timevalcmp(a, b, >))
992 return (1);
993 }
994
995 return (0);
996 }
997
998 /* Return the type of object identified by string 'name'. */
999 int
gv_object_type(struct gv_softc * sc,char * name)1000 gv_object_type(struct gv_softc *sc, char *name)
1001 {
1002 struct gv_drive *d;
1003 struct gv_plex *p;
1004 struct gv_sd *s;
1005 struct gv_volume *v;
1006
1007 LIST_FOREACH(v, &sc->volumes, volume) {
1008 if (!strncmp(v->name, name, GV_MAXVOLNAME))
1009 return (GV_TYPE_VOL);
1010 }
1011
1012 LIST_FOREACH(p, &sc->plexes, plex) {
1013 if (!strncmp(p->name, name, GV_MAXPLEXNAME))
1014 return (GV_TYPE_PLEX);
1015 }
1016
1017 LIST_FOREACH(s, &sc->subdisks, sd) {
1018 if (!strncmp(s->name, name, GV_MAXSDNAME))
1019 return (GV_TYPE_SD);
1020 }
1021
1022 LIST_FOREACH(d, &sc->drives, drive) {
1023 if (!strncmp(d->name, name, GV_MAXDRIVENAME))
1024 return (GV_TYPE_DRIVE);
1025 }
1026
1027 return (GV_ERR_NOTFOUND);
1028 }
1029
1030 void
gv_setup_objects(struct gv_softc * sc)1031 gv_setup_objects(struct gv_softc *sc)
1032 {
1033 struct g_provider *pp;
1034 struct gv_volume *v;
1035 struct gv_plex *p;
1036 struct gv_sd *s;
1037 struct gv_drive *d;
1038
1039 LIST_FOREACH(s, &sc->subdisks, sd) {
1040 d = gv_find_drive(sc, s->drive);
1041 if (d != NULL)
1042 gv_sd_to_drive(s, d);
1043 p = gv_find_plex(sc, s->plex);
1044 if (p != NULL)
1045 gv_sd_to_plex(s, p);
1046 gv_update_sd_state(s);
1047 }
1048
1049 LIST_FOREACH(p, &sc->plexes, plex) {
1050 gv_update_plex_config(p);
1051 v = gv_find_vol(sc, p->volume);
1052 if (v != NULL && p->vol_sc != v) {
1053 p->vol_sc = v;
1054 v->plexcount++;
1055 LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1056 }
1057 gv_update_plex_config(p);
1058 }
1059
1060 LIST_FOREACH(v, &sc->volumes, volume) {
1061 v->size = gv_vol_size(v);
1062 if (v->provider == NULL) {
1063 g_topology_lock();
1064 pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
1065 pp->mediasize = v->size;
1066 pp->sectorsize = 512; /* XXX */
1067 g_error_provider(pp, 0);
1068 v->provider = pp;
1069 pp->private = v;
1070 g_topology_unlock();
1071 } else if (v->provider->mediasize != v->size) {
1072 g_topology_lock();
1073 v->provider->mediasize = v->size;
1074 g_topology_unlock();
1075 }
1076 v->flags &= ~GV_VOL_NEWBORN;
1077 gv_update_vol_state(v);
1078 }
1079 }
1080
1081 void
gv_cleanup(struct gv_softc * sc)1082 gv_cleanup(struct gv_softc *sc)
1083 {
1084 struct gv_volume *v, *v2;
1085 struct gv_plex *p, *p2;
1086 struct gv_sd *s, *s2;
1087 struct gv_drive *d, *d2;
1088 struct gv_freelist *fl, *fl2;
1089
1090 mtx_lock(&sc->config_mtx);
1091 LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
1092 LIST_REMOVE(v, volume);
1093 g_free(v->wqueue);
1094 g_free(v);
1095 }
1096 LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
1097 LIST_REMOVE(p, plex);
1098 g_free(p->bqueue);
1099 g_free(p->rqueue);
1100 g_free(p->wqueue);
1101 g_free(p);
1102 }
1103 LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
1104 LIST_REMOVE(s, sd);
1105 g_free(s);
1106 }
1107 LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
1108 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
1109 LIST_REMOVE(fl, freelist);
1110 g_free(fl);
1111 }
1112 LIST_REMOVE(d, drive);
1113 g_free(d->hdr);
1114 g_free(d);
1115 }
1116 mtx_destroy(&sc->config_mtx);
1117 }
1118
1119 /* General 'attach' routine. */
1120 int
gv_attach_plex(struct gv_plex * p,struct gv_volume * v,int rename)1121 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
1122 {
1123 struct gv_sd *s;
1124 struct gv_softc *sc __diagused;
1125
1126 g_topology_assert();
1127
1128 sc = p->vinumconf;
1129 KASSERT(sc != NULL, ("NULL sc"));
1130
1131 if (p->vol_sc != NULL) {
1132 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1133 p->name, p->volume);
1134 return (GV_ERR_ISATTACHED);
1135 }
1136
1137 /* Stale all subdisks of this plex. */
1138 LIST_FOREACH(s, &p->subdisks, in_plex) {
1139 if (s->state != GV_SD_STALE)
1140 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1141 }
1142 /* Attach to volume. Make sure volume is not up and running. */
1143 if (gv_provider_is_open(v->provider)) {
1144 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
1145 p->name, v->name);
1146 return (GV_ERR_ISBUSY);
1147 }
1148 p->vol_sc = v;
1149 strlcpy(p->volume, v->name, sizeof(p->volume));
1150 v->plexcount++;
1151 if (rename) {
1152 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
1153 v->plexcount);
1154 }
1155 LIST_INSERT_HEAD(&v->plexes, p, in_volume);
1156
1157 /* Get plex up again. */
1158 gv_update_vol_size(v, gv_vol_size(v));
1159 gv_set_plex_state(p, GV_PLEX_UP, 0);
1160 gv_save_config(p->vinumconf);
1161 return (0);
1162 }
1163
1164 int
gv_attach_sd(struct gv_sd * s,struct gv_plex * p,off_t offset,int rename)1165 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
1166 {
1167 struct gv_sd *s2;
1168 int error;
1169
1170 g_topology_assert();
1171
1172 /* If subdisk is attached, don't do it. */
1173 if (s->plex_sc != NULL) {
1174 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
1175 s->name, s->plex);
1176 return (GV_ERR_ISATTACHED);
1177 }
1178
1179 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
1180 /* First check that this subdisk has a correct offset. If none other
1181 * starts at the same, and it's correct module stripesize, it is */
1182 if (offset != -1 && offset % p->stripesize != 0)
1183 return (GV_ERR_BADOFFSET);
1184 LIST_FOREACH(s2, &p->subdisks, in_plex) {
1185 if (s2->plex_offset == offset)
1186 return (GV_ERR_BADOFFSET);
1187 }
1188
1189 /* Attach the subdisk to the plex at given offset. */
1190 s->plex_offset = offset;
1191 strlcpy(s->plex, p->name, sizeof(s->plex));
1192
1193 error = gv_sd_to_plex(s, p);
1194 if (error)
1195 return (error);
1196 gv_update_plex_config(p);
1197
1198 if (rename) {
1199 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
1200 p->sdcount);
1201 }
1202 if (p->vol_sc != NULL)
1203 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
1204 gv_save_config(p->vinumconf);
1205 /* We don't update the subdisk state since the user might have to
1206 * initiate a rebuild/sync first. */
1207 return (0);
1208 }
1209
1210 /* Detach a plex from a volume. */
1211 int
gv_detach_plex(struct gv_plex * p,int flags)1212 gv_detach_plex(struct gv_plex *p, int flags)
1213 {
1214 struct gv_volume *v;
1215
1216 g_topology_assert();
1217 v = p->vol_sc;
1218
1219 if (v == NULL) {
1220 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1221 p->name);
1222 return (0); /* Not an error. */
1223 }
1224
1225 /*
1226 * Only proceed if forced or volume inactive.
1227 */
1228 if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
1229 p->state == GV_PLEX_UP)) {
1230 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
1231 p->name, p->volume);
1232 return (GV_ERR_ISBUSY);
1233 }
1234 v->plexcount--;
1235 /* Make sure someone don't read us when gone. */
1236 v->last_read_plex = NULL;
1237 LIST_REMOVE(p, in_volume);
1238 p->vol_sc = NULL;
1239 memset(p->volume, 0, GV_MAXVOLNAME);
1240 gv_update_vol_size(v, gv_vol_size(v));
1241 gv_save_config(p->vinumconf);
1242 return (0);
1243 }
1244
1245 /* Detach a subdisk from a plex. */
1246 int
gv_detach_sd(struct gv_sd * s,int flags)1247 gv_detach_sd(struct gv_sd *s, int flags)
1248 {
1249 struct gv_plex *p;
1250
1251 g_topology_assert();
1252 p = s->plex_sc;
1253
1254 if (p == NULL) {
1255 G_VINUM_DEBUG(1, "unable to detach %s: already detached",
1256 s->name);
1257 return (0); /* Not an error. */
1258 }
1259
1260 /*
1261 * Don't proceed if we're not forcing, and the plex is up, or degraded
1262 * with this subdisk up.
1263 */
1264 if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
1265 ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
1266 G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
1267 s->name, s->plex);
1268 return (GV_ERR_ISBUSY);
1269 }
1270
1271 LIST_REMOVE(s, in_plex);
1272 s->plex_sc = NULL;
1273 memset(s->plex, 0, GV_MAXPLEXNAME);
1274 p->sddetached++;
1275 gv_save_config(s->vinumconf);
1276 return (0);
1277 }
1278