1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2004, 2007 Lukas Ertl 5 * Copyright (c) 2007, 2009 Ulf Lilleengen 6 * Copyright (c) 1997, 1998, 1999 7 * Nan Yang Computer Services Limited. All rights reserved. 8 * 9 * Parts written by Greg Lehey 10 * 11 * This software is distributed under the so-called ``Berkeley 12 * License'': 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by Nan Yang Computer 25 * Services Limited. 26 * 4. Neither the name of the Company nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * This software is provided ``as is'', and any express or implied 31 * warranties, including, but not limited to, the implied warranties of 32 * merchantability and fitness for a particular purpose are disclaimed. 
33 * In no event shall the company or contributors be liable for any 34 * direct, indirect, incidental, special, exemplary, or consequential 35 * damages (including, but not limited to, procurement of substitute 36 * goods or services; loss of use, data, or profits; or business 37 * interruption) however caused and on any theory of liability, whether 38 * in contract, strict liability, or tort (including negligence or 39 * otherwise) arising in any way out of the use of this software, even if 40 * advised of the possibility of such damage. 41 * 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include <sys/param.h> 48 #include <sys/malloc.h> 49 #include <sys/sbuf.h> 50 #include <sys/systm.h> 51 52 #include <geom/geom.h> 53 #include <geom/geom_dbg.h> 54 #include <geom/vinum/geom_vinum_var.h> 55 #include <geom/vinum/geom_vinum.h> 56 #include <geom/vinum/geom_vinum_share.h> 57 58 int gv_drive_is_newer(struct gv_softc *, struct gv_drive *); 59 static off_t gv_plex_smallest_sd(struct gv_plex *); 60 61 void 62 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d) 63 { 64 char *aptr, *bptr, *cptr; 65 struct gv_volume *v, *v2; 66 struct gv_plex *p, *p2; 67 struct gv_sd *s, *s2; 68 int error, is_newer, tokens; 69 char *token[GV_MAXARGS]; 70 71 is_newer = gv_drive_is_newer(sc, d); 72 73 /* Until the end of the string *buf. */ 74 for (aptr = buf; *aptr != '\0'; aptr = bptr) { 75 bptr = aptr; 76 cptr = aptr; 77 78 /* Separate input lines. 
*/ 79 while (*bptr != '\n') 80 bptr++; 81 *bptr = '\0'; 82 bptr++; 83 84 tokens = gv_tokenize(cptr, token, GV_MAXARGS); 85 86 if (tokens <= 0) 87 continue; 88 89 if (!strcmp(token[0], "volume")) { 90 v = gv_new_volume(tokens, token); 91 if (v == NULL) { 92 G_VINUM_DEBUG(0, "config parse failed volume"); 93 break; 94 } 95 96 v2 = gv_find_vol(sc, v->name); 97 if (v2 != NULL) { 98 if (is_newer) { 99 v2->state = v->state; 100 G_VINUM_DEBUG(2, "newer volume found!"); 101 } 102 g_free(v); 103 continue; 104 } 105 106 gv_create_volume(sc, v); 107 108 } else if (!strcmp(token[0], "plex")) { 109 p = gv_new_plex(tokens, token); 110 if (p == NULL) { 111 G_VINUM_DEBUG(0, "config parse failed plex"); 112 break; 113 } 114 115 p2 = gv_find_plex(sc, p->name); 116 if (p2 != NULL) { 117 /* XXX */ 118 if (is_newer) { 119 p2->state = p->state; 120 G_VINUM_DEBUG(2, "newer plex found!"); 121 } 122 g_free(p); 123 continue; 124 } 125 126 error = gv_create_plex(sc, p); 127 if (error) 128 continue; 129 /* 130 * These flags were set in gv_create_plex() and are not 131 * needed here (on-disk config parsing). 132 */ 133 p->flags &= ~GV_PLEX_ADDED; 134 135 } else if (!strcmp(token[0], "sd")) { 136 s = gv_new_sd(tokens, token); 137 138 if (s == NULL) { 139 G_VINUM_DEBUG(0, "config parse failed subdisk"); 140 break; 141 } 142 143 s2 = gv_find_sd(sc, s->name); 144 if (s2 != NULL) { 145 /* XXX */ 146 if (is_newer) { 147 s2->state = s->state; 148 G_VINUM_DEBUG(2, "newer subdisk found!"); 149 } 150 g_free(s); 151 continue; 152 } 153 154 /* 155 * Signal that this subdisk was tasted, and could 156 * possibly reference a drive that isn't in our config 157 * yet. 158 */ 159 s->flags |= GV_SD_TASTED; 160 161 if (s->state == GV_SD_UP) 162 s->flags |= GV_SD_CANGOUP; 163 164 error = gv_create_sd(sc, s); 165 if (error) 166 continue; 167 168 /* 169 * This flag was set in gv_create_sd() and is not 170 * needed here (on-disk config parsing). 
171 */ 172 s->flags &= ~GV_SD_NEWBORN; 173 s->flags &= ~GV_SD_GROW; 174 } 175 } 176 } 177 178 /* 179 * Format the vinum configuration properly. If ondisk is non-zero then the 180 * configuration is intended to be written to disk later. 181 */ 182 void 183 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix) 184 { 185 struct gv_drive *d; 186 struct gv_sd *s; 187 struct gv_plex *p; 188 struct gv_volume *v; 189 190 /* 191 * We don't need the drive configuration if we're not writing the 192 * config to disk. 193 */ 194 if (!ondisk) { 195 LIST_FOREACH(d, &sc->drives, drive) { 196 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix, 197 d->name, d->device); 198 } 199 } 200 201 LIST_FOREACH(v, &sc->volumes, volume) { 202 if (!ondisk) 203 sbuf_printf(sb, "%s", prefix); 204 sbuf_printf(sb, "volume %s", v->name); 205 if (ondisk) 206 sbuf_printf(sb, " state %s", gv_volstate(v->state)); 207 sbuf_printf(sb, "\n"); 208 } 209 210 LIST_FOREACH(p, &sc->plexes, plex) { 211 if (!ondisk) 212 sbuf_printf(sb, "%s", prefix); 213 sbuf_printf(sb, "plex name %s org %s ", p->name, 214 gv_plexorg(p->org)); 215 if (gv_is_striped(p)) 216 sbuf_printf(sb, "%ds ", p->stripesize / 512); 217 if (p->vol_sc != NULL) 218 sbuf_printf(sb, "vol %s", p->volume); 219 if (ondisk) 220 sbuf_printf(sb, " state %s", gv_plexstate(p->state)); 221 sbuf_printf(sb, "\n"); 222 } 223 224 LIST_FOREACH(s, &sc->subdisks, sd) { 225 if (!ondisk) 226 sbuf_printf(sb, "%s", prefix); 227 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset " 228 "%jds", s->name, s->drive, s->size / 512, 229 s->drive_offset / 512); 230 if (s->plex_sc != NULL) { 231 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex, 232 s->plex_offset / 512); 233 } 234 if (ondisk) 235 sbuf_printf(sb, " state %s", gv_sdstate(s->state)); 236 sbuf_printf(sb, "\n"); 237 } 238 } 239 240 static off_t 241 gv_plex_smallest_sd(struct gv_plex *p) 242 { 243 struct gv_sd *s; 244 off_t smallest; 245 246 KASSERT(p != NULL, 
("gv_plex_smallest_sd: NULL p")); 247 248 s = LIST_FIRST(&p->subdisks); 249 if (s == NULL) 250 return (-1); 251 smallest = s->size; 252 LIST_FOREACH(s, &p->subdisks, in_plex) { 253 if (s->size < smallest) 254 smallest = s->size; 255 } 256 return (smallest); 257 } 258 259 /* Walk over plexes in a volume and count how many are down. */ 260 int 261 gv_plexdown(struct gv_volume *v) 262 { 263 int plexdown; 264 struct gv_plex *p; 265 266 KASSERT(v != NULL, ("gv_plexdown: NULL v")); 267 268 plexdown = 0; 269 270 LIST_FOREACH(p, &v->plexes, plex) { 271 if (p->state == GV_PLEX_DOWN) 272 plexdown++; 273 } 274 return (plexdown); 275 } 276 277 int 278 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p) 279 { 280 struct gv_sd *s2; 281 off_t psizeorig, remainder, smallest; 282 283 /* If this subdisk was already given to this plex, do nothing. */ 284 if (s->plex_sc == p) 285 return (0); 286 287 /* Check correct size of this subdisk. */ 288 s2 = LIST_FIRST(&p->subdisks); 289 /* Adjust the subdisk-size if necessary. */ 290 if (s2 != NULL && gv_is_striped(p)) { 291 /* First adjust to the stripesize. */ 292 remainder = s->size % p->stripesize; 293 294 if (remainder) { 295 G_VINUM_DEBUG(1, "size of sd %s is not a " 296 "multiple of plex stripesize, taking off " 297 "%jd bytes", s->name, 298 (intmax_t)remainder); 299 gv_adjust_freespace(s, remainder); 300 } 301 302 smallest = gv_plex_smallest_sd(p); 303 /* Then take off extra if other subdisks are smaller. */ 304 remainder = s->size - smallest; 305 306 /* 307 * Don't allow a remainder below zero for running plexes, it's too 308 * painful, and if someone were to accidentally do this, the 309 * resulting array might be smaller than the original... not god 310 */ 311 if (remainder < 0) { 312 if (!(p->flags & GV_PLEX_NEWBORN)) { 313 G_VINUM_DEBUG(0, "sd %s too small for plex %s!", 314 s->name, p->name); 315 return (GV_ERR_BADSIZE); 316 } 317 /* Adjust other subdisks. 
*/ 318 LIST_FOREACH(s2, &p->subdisks, in_plex) { 319 G_VINUM_DEBUG(1, "size of sd %s is to big, " 320 "taking off %jd bytes", s->name, 321 (intmax_t)remainder); 322 gv_adjust_freespace(s2, (remainder * -1)); 323 } 324 } else if (remainder > 0) { 325 G_VINUM_DEBUG(1, "size of sd %s is to big, " 326 "taking off %jd bytes", s->name, 327 (intmax_t)remainder); 328 gv_adjust_freespace(s, remainder); 329 } 330 } 331 332 /* Find the correct plex offset for this subdisk, if needed. */ 333 if (s->plex_offset == -1) { 334 /* 335 * First set it to 0 to catch the case where we had a detached 336 * subdisk that didn't get any good offset. 337 */ 338 s->plex_offset = 0; 339 if (p->sdcount) { 340 LIST_FOREACH(s2, &p->subdisks, in_plex) { 341 if (gv_is_striped(p)) 342 s->plex_offset = p->sdcount * 343 p->stripesize; 344 else 345 s->plex_offset = s2->plex_offset + 346 s2->size; 347 } 348 } 349 } 350 351 /* There are no subdisks for this plex yet, just insert it. */ 352 if (LIST_EMPTY(&p->subdisks)) { 353 LIST_INSERT_HEAD(&p->subdisks, s, in_plex); 354 355 /* Insert in correct order, depending on plex_offset. */ 356 } else { 357 LIST_FOREACH(s2, &p->subdisks, in_plex) { 358 if (s->plex_offset < s2->plex_offset) { 359 LIST_INSERT_BEFORE(s2, s, in_plex); 360 break; 361 } else if (LIST_NEXT(s2, in_plex) == NULL) { 362 LIST_INSERT_AFTER(s2, s, in_plex); 363 break; 364 } 365 } 366 } 367 368 s->plex_sc = p; 369 /* Adjust the size of our plex. We check if the plex misses a subdisk, 370 * so we don't make the plex smaller than it actually should be. 371 */ 372 psizeorig = p->size; 373 p->size = gv_plex_size(p); 374 /* Make sure the size is not changed. */ 375 if (p->sddetached > 0) { 376 if (p->size < psizeorig) { 377 p->size = psizeorig; 378 /* We make sure wee need another subdisk. 
*/ 379 if (p->sddetached == 1) 380 p->sddetached++; 381 } 382 p->sddetached--; 383 } else { 384 if ((p->org == GV_PLEX_RAID5 || 385 p->org == GV_PLEX_STRIPED) && 386 !(p->flags & GV_PLEX_NEWBORN) && 387 p->state == GV_PLEX_UP) { 388 s->flags |= GV_SD_GROW; 389 } 390 p->sdcount++; 391 } 392 393 return (0); 394 } 395 396 void 397 gv_update_vol_size(struct gv_volume *v, off_t size) 398 { 399 if (v == NULL) 400 return; 401 if (v->provider != NULL) { 402 g_topology_lock(); 403 v->provider->mediasize = size; 404 g_topology_unlock(); 405 } 406 v->size = size; 407 } 408 409 /* Return how many subdisks that constitute the original plex. */ 410 int 411 gv_sdcount(struct gv_plex *p, int growing) 412 { 413 struct gv_sd *s; 414 int sdcount; 415 416 sdcount = p->sdcount; 417 if (growing) { 418 LIST_FOREACH(s, &p->subdisks, in_plex) { 419 if (s->flags & GV_SD_GROW) 420 sdcount--; 421 } 422 } 423 424 return (sdcount); 425 } 426 427 /* Calculates the plex size. */ 428 off_t 429 gv_plex_size(struct gv_plex *p) 430 { 431 struct gv_sd *s; 432 off_t size; 433 int sdcount; 434 435 KASSERT(p != NULL, ("gv_plex_size: NULL p")); 436 437 /* Adjust the size of our plex. */ 438 size = 0; 439 sdcount = gv_sdcount(p, 1); 440 switch (p->org) { 441 case GV_PLEX_CONCAT: 442 LIST_FOREACH(s, &p->subdisks, in_plex) 443 size += s->size; 444 break; 445 case GV_PLEX_STRIPED: 446 s = LIST_FIRST(&p->subdisks); 447 size = ((s != NULL) ? (sdcount * s->size) : 0); 448 break; 449 case GV_PLEX_RAID5: 450 s = LIST_FIRST(&p->subdisks); 451 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0); 452 break; 453 } 454 455 return (size); 456 } 457 458 /* Returns the size of a volume. 
*/ 459 off_t 460 gv_vol_size(struct gv_volume *v) 461 { 462 struct gv_plex *p; 463 off_t minplexsize; 464 465 KASSERT(v != NULL, ("gv_vol_size: NULL v")); 466 467 p = LIST_FIRST(&v->plexes); 468 if (p == NULL) 469 return (0); 470 471 minplexsize = p->size; 472 LIST_FOREACH(p, &v->plexes, in_volume) { 473 if (p->size < minplexsize) { 474 minplexsize = p->size; 475 } 476 } 477 return (minplexsize); 478 } 479 480 void 481 gv_update_plex_config(struct gv_plex *p) 482 { 483 struct gv_sd *s, *s2; 484 off_t remainder; 485 int required_sds, state; 486 487 KASSERT(p != NULL, ("gv_update_plex_config: NULL p")); 488 489 /* The plex was added to an already running volume. */ 490 if (p->flags & GV_PLEX_ADDED) 491 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 492 493 switch (p->org) { 494 case GV_PLEX_STRIPED: 495 required_sds = 2; 496 break; 497 case GV_PLEX_RAID5: 498 required_sds = 3; 499 break; 500 case GV_PLEX_CONCAT: 501 default: 502 required_sds = 0; 503 break; 504 } 505 506 if (required_sds) { 507 if (p->sdcount < required_sds) { 508 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 509 } 510 511 /* 512 * The subdisks in striped plexes must all have the same size. 513 */ 514 s = LIST_FIRST(&p->subdisks); 515 LIST_FOREACH(s2, &p->subdisks, in_plex) { 516 if (s->size != s2->size) { 517 G_VINUM_DEBUG(0, "subdisk size mismatch %s" 518 "(%jd) <> %s (%jd)", s->name, s->size, 519 s2->name, s2->size); 520 gv_set_plex_state(p, GV_PLEX_DOWN, 521 GV_SETSTATE_FORCE); 522 } 523 } 524 525 LIST_FOREACH(s, &p->subdisks, in_plex) { 526 /* Trim subdisk sizes to match the stripe size. 
*/ 527 remainder = s->size % p->stripesize; 528 if (remainder) { 529 G_VINUM_DEBUG(1, "size of sd %s is not a " 530 "multiple of plex stripesize, taking off " 531 "%jd bytes", s->name, (intmax_t)remainder); 532 gv_adjust_freespace(s, remainder); 533 } 534 } 535 } 536 537 p->size = gv_plex_size(p); 538 if (p->sdcount == 0) 539 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 540 else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) { 541 LIST_FOREACH(s, &p->subdisks, in_plex) 542 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE); 543 /* If added to a volume, we want the plex to be down. */ 544 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP; 545 gv_set_plex_state(p, state, GV_SETSTATE_FORCE); 546 p->flags &= ~GV_PLEX_ADDED; 547 } else if (p->flags & GV_PLEX_ADDED) { 548 LIST_FOREACH(s, &p->subdisks, in_plex) 549 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 550 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 551 p->flags &= ~GV_PLEX_ADDED; 552 } else if (p->state == GV_PLEX_UP) { 553 LIST_FOREACH(s, &p->subdisks, in_plex) { 554 if (s->flags & GV_SD_GROW) { 555 gv_set_plex_state(p, GV_PLEX_GROWABLE, 556 GV_SETSTATE_FORCE); 557 break; 558 } 559 } 560 } 561 /* Our plex is grown up now. */ 562 p->flags &= ~GV_PLEX_NEWBORN; 563 } 564 565 /* 566 * Give a subdisk to a drive, check and adjust several parameters, adjust 567 * freelist. 568 */ 569 int 570 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d) 571 { 572 struct gv_sd *s2; 573 struct gv_freelist *fl, *fl2; 574 off_t tmp; 575 int i; 576 577 fl2 = NULL; 578 579 /* Shortcut for "referenced" drives. */ 580 if (d->flags & GV_DRIVE_REFERENCED) { 581 s->drive_sc = d; 582 return (0); 583 } 584 585 /* Check if this subdisk was already given to this drive. 
*/ 586 if (s->drive_sc != NULL) { 587 if (s->drive_sc == d) { 588 if (!(s->flags & GV_SD_TASTED)) { 589 return (0); 590 } 591 } else { 592 G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' " 593 "(already on '%s')", s->name, d->name, 594 s->drive_sc->name); 595 return (GV_ERR_ISATTACHED); 596 } 597 } 598 599 /* Preliminary checks. */ 600 if ((s->size > d->avail) || (d->freelist_entries == 0)) { 601 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name, 602 s->name); 603 return (GV_ERR_NOSPACE); 604 } 605 606 /* If no size was given for this subdisk, try to auto-size it... */ 607 if (s->size == -1) { 608 /* Find the largest available slot. */ 609 LIST_FOREACH(fl, &d->freelist, freelist) { 610 if (fl->size < s->size) 611 continue; 612 s->size = fl->size; 613 s->drive_offset = fl->offset; 614 fl2 = fl; 615 } 616 617 /* No good slot found? */ 618 if (s->size == -1) { 619 G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'", 620 s->name, d->name); 621 return (GV_ERR_BADSIZE); 622 } 623 624 /* 625 * ... or check if we have a free slot that's large enough for the 626 * given size. 627 */ 628 } else { 629 i = 0; 630 LIST_FOREACH(fl, &d->freelist, freelist) { 631 if (fl->size < s->size) 632 continue; 633 /* Assign drive offset, if not given. */ 634 if (s->drive_offset == -1) 635 s->drive_offset = fl->offset; 636 fl2 = fl; 637 i++; 638 break; 639 } 640 641 /* Couldn't find a good free slot. */ 642 if (i == 0) { 643 G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'", 644 s->name, d->name); 645 return (GV_ERR_NOSPACE); 646 } 647 } 648 649 /* No drive offset given, try to calculate it. */ 650 if (s->drive_offset == -1) { 651 652 /* Add offsets and sizes from other subdisks on this drive. */ 653 LIST_FOREACH(s2, &d->subdisks, from_drive) { 654 s->drive_offset = s2->drive_offset + s2->size; 655 } 656 657 /* 658 * If there are no other subdisks yet, then set the default 659 * offset to GV_DATA_START. 
660 */ 661 if (s->drive_offset == -1) 662 s->drive_offset = GV_DATA_START; 663 664 /* Check if we have a free slot at the given drive offset. */ 665 } else { 666 i = 0; 667 LIST_FOREACH(fl, &d->freelist, freelist) { 668 /* Yes, this subdisk fits. */ 669 if ((fl->offset <= s->drive_offset) && 670 (fl->offset + fl->size >= 671 s->drive_offset + s->size)) { 672 i++; 673 fl2 = fl; 674 break; 675 } 676 } 677 678 /* Couldn't find a good free slot. */ 679 if (i == 0) { 680 G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit " 681 "on '%s'", s->name, d->name); 682 return (GV_ERR_NOSPACE); 683 } 684 } 685 686 /* 687 * Now that all parameters are checked and set up, we can give the 688 * subdisk to the drive and adjust the freelist. 689 */ 690 691 /* First, adjust the freelist. */ 692 LIST_FOREACH(fl, &d->freelist, freelist) { 693 /* Look for the free slot that we have found before. */ 694 if (fl != fl2) 695 continue; 696 697 /* The subdisk starts at the beginning of the free slot. */ 698 if (fl->offset == s->drive_offset) { 699 fl->offset += s->size; 700 fl->size -= s->size; 701 702 /* The subdisk uses the whole slot, so remove it. */ 703 if (fl->size == 0) { 704 d->freelist_entries--; 705 LIST_REMOVE(fl, freelist); 706 } 707 /* 708 * The subdisk does not start at the beginning of the free 709 * slot. 710 */ 711 } else { 712 tmp = fl->offset + fl->size; 713 fl->size = s->drive_offset - fl->offset; 714 715 /* 716 * The subdisk didn't use the complete rest of the free 717 * slot, so we need to split it. 718 */ 719 if (s->drive_offset + s->size != tmp) { 720 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO); 721 fl2->offset = s->drive_offset + s->size; 722 fl2->size = tmp - fl2->offset; 723 LIST_INSERT_AFTER(fl, fl2, freelist); 724 d->freelist_entries++; 725 } 726 } 727 break; 728 } 729 730 /* 731 * This is the first subdisk on this drive, just insert it into the 732 * list. 
733 */ 734 if (LIST_EMPTY(&d->subdisks)) { 735 LIST_INSERT_HEAD(&d->subdisks, s, from_drive); 736 737 /* There are other subdisks, so insert this one in correct order. */ 738 } else { 739 LIST_FOREACH(s2, &d->subdisks, from_drive) { 740 if (s->drive_offset < s2->drive_offset) { 741 LIST_INSERT_BEFORE(s2, s, from_drive); 742 break; 743 } else if (LIST_NEXT(s2, from_drive) == NULL) { 744 LIST_INSERT_AFTER(s2, s, from_drive); 745 break; 746 } 747 } 748 } 749 750 d->sdcount++; 751 d->avail -= s->size; 752 753 s->flags &= ~GV_SD_TASTED; 754 755 /* Link back from the subdisk to this drive. */ 756 s->drive_sc = d; 757 758 return (0); 759 } 760 761 void 762 gv_free_sd(struct gv_sd *s) 763 { 764 struct gv_drive *d; 765 struct gv_freelist *fl, *fl2; 766 767 KASSERT(s != NULL, ("gv_free_sd: NULL s")); 768 769 d = s->drive_sc; 770 if (d == NULL) 771 return; 772 773 /* 774 * First, find the free slot that's immediately before or after this 775 * subdisk. 776 */ 777 fl = NULL; 778 LIST_FOREACH(fl, &d->freelist, freelist) { 779 if (fl->offset == s->drive_offset + s->size) 780 break; 781 if (fl->offset + fl->size == s->drive_offset) 782 break; 783 } 784 785 /* If there is no free slot behind this subdisk, so create one. */ 786 if (fl == NULL) { 787 788 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 789 fl->size = s->size; 790 fl->offset = s->drive_offset; 791 792 if (d->freelist_entries == 0) { 793 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 794 } else { 795 LIST_FOREACH(fl2, &d->freelist, freelist) { 796 if (fl->offset < fl2->offset) { 797 LIST_INSERT_BEFORE(fl2, fl, freelist); 798 break; 799 } else if (LIST_NEXT(fl2, freelist) == NULL) { 800 LIST_INSERT_AFTER(fl2, fl, freelist); 801 break; 802 } 803 } 804 } 805 806 d->freelist_entries++; 807 808 /* Expand the free slot we just found. 
*/ 809 } else { 810 fl->size += s->size; 811 if (fl->offset > s->drive_offset) 812 fl->offset = s->drive_offset; 813 } 814 815 d->avail += s->size; 816 d->sdcount--; 817 } 818 819 void 820 gv_adjust_freespace(struct gv_sd *s, off_t remainder) 821 { 822 struct gv_drive *d; 823 struct gv_freelist *fl, *fl2; 824 825 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s")); 826 d = s->drive_sc; 827 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d")); 828 829 /* First, find the free slot that's immediately after this subdisk. */ 830 fl = NULL; 831 LIST_FOREACH(fl, &d->freelist, freelist) { 832 if (fl->offset == s->drive_offset + s->size) 833 break; 834 } 835 836 /* If there is no free slot behind this subdisk, so create one. */ 837 if (fl == NULL) { 838 839 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 840 fl->size = remainder; 841 fl->offset = s->drive_offset + s->size - remainder; 842 843 if (d->freelist_entries == 0) { 844 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 845 } else { 846 LIST_FOREACH(fl2, &d->freelist, freelist) { 847 if (fl->offset < fl2->offset) { 848 LIST_INSERT_BEFORE(fl2, fl, freelist); 849 break; 850 } else if (LIST_NEXT(fl2, freelist) == NULL) { 851 LIST_INSERT_AFTER(fl2, fl, freelist); 852 break; 853 } 854 } 855 } 856 857 d->freelist_entries++; 858 859 /* Expand the free slot we just found. */ 860 } else { 861 fl->offset -= remainder; 862 fl->size += remainder; 863 } 864 865 s->size -= remainder; 866 d->avail += remainder; 867 } 868 869 /* Check if the given plex is a striped one. */ 870 int 871 gv_is_striped(struct gv_plex *p) 872 { 873 KASSERT(p != NULL, ("gv_is_striped: NULL p")); 874 switch(p->org) { 875 case GV_PLEX_STRIPED: 876 case GV_PLEX_RAID5: 877 return (1); 878 default: 879 return (0); 880 } 881 } 882 883 /* Find a volume by name. 
*/ 884 struct gv_volume * 885 gv_find_vol(struct gv_softc *sc, char *name) 886 { 887 struct gv_volume *v; 888 889 LIST_FOREACH(v, &sc->volumes, volume) { 890 if (!strncmp(v->name, name, GV_MAXVOLNAME)) 891 return (v); 892 } 893 894 return (NULL); 895 } 896 897 /* Find a plex by name. */ 898 struct gv_plex * 899 gv_find_plex(struct gv_softc *sc, char *name) 900 { 901 struct gv_plex *p; 902 903 LIST_FOREACH(p, &sc->plexes, plex) { 904 if (!strncmp(p->name, name, GV_MAXPLEXNAME)) 905 return (p); 906 } 907 908 return (NULL); 909 } 910 911 /* Find a subdisk by name. */ 912 struct gv_sd * 913 gv_find_sd(struct gv_softc *sc, char *name) 914 { 915 struct gv_sd *s; 916 917 LIST_FOREACH(s, &sc->subdisks, sd) { 918 if (!strncmp(s->name, name, GV_MAXSDNAME)) 919 return (s); 920 } 921 922 return (NULL); 923 } 924 925 /* Find a drive by name. */ 926 struct gv_drive * 927 gv_find_drive(struct gv_softc *sc, char *name) 928 { 929 struct gv_drive *d; 930 931 LIST_FOREACH(d, &sc->drives, drive) { 932 if (!strncmp(d->name, name, GV_MAXDRIVENAME)) 933 return (d); 934 } 935 936 return (NULL); 937 } 938 939 /* Find a drive given a device. */ 940 struct gv_drive * 941 gv_find_drive_device(struct gv_softc *sc, char *device) 942 { 943 struct gv_drive *d; 944 945 LIST_FOREACH(d, &sc->drives, drive) { 946 if(!strcmp(d->device, device)) 947 return (d); 948 } 949 950 return (NULL); 951 } 952 953 /* Check if any consumer of the given geom is open. */ 954 int 955 gv_consumer_is_open(struct g_consumer *cp) 956 { 957 if (cp == NULL) 958 return (0); 959 960 if (cp->acr || cp->acw || cp->ace) 961 return (1); 962 963 return (0); 964 } 965 966 int 967 gv_provider_is_open(struct g_provider *pp) 968 { 969 if (pp == NULL) 970 return (0); 971 972 if (pp->acr || pp->acw || pp->ace) 973 return (1); 974 975 return (0); 976 } 977 978 /* 979 * Compare the modification dates of the drives. 980 * Return 1 if a > b, 0 otherwise. 
981 */ 982 int 983 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d) 984 { 985 struct gv_drive *d2; 986 struct timeval *a, *b; 987 988 KASSERT(!LIST_EMPTY(&sc->drives), 989 ("gv_is_drive_newer: empty drive list")); 990 991 a = &d->hdr->label.last_update; 992 LIST_FOREACH(d2, &sc->drives, drive) { 993 if ((d == d2) || (d2->state != GV_DRIVE_UP) || 994 (d2->hdr == NULL)) 995 continue; 996 b = &d2->hdr->label.last_update; 997 if (timevalcmp(a, b, >)) 998 return (1); 999 } 1000 1001 return (0); 1002 } 1003 1004 /* Return the type of object identified by string 'name'. */ 1005 int 1006 gv_object_type(struct gv_softc *sc, char *name) 1007 { 1008 struct gv_drive *d; 1009 struct gv_plex *p; 1010 struct gv_sd *s; 1011 struct gv_volume *v; 1012 1013 LIST_FOREACH(v, &sc->volumes, volume) { 1014 if (!strncmp(v->name, name, GV_MAXVOLNAME)) 1015 return (GV_TYPE_VOL); 1016 } 1017 1018 LIST_FOREACH(p, &sc->plexes, plex) { 1019 if (!strncmp(p->name, name, GV_MAXPLEXNAME)) 1020 return (GV_TYPE_PLEX); 1021 } 1022 1023 LIST_FOREACH(s, &sc->subdisks, sd) { 1024 if (!strncmp(s->name, name, GV_MAXSDNAME)) 1025 return (GV_TYPE_SD); 1026 } 1027 1028 LIST_FOREACH(d, &sc->drives, drive) { 1029 if (!strncmp(d->name, name, GV_MAXDRIVENAME)) 1030 return (GV_TYPE_DRIVE); 1031 } 1032 1033 return (GV_ERR_NOTFOUND); 1034 } 1035 1036 void 1037 gv_setup_objects(struct gv_softc *sc) 1038 { 1039 struct g_provider *pp; 1040 struct gv_volume *v; 1041 struct gv_plex *p; 1042 struct gv_sd *s; 1043 struct gv_drive *d; 1044 1045 LIST_FOREACH(s, &sc->subdisks, sd) { 1046 d = gv_find_drive(sc, s->drive); 1047 if (d != NULL) 1048 gv_sd_to_drive(s, d); 1049 p = gv_find_plex(sc, s->plex); 1050 if (p != NULL) 1051 gv_sd_to_plex(s, p); 1052 gv_update_sd_state(s); 1053 } 1054 1055 LIST_FOREACH(p, &sc->plexes, plex) { 1056 gv_update_plex_config(p); 1057 v = gv_find_vol(sc, p->volume); 1058 if (v != NULL && p->vol_sc != v) { 1059 p->vol_sc = v; 1060 v->plexcount++; 1061 LIST_INSERT_HEAD(&v->plexes, p, 
in_volume); 1062 } 1063 gv_update_plex_config(p); 1064 } 1065 1066 LIST_FOREACH(v, &sc->volumes, volume) { 1067 v->size = gv_vol_size(v); 1068 if (v->provider == NULL) { 1069 g_topology_lock(); 1070 pp = g_new_providerf(sc->geom, "gvinum/%s", v->name); 1071 pp->mediasize = v->size; 1072 pp->sectorsize = 512; /* XXX */ 1073 g_error_provider(pp, 0); 1074 v->provider = pp; 1075 pp->private = v; 1076 g_topology_unlock(); 1077 } else if (v->provider->mediasize != v->size) { 1078 g_topology_lock(); 1079 v->provider->mediasize = v->size; 1080 g_topology_unlock(); 1081 } 1082 v->flags &= ~GV_VOL_NEWBORN; 1083 gv_update_vol_state(v); 1084 } 1085 } 1086 1087 void 1088 gv_cleanup(struct gv_softc *sc) 1089 { 1090 struct gv_volume *v, *v2; 1091 struct gv_plex *p, *p2; 1092 struct gv_sd *s, *s2; 1093 struct gv_drive *d, *d2; 1094 struct gv_freelist *fl, *fl2; 1095 1096 mtx_lock(&sc->config_mtx); 1097 LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) { 1098 LIST_REMOVE(v, volume); 1099 g_free(v->wqueue); 1100 g_free(v); 1101 } 1102 LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { 1103 LIST_REMOVE(p, plex); 1104 g_free(p->bqueue); 1105 g_free(p->rqueue); 1106 g_free(p->wqueue); 1107 g_free(p); 1108 } 1109 LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) { 1110 LIST_REMOVE(s, sd); 1111 g_free(s); 1112 } 1113 LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { 1114 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { 1115 LIST_REMOVE(fl, freelist); 1116 g_free(fl); 1117 } 1118 LIST_REMOVE(d, drive); 1119 g_free(d->hdr); 1120 g_free(d); 1121 } 1122 mtx_destroy(&sc->config_mtx); 1123 } 1124 1125 /* General 'attach' routine. 
*/ 1126 int 1127 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename) 1128 { 1129 struct gv_sd *s; 1130 struct gv_softc *sc; 1131 1132 g_topology_assert(); 1133 1134 sc = p->vinumconf; 1135 KASSERT(sc != NULL, ("NULL sc")); 1136 1137 if (p->vol_sc != NULL) { 1138 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", 1139 p->name, p->volume); 1140 return (GV_ERR_ISATTACHED); 1141 } 1142 1143 /* Stale all subdisks of this plex. */ 1144 LIST_FOREACH(s, &p->subdisks, in_plex) { 1145 if (s->state != GV_SD_STALE) 1146 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 1147 } 1148 /* Attach to volume. Make sure volume is not up and running. */ 1149 if (gv_provider_is_open(v->provider)) { 1150 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy", 1151 p->name, v->name); 1152 return (GV_ERR_ISBUSY); 1153 } 1154 p->vol_sc = v; 1155 strlcpy(p->volume, v->name, sizeof(p->volume)); 1156 v->plexcount++; 1157 if (rename) { 1158 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, 1159 v->plexcount); 1160 } 1161 LIST_INSERT_HEAD(&v->plexes, p, in_volume); 1162 1163 /* Get plex up again. */ 1164 gv_update_vol_size(v, gv_vol_size(v)); 1165 gv_set_plex_state(p, GV_PLEX_UP, 0); 1166 gv_save_config(p->vinumconf); 1167 return (0); 1168 } 1169 1170 int 1171 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename) 1172 { 1173 struct gv_sd *s2; 1174 int error, sdcount; 1175 1176 g_topology_assert(); 1177 1178 /* If subdisk is attached, don't do it. */ 1179 if (s->plex_sc != NULL) { 1180 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", 1181 s->name, s->plex); 1182 return (GV_ERR_ISATTACHED); 1183 } 1184 1185 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 1186 /* First check that this subdisk has a correct offset. 
If none other 1187 * starts at the same, and it's correct module stripesize, it is */ 1188 if (offset != -1 && offset % p->stripesize != 0) 1189 return (GV_ERR_BADOFFSET); 1190 LIST_FOREACH(s2, &p->subdisks, in_plex) { 1191 if (s2->plex_offset == offset) 1192 return (GV_ERR_BADOFFSET); 1193 } 1194 1195 /* Attach the subdisk to the plex at given offset. */ 1196 s->plex_offset = offset; 1197 strlcpy(s->plex, p->name, sizeof(s->plex)); 1198 1199 sdcount = p->sdcount; 1200 error = gv_sd_to_plex(s, p); 1201 if (error) 1202 return (error); 1203 gv_update_plex_config(p); 1204 1205 if (rename) { 1206 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex, 1207 p->sdcount); 1208 } 1209 if (p->vol_sc != NULL) 1210 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc)); 1211 gv_save_config(p->vinumconf); 1212 /* We don't update the subdisk state since the user might have to 1213 * initiate a rebuild/sync first. */ 1214 return (0); 1215 } 1216 1217 /* Detach a plex from a volume. */ 1218 int 1219 gv_detach_plex(struct gv_plex *p, int flags) 1220 { 1221 struct gv_volume *v; 1222 1223 g_topology_assert(); 1224 v = p->vol_sc; 1225 1226 if (v == NULL) { 1227 G_VINUM_DEBUG(1, "unable to detach %s: already detached", 1228 p->name); 1229 return (0); /* Not an error. */ 1230 } 1231 1232 /* 1233 * Only proceed if forced or volume inactive. 1234 */ 1235 if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) || 1236 p->state == GV_PLEX_UP)) { 1237 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy", 1238 p->name, p->volume); 1239 return (GV_ERR_ISBUSY); 1240 } 1241 v->plexcount--; 1242 /* Make sure someone don't read us when gone. */ 1243 v->last_read_plex = NULL; 1244 LIST_REMOVE(p, in_volume); 1245 p->vol_sc = NULL; 1246 memset(p->volume, 0, GV_MAXVOLNAME); 1247 gv_update_vol_size(v, gv_vol_size(v)); 1248 gv_save_config(p->vinumconf); 1249 return (0); 1250 } 1251 1252 /* Detach a subdisk from a plex. 
*/
/*
 * Detaching a subdisk that has no plex is not an error.  On success the
 * subdisk is unlinked from its plex, its plex name is cleared, the plex's
 * count of detached subdisks is bumped and the configuration is saved.
 */
int
gv_detach_sd(struct gv_sd *s, int flags)
{
	struct gv_plex *plex;

	g_topology_assert();

	plex = s->plex_sc;
	if (plex == NULL) {
		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
		    s->name);
		return (0); /* Not an error. */
	}

	/*
	 * Without the force flag, refuse while the plex is in a state above
	 * degraded, or is degraded and this subdisk is up.
	 */
	if ((flags & GV_FLAG_F) == 0) {
		if (plex->state > GV_PLEX_DEGRADED ||
		    (plex->state == GV_PLEX_DEGRADED &&
		    s->state == GV_SD_UP)) {
			G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
			    s->name, s->plex);
			return (GV_ERR_ISBUSY);
		}
	}

	LIST_REMOVE(s, in_plex);
	s->plex_sc = NULL;
	memset(s->plex, 0, GV_MAXPLEXNAME);
	plex->sddetached++;
	gv_save_config(s->vinumconf);
	return (0);
}