1 /*- 2 * Copyright (c) 2004, 2007 Lukas Ertl 3 * Copyright (c) 2007, 2009 Ulf Lilleengen 4 * Copyright (c) 1997, 1998, 1999 5 * Nan Yang Computer Services Limited. All rights reserved. 6 * 7 * Parts written by Greg Lehey 8 * 9 * This software is distributed under the so-called ``Berkeley 10 * License'': 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Nan Yang Computer 23 * Services Limited. 24 * 4. Neither the name of the Company nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * This software is provided ``as is'', and any express or implied 29 * warranties, including, but not limited to, the implied warranties of 30 * merchantability and fitness for a particular purpose are disclaimed. 
31 * In no event shall the company or contributors be liable for any 32 * direct, indirect, incidental, special, exemplary, or consequential 33 * damages (including, but not limited to, procurement of substitute 34 * goods or services; loss of use, data, or profits; or business 35 * interruption) however caused and on any theory of liability, whether 36 * in contract, strict liability, or tort (including negligence or 37 * otherwise) arising in any way out of the use of this software, even if 38 * advised of the possibility of such damage. 39 * 40 */ 41 42 #include <sys/cdefs.h> 43 __FBSDID("$FreeBSD$"); 44 45 #include <sys/param.h> 46 #include <sys/malloc.h> 47 #include <sys/systm.h> 48 49 #include <geom/geom.h> 50 #include <geom/vinum/geom_vinum_var.h> 51 #include <geom/vinum/geom_vinum.h> 52 #include <geom/vinum/geom_vinum_share.h> 53 54 int gv_drive_is_newer(struct gv_softc *, struct gv_drive *); 55 static off_t gv_plex_smallest_sd(struct gv_plex *); 56 57 void 58 gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d) 59 { 60 char *aptr, *bptr, *cptr; 61 struct gv_volume *v, *v2; 62 struct gv_plex *p, *p2; 63 struct gv_sd *s, *s2; 64 int error, is_newer, tokens; 65 char *token[GV_MAXARGS]; 66 67 is_newer = gv_drive_is_newer(sc, d); 68 69 /* Until the end of the string *buf. */ 70 for (aptr = buf; *aptr != '\0'; aptr = bptr) { 71 bptr = aptr; 72 cptr = aptr; 73 74 /* Seperate input lines. 
*/ 75 while (*bptr != '\n') 76 bptr++; 77 *bptr = '\0'; 78 bptr++; 79 80 tokens = gv_tokenize(cptr, token, GV_MAXARGS); 81 82 if (tokens <= 0) 83 continue; 84 85 if (!strcmp(token[0], "volume")) { 86 v = gv_new_volume(tokens, token); 87 if (v == NULL) { 88 G_VINUM_DEBUG(0, "config parse failed volume"); 89 break; 90 } 91 92 v2 = gv_find_vol(sc, v->name); 93 if (v2 != NULL) { 94 if (is_newer) { 95 v2->state = v->state; 96 G_VINUM_DEBUG(2, "newer volume found!"); 97 } 98 g_free(v); 99 continue; 100 } 101 102 gv_create_volume(sc, v); 103 104 } else if (!strcmp(token[0], "plex")) { 105 p = gv_new_plex(tokens, token); 106 if (p == NULL) { 107 G_VINUM_DEBUG(0, "config parse failed plex"); 108 break; 109 } 110 111 p2 = gv_find_plex(sc, p->name); 112 if (p2 != NULL) { 113 /* XXX */ 114 if (is_newer) { 115 p2->state = p->state; 116 G_VINUM_DEBUG(2, "newer plex found!"); 117 } 118 g_free(p); 119 continue; 120 } 121 122 error = gv_create_plex(sc, p); 123 if (error) 124 continue; 125 /* 126 * These flags were set in gv_create_plex() and are not 127 * needed here (on-disk config parsing). 128 */ 129 p->flags &= ~GV_PLEX_ADDED; 130 131 } else if (!strcmp(token[0], "sd")) { 132 s = gv_new_sd(tokens, token); 133 134 if (s == NULL) { 135 G_VINUM_DEBUG(0, "config parse failed subdisk"); 136 break; 137 } 138 139 s2 = gv_find_sd(sc, s->name); 140 if (s2 != NULL) { 141 /* XXX */ 142 if (is_newer) { 143 s2->state = s->state; 144 G_VINUM_DEBUG(2, "newer subdisk found!"); 145 } 146 g_free(s); 147 continue; 148 } 149 150 /* 151 * Signal that this subdisk was tasted, and could 152 * possibly reference a drive that isn't in our config 153 * yet. 154 */ 155 s->flags |= GV_SD_TASTED; 156 157 if (s->state == GV_SD_UP) 158 s->flags |= GV_SD_CANGOUP; 159 160 error = gv_create_sd(sc, s); 161 if (error) 162 continue; 163 164 /* 165 * This flag was set in gv_create_sd() and is not 166 * needed here (on-disk config parsing). 
167 */ 168 s->flags &= ~GV_SD_NEWBORN; 169 s->flags &= ~GV_SD_GROW; 170 } 171 } 172 } 173 174 /* 175 * Format the vinum configuration properly. If ondisk is non-zero then the 176 * configuration is intended to be written to disk later. 177 */ 178 void 179 gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix) 180 { 181 struct gv_drive *d; 182 struct gv_sd *s; 183 struct gv_plex *p; 184 struct gv_volume *v; 185 186 /* 187 * We don't need the drive configuration if we're not writing the 188 * config to disk. 189 */ 190 if (!ondisk) { 191 LIST_FOREACH(d, &sc->drives, drive) { 192 sbuf_printf(sb, "%sdrive %s device /dev/%s\n", prefix, 193 d->name, d->device); 194 } 195 } 196 197 LIST_FOREACH(v, &sc->volumes, volume) { 198 if (!ondisk) 199 sbuf_printf(sb, "%s", prefix); 200 sbuf_printf(sb, "volume %s", v->name); 201 if (ondisk) 202 sbuf_printf(sb, " state %s", gv_volstate(v->state)); 203 sbuf_printf(sb, "\n"); 204 } 205 206 LIST_FOREACH(p, &sc->plexes, plex) { 207 if (!ondisk) 208 sbuf_printf(sb, "%s", prefix); 209 sbuf_printf(sb, "plex name %s org %s ", p->name, 210 gv_plexorg(p->org)); 211 if (gv_is_striped(p)) 212 sbuf_printf(sb, "%ds ", p->stripesize / 512); 213 if (p->vol_sc != NULL) 214 sbuf_printf(sb, "vol %s", p->volume); 215 if (ondisk) 216 sbuf_printf(sb, " state %s", gv_plexstate(p->state)); 217 sbuf_printf(sb, "\n"); 218 } 219 220 LIST_FOREACH(s, &sc->subdisks, sd) { 221 if (!ondisk) 222 sbuf_printf(sb, "%s", prefix); 223 sbuf_printf(sb, "sd name %s drive %s len %jds driveoffset " 224 "%jds", s->name, s->drive, s->size / 512, 225 s->drive_offset / 512); 226 if (s->plex_sc != NULL) { 227 sbuf_printf(sb, " plex %s plexoffset %jds", s->plex, 228 s->plex_offset / 512); 229 } 230 if (ondisk) 231 sbuf_printf(sb, " state %s", gv_sdstate(s->state)); 232 sbuf_printf(sb, "\n"); 233 } 234 } 235 236 static off_t 237 gv_plex_smallest_sd(struct gv_plex *p) 238 { 239 struct gv_sd *s; 240 off_t smallest; 241 242 KASSERT(p != NULL, 
("gv_plex_smallest_sd: NULL p")); 243 244 s = LIST_FIRST(&p->subdisks); 245 if (s == NULL) 246 return (-1); 247 smallest = s->size; 248 LIST_FOREACH(s, &p->subdisks, in_plex) { 249 if (s->size < smallest) 250 smallest = s->size; 251 } 252 return (smallest); 253 } 254 255 /* Walk over plexes in a volume and count how many are down. */ 256 int 257 gv_plexdown(struct gv_volume *v) 258 { 259 int plexdown; 260 struct gv_plex *p; 261 262 KASSERT(v != NULL, ("gv_plexdown: NULL v")); 263 264 plexdown = 0; 265 266 LIST_FOREACH(p, &v->plexes, plex) { 267 if (p->state == GV_PLEX_DOWN) 268 plexdown++; 269 } 270 return (plexdown); 271 } 272 273 int 274 gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p) 275 { 276 struct gv_sd *s2; 277 off_t psizeorig, remainder, smallest; 278 279 /* If this subdisk was already given to this plex, do nothing. */ 280 if (s->plex_sc == p) 281 return (0); 282 283 /* Check correct size of this subdisk. */ 284 s2 = LIST_FIRST(&p->subdisks); 285 /* Adjust the subdisk-size if necessary. */ 286 if (s2 != NULL && gv_is_striped(p)) { 287 /* First adjust to the stripesize. */ 288 remainder = s->size % p->stripesize; 289 290 if (remainder) { 291 G_VINUM_DEBUG(1, "size of sd %s is not a " 292 "multiple of plex stripesize, taking off " 293 "%jd bytes", s->name, 294 (intmax_t)remainder); 295 gv_adjust_freespace(s, remainder); 296 } 297 298 smallest = gv_plex_smallest_sd(p); 299 /* Then take off extra if other subdisks are smaller. */ 300 remainder = s->size - smallest; 301 302 /* 303 * Don't allow a remainder below zero for running plexes, it's too 304 * painful, and if someone were to accidentally do this, the 305 * resulting array might be smaller than the original... not god 306 */ 307 if (remainder < 0) { 308 if (!(p->flags & GV_PLEX_NEWBORN)) { 309 G_VINUM_DEBUG(0, "sd %s too small for plex %s!", 310 s->name, p->name); 311 return (GV_ERR_BADSIZE); 312 } 313 /* Adjust other subdisks. 
*/ 314 LIST_FOREACH(s2, &p->subdisks, in_plex) { 315 G_VINUM_DEBUG(1, "size of sd %s is to big, " 316 "taking off %jd bytes", s->name, 317 (intmax_t)remainder); 318 gv_adjust_freespace(s2, (remainder * -1)); 319 } 320 } else if (remainder > 0) { 321 G_VINUM_DEBUG(1, "size of sd %s is to big, " 322 "taking off %jd bytes", s->name, 323 (intmax_t)remainder); 324 gv_adjust_freespace(s, remainder); 325 } 326 } 327 328 /* Find the correct plex offset for this subdisk, if needed. */ 329 if (s->plex_offset == -1) { 330 /* 331 * First set it to 0 to catch the case where we had a detached 332 * subdisk that didn't get any good offset. 333 */ 334 s->plex_offset = 0; 335 if (p->sdcount) { 336 LIST_FOREACH(s2, &p->subdisks, in_plex) { 337 if (gv_is_striped(p)) 338 s->plex_offset = p->sdcount * 339 p->stripesize; 340 else 341 s->plex_offset = s2->plex_offset + 342 s2->size; 343 } 344 } 345 } 346 347 /* There are no subdisks for this plex yet, just insert it. */ 348 if (LIST_EMPTY(&p->subdisks)) { 349 LIST_INSERT_HEAD(&p->subdisks, s, in_plex); 350 351 /* Insert in correct order, depending on plex_offset. */ 352 } else { 353 LIST_FOREACH(s2, &p->subdisks, in_plex) { 354 if (s->plex_offset < s2->plex_offset) { 355 LIST_INSERT_BEFORE(s2, s, in_plex); 356 break; 357 } else if (LIST_NEXT(s2, in_plex) == NULL) { 358 LIST_INSERT_AFTER(s2, s, in_plex); 359 break; 360 } 361 } 362 } 363 364 s->plex_sc = p; 365 /* Adjust the size of our plex. We check if the plex misses a subdisk, 366 * so we don't make the plex smaller than it actually should be. 367 */ 368 psizeorig = p->size; 369 p->size = gv_plex_size(p); 370 /* Make sure the size is not changed. */ 371 if (p->sddetached > 0) { 372 if (p->size < psizeorig) { 373 p->size = psizeorig; 374 /* We make sure wee need another subdisk. 
*/ 375 if (p->sddetached == 1) 376 p->sddetached++; 377 } 378 p->sddetached--; 379 } else { 380 if ((p->org == GV_PLEX_RAID5 || 381 p->org == GV_PLEX_STRIPED) && 382 !(p->flags & GV_PLEX_NEWBORN) && 383 p->state == GV_PLEX_UP) { 384 s->flags |= GV_SD_GROW; 385 } 386 p->sdcount++; 387 } 388 389 return (0); 390 } 391 392 void 393 gv_update_vol_size(struct gv_volume *v, off_t size) 394 { 395 if (v == NULL) 396 return; 397 if (v->provider != NULL) { 398 g_topology_lock(); 399 v->provider->mediasize = size; 400 g_topology_unlock(); 401 } 402 v->size = size; 403 } 404 405 /* Return how many subdisks that constitute the original plex. */ 406 int 407 gv_sdcount(struct gv_plex *p, int growing) 408 { 409 struct gv_sd *s; 410 int sdcount; 411 412 sdcount = p->sdcount; 413 if (growing) { 414 LIST_FOREACH(s, &p->subdisks, in_plex) { 415 if (s->flags & GV_SD_GROW) 416 sdcount--; 417 } 418 } 419 420 return (sdcount); 421 } 422 423 /* Calculates the plex size. */ 424 off_t 425 gv_plex_size(struct gv_plex *p) 426 { 427 struct gv_sd *s; 428 off_t size; 429 int sdcount; 430 431 KASSERT(p != NULL, ("gv_plex_size: NULL p")); 432 433 /* Adjust the size of our plex. */ 434 size = 0; 435 sdcount = gv_sdcount(p, 1); 436 switch (p->org) { 437 case GV_PLEX_CONCAT: 438 LIST_FOREACH(s, &p->subdisks, in_plex) 439 size += s->size; 440 break; 441 case GV_PLEX_STRIPED: 442 s = LIST_FIRST(&p->subdisks); 443 size = ((s != NULL) ? (sdcount * s->size) : 0); 444 break; 445 case GV_PLEX_RAID5: 446 s = LIST_FIRST(&p->subdisks); 447 size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0); 448 break; 449 } 450 451 return (size); 452 } 453 454 /* Returns the size of a volume. 
*/ 455 off_t 456 gv_vol_size(struct gv_volume *v) 457 { 458 struct gv_plex *p; 459 off_t minplexsize; 460 461 KASSERT(v != NULL, ("gv_vol_size: NULL v")); 462 463 p = LIST_FIRST(&v->plexes); 464 if (p == NULL) 465 return (0); 466 467 minplexsize = p->size; 468 LIST_FOREACH(p, &v->plexes, in_volume) { 469 if (p->size < minplexsize) { 470 minplexsize = p->size; 471 } 472 } 473 return (minplexsize); 474 } 475 476 void 477 gv_update_plex_config(struct gv_plex *p) 478 { 479 struct gv_sd *s, *s2; 480 off_t remainder; 481 int required_sds, state; 482 483 KASSERT(p != NULL, ("gv_update_plex_config: NULL p")); 484 485 /* The plex was added to an already running volume. */ 486 if (p->flags & GV_PLEX_ADDED) 487 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 488 489 switch (p->org) { 490 case GV_PLEX_STRIPED: 491 required_sds = 2; 492 break; 493 case GV_PLEX_RAID5: 494 required_sds = 3; 495 break; 496 case GV_PLEX_CONCAT: 497 default: 498 required_sds = 0; 499 break; 500 } 501 502 if (required_sds) { 503 if (p->sdcount < required_sds) { 504 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 505 } 506 507 /* 508 * The subdisks in striped plexes must all have the same size. 509 */ 510 s = LIST_FIRST(&p->subdisks); 511 LIST_FOREACH(s2, &p->subdisks, in_plex) { 512 if (s->size != s2->size) { 513 G_VINUM_DEBUG(0, "subdisk size mismatch %s" 514 "(%jd) <> %s (%jd)", s->name, s->size, 515 s2->name, s2->size); 516 gv_set_plex_state(p, GV_PLEX_DOWN, 517 GV_SETSTATE_FORCE); 518 } 519 } 520 521 LIST_FOREACH(s, &p->subdisks, in_plex) { 522 /* Trim subdisk sizes to match the stripe size. 
*/ 523 remainder = s->size % p->stripesize; 524 if (remainder) { 525 G_VINUM_DEBUG(1, "size of sd %s is not a " 526 "multiple of plex stripesize, taking off " 527 "%jd bytes", s->name, (intmax_t)remainder); 528 gv_adjust_freespace(s, remainder); 529 } 530 } 531 } 532 533 p->size = gv_plex_size(p); 534 if (p->sdcount == 0) 535 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 536 else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) { 537 LIST_FOREACH(s, &p->subdisks, in_plex) 538 gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE); 539 /* If added to a volume, we want the plex to be down. */ 540 state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP; 541 gv_set_plex_state(p, state, GV_SETSTATE_FORCE); 542 p->flags &= ~GV_PLEX_ADDED; 543 } else if (p->flags & GV_PLEX_ADDED) { 544 LIST_FOREACH(s, &p->subdisks, in_plex) 545 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 546 gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE); 547 p->flags &= ~GV_PLEX_ADDED; 548 } else if (p->state == GV_PLEX_UP) { 549 LIST_FOREACH(s, &p->subdisks, in_plex) { 550 if (s->flags & GV_SD_GROW) { 551 gv_set_plex_state(p, GV_PLEX_GROWABLE, 552 GV_SETSTATE_FORCE); 553 break; 554 } 555 } 556 } 557 /* Our plex is grown up now. */ 558 p->flags &= ~GV_PLEX_NEWBORN; 559 } 560 561 /* 562 * Give a subdisk to a drive, check and adjust several parameters, adjust 563 * freelist. 564 */ 565 int 566 gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d) 567 { 568 struct gv_sd *s2; 569 struct gv_freelist *fl, *fl2; 570 off_t tmp; 571 int i; 572 573 fl2 = NULL; 574 575 /* Shortcut for "referenced" drives. */ 576 if (d->flags & GV_DRIVE_REFERENCED) { 577 s->drive_sc = d; 578 return (0); 579 } 580 581 /* Check if this subdisk was already given to this drive. 
*/ 582 if (s->drive_sc != NULL) { 583 if (s->drive_sc == d) { 584 if (!(s->flags & GV_SD_TASTED)) { 585 return (0); 586 } 587 } else { 588 G_VINUM_DEBUG(0, "error giving subdisk '%s' to '%s' " 589 "(already on '%s')", s->name, d->name, 590 s->drive_sc->name); 591 return (GV_ERR_ISATTACHED); 592 } 593 } 594 595 /* Preliminary checks. */ 596 if ((s->size > d->avail) || (d->freelist_entries == 0)) { 597 G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name, 598 s->name); 599 return (GV_ERR_NOSPACE); 600 } 601 602 /* If no size was given for this subdisk, try to auto-size it... */ 603 if (s->size == -1) { 604 /* Find the largest available slot. */ 605 LIST_FOREACH(fl, &d->freelist, freelist) { 606 if (fl->size < s->size) 607 continue; 608 s->size = fl->size; 609 s->drive_offset = fl->offset; 610 fl2 = fl; 611 } 612 613 /* No good slot found? */ 614 if (s->size == -1) { 615 G_VINUM_DEBUG(0, "unable to autosize '%s' on '%s'", 616 s->name, d->name); 617 return (GV_ERR_BADSIZE); 618 } 619 620 /* 621 * ... or check if we have a free slot that's large enough for the 622 * given size. 623 */ 624 } else { 625 i = 0; 626 LIST_FOREACH(fl, &d->freelist, freelist) { 627 if (fl->size < s->size) 628 continue; 629 /* Assign drive offset, if not given. */ 630 if (s->drive_offset == -1) 631 s->drive_offset = fl->offset; 632 fl2 = fl; 633 i++; 634 break; 635 } 636 637 /* Couldn't find a good free slot. */ 638 if (i == 0) { 639 G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'", 640 s->name, d->name); 641 return (GV_ERR_NOSPACE); 642 } 643 } 644 645 /* No drive offset given, try to calculate it. */ 646 if (s->drive_offset == -1) { 647 648 /* Add offsets and sizes from other subdisks on this drive. */ 649 LIST_FOREACH(s2, &d->subdisks, from_drive) { 650 s->drive_offset = s2->drive_offset + s2->size; 651 } 652 653 /* 654 * If there are no other subdisks yet, then set the default 655 * offset to GV_DATA_START. 
656 */ 657 if (s->drive_offset == -1) 658 s->drive_offset = GV_DATA_START; 659 660 /* Check if we have a free slot at the given drive offset. */ 661 } else { 662 i = 0; 663 LIST_FOREACH(fl, &d->freelist, freelist) { 664 /* Yes, this subdisk fits. */ 665 if ((fl->offset <= s->drive_offset) && 666 (fl->offset + fl->size >= 667 s->drive_offset + s->size)) { 668 i++; 669 fl2 = fl; 670 break; 671 } 672 } 673 674 /* Couldn't find a good free slot. */ 675 if (i == 0) { 676 G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit " 677 "on '%s'", s->name, d->name); 678 return (GV_ERR_NOSPACE); 679 } 680 } 681 682 /* 683 * Now that all parameters are checked and set up, we can give the 684 * subdisk to the drive and adjust the freelist. 685 */ 686 687 /* First, adjust the freelist. */ 688 LIST_FOREACH(fl, &d->freelist, freelist) { 689 /* Look for the free slot that we have found before. */ 690 if (fl != fl2) 691 continue; 692 693 /* The subdisk starts at the beginning of the free slot. */ 694 if (fl->offset == s->drive_offset) { 695 fl->offset += s->size; 696 fl->size -= s->size; 697 698 /* The subdisk uses the whole slot, so remove it. */ 699 if (fl->size == 0) { 700 d->freelist_entries--; 701 LIST_REMOVE(fl, freelist); 702 } 703 /* 704 * The subdisk does not start at the beginning of the free 705 * slot. 706 */ 707 } else { 708 tmp = fl->offset + fl->size; 709 fl->size = s->drive_offset - fl->offset; 710 711 /* 712 * The subdisk didn't use the complete rest of the free 713 * slot, so we need to split it. 714 */ 715 if (s->drive_offset + s->size != tmp) { 716 fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO); 717 fl2->offset = s->drive_offset + s->size; 718 fl2->size = tmp - fl2->offset; 719 LIST_INSERT_AFTER(fl, fl2, freelist); 720 d->freelist_entries++; 721 } 722 } 723 break; 724 } 725 726 /* 727 * This is the first subdisk on this drive, just insert it into the 728 * list. 
729 */ 730 if (LIST_EMPTY(&d->subdisks)) { 731 LIST_INSERT_HEAD(&d->subdisks, s, from_drive); 732 733 /* There are other subdisks, so insert this one in correct order. */ 734 } else { 735 LIST_FOREACH(s2, &d->subdisks, from_drive) { 736 if (s->drive_offset < s2->drive_offset) { 737 LIST_INSERT_BEFORE(s2, s, from_drive); 738 break; 739 } else if (LIST_NEXT(s2, from_drive) == NULL) { 740 LIST_INSERT_AFTER(s2, s, from_drive); 741 break; 742 } 743 } 744 } 745 746 d->sdcount++; 747 d->avail -= s->size; 748 749 s->flags &= ~GV_SD_TASTED; 750 751 /* Link back from the subdisk to this drive. */ 752 s->drive_sc = d; 753 754 return (0); 755 } 756 757 void 758 gv_free_sd(struct gv_sd *s) 759 { 760 struct gv_drive *d; 761 struct gv_freelist *fl, *fl2; 762 763 KASSERT(s != NULL, ("gv_free_sd: NULL s")); 764 765 d = s->drive_sc; 766 if (d == NULL) 767 return; 768 769 /* 770 * First, find the free slot that's immediately before or after this 771 * subdisk. 772 */ 773 fl = NULL; 774 LIST_FOREACH(fl, &d->freelist, freelist) { 775 if (fl->offset == s->drive_offset + s->size) 776 break; 777 if (fl->offset + fl->size == s->drive_offset) 778 break; 779 } 780 781 /* If there is no free slot behind this subdisk, so create one. */ 782 if (fl == NULL) { 783 784 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 785 fl->size = s->size; 786 fl->offset = s->drive_offset; 787 788 if (d->freelist_entries == 0) { 789 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 790 } else { 791 LIST_FOREACH(fl2, &d->freelist, freelist) { 792 if (fl->offset < fl2->offset) { 793 LIST_INSERT_BEFORE(fl2, fl, freelist); 794 break; 795 } else if (LIST_NEXT(fl2, freelist) == NULL) { 796 LIST_INSERT_AFTER(fl2, fl, freelist); 797 break; 798 } 799 } 800 } 801 802 d->freelist_entries++; 803 804 /* Expand the free slot we just found. 
*/ 805 } else { 806 fl->size += s->size; 807 if (fl->offset > s->drive_offset) 808 fl->offset = s->drive_offset; 809 } 810 811 d->avail += s->size; 812 d->sdcount--; 813 } 814 815 void 816 gv_adjust_freespace(struct gv_sd *s, off_t remainder) 817 { 818 struct gv_drive *d; 819 struct gv_freelist *fl, *fl2; 820 821 KASSERT(s != NULL, ("gv_adjust_freespace: NULL s")); 822 d = s->drive_sc; 823 KASSERT(d != NULL, ("gv_adjust_freespace: NULL d")); 824 825 /* First, find the free slot that's immediately after this subdisk. */ 826 fl = NULL; 827 LIST_FOREACH(fl, &d->freelist, freelist) { 828 if (fl->offset == s->drive_offset + s->size) 829 break; 830 } 831 832 /* If there is no free slot behind this subdisk, so create one. */ 833 if (fl == NULL) { 834 835 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 836 fl->size = remainder; 837 fl->offset = s->drive_offset + s->size - remainder; 838 839 if (d->freelist_entries == 0) { 840 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 841 } else { 842 LIST_FOREACH(fl2, &d->freelist, freelist) { 843 if (fl->offset < fl2->offset) { 844 LIST_INSERT_BEFORE(fl2, fl, freelist); 845 break; 846 } else if (LIST_NEXT(fl2, freelist) == NULL) { 847 LIST_INSERT_AFTER(fl2, fl, freelist); 848 break; 849 } 850 } 851 } 852 853 d->freelist_entries++; 854 855 /* Expand the free slot we just found. */ 856 } else { 857 fl->offset -= remainder; 858 fl->size += remainder; 859 } 860 861 s->size -= remainder; 862 d->avail += remainder; 863 } 864 865 /* Check if the given plex is a striped one. */ 866 int 867 gv_is_striped(struct gv_plex *p) 868 { 869 KASSERT(p != NULL, ("gv_is_striped: NULL p")); 870 switch(p->org) { 871 case GV_PLEX_STRIPED: 872 case GV_PLEX_RAID5: 873 return (1); 874 default: 875 return (0); 876 } 877 } 878 879 /* Find a volume by name. 
*/ 880 struct gv_volume * 881 gv_find_vol(struct gv_softc *sc, char *name) 882 { 883 struct gv_volume *v; 884 885 LIST_FOREACH(v, &sc->volumes, volume) { 886 if (!strncmp(v->name, name, GV_MAXVOLNAME)) 887 return (v); 888 } 889 890 return (NULL); 891 } 892 893 /* Find a plex by name. */ 894 struct gv_plex * 895 gv_find_plex(struct gv_softc *sc, char *name) 896 { 897 struct gv_plex *p; 898 899 LIST_FOREACH(p, &sc->plexes, plex) { 900 if (!strncmp(p->name, name, GV_MAXPLEXNAME)) 901 return (p); 902 } 903 904 return (NULL); 905 } 906 907 /* Find a subdisk by name. */ 908 struct gv_sd * 909 gv_find_sd(struct gv_softc *sc, char *name) 910 { 911 struct gv_sd *s; 912 913 LIST_FOREACH(s, &sc->subdisks, sd) { 914 if (!strncmp(s->name, name, GV_MAXSDNAME)) 915 return (s); 916 } 917 918 return (NULL); 919 } 920 921 /* Find a drive by name. */ 922 struct gv_drive * 923 gv_find_drive(struct gv_softc *sc, char *name) 924 { 925 struct gv_drive *d; 926 927 LIST_FOREACH(d, &sc->drives, drive) { 928 if (!strncmp(d->name, name, GV_MAXDRIVENAME)) 929 return (d); 930 } 931 932 return (NULL); 933 } 934 935 /* Find a drive given a device. */ 936 struct gv_drive * 937 gv_find_drive_device(struct gv_softc *sc, char *device) 938 { 939 struct gv_drive *d; 940 941 LIST_FOREACH(d, &sc->drives, drive) { 942 if(!strcmp(d->device, device)) 943 return (d); 944 } 945 946 return (NULL); 947 } 948 949 /* Check if any consumer of the given geom is open. */ 950 int 951 gv_consumer_is_open(struct g_consumer *cp) 952 { 953 if (cp == NULL) 954 return (0); 955 956 if (cp->acr || cp->acw || cp->ace) 957 return (1); 958 959 return (0); 960 } 961 962 int 963 gv_provider_is_open(struct g_provider *pp) 964 { 965 if (pp == NULL) 966 return (0); 967 968 if (pp->acr || pp->acw || pp->ace) 969 return (1); 970 971 return (0); 972 } 973 974 /* 975 * Compare the modification dates of the drives. 976 * Return 1 if a > b, 0 otherwise. 
977 */ 978 int 979 gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d) 980 { 981 struct gv_drive *d2; 982 struct timeval *a, *b; 983 984 KASSERT(!LIST_EMPTY(&sc->drives), 985 ("gv_is_drive_newer: empty drive list")); 986 987 a = &d->hdr->label.last_update; 988 LIST_FOREACH(d2, &sc->drives, drive) { 989 if ((d == d2) || (d2->state != GV_DRIVE_UP) || 990 (d2->hdr == NULL)) 991 continue; 992 b = &d2->hdr->label.last_update; 993 if (timevalcmp(a, b, >)) 994 return (1); 995 } 996 997 return (0); 998 } 999 1000 /* Return the type of object identified by string 'name'. */ 1001 int 1002 gv_object_type(struct gv_softc *sc, char *name) 1003 { 1004 struct gv_drive *d; 1005 struct gv_plex *p; 1006 struct gv_sd *s; 1007 struct gv_volume *v; 1008 1009 LIST_FOREACH(v, &sc->volumes, volume) { 1010 if (!strncmp(v->name, name, GV_MAXVOLNAME)) 1011 return (GV_TYPE_VOL); 1012 } 1013 1014 LIST_FOREACH(p, &sc->plexes, plex) { 1015 if (!strncmp(p->name, name, GV_MAXPLEXNAME)) 1016 return (GV_TYPE_PLEX); 1017 } 1018 1019 LIST_FOREACH(s, &sc->subdisks, sd) { 1020 if (!strncmp(s->name, name, GV_MAXSDNAME)) 1021 return (GV_TYPE_SD); 1022 } 1023 1024 LIST_FOREACH(d, &sc->drives, drive) { 1025 if (!strncmp(d->name, name, GV_MAXDRIVENAME)) 1026 return (GV_TYPE_DRIVE); 1027 } 1028 1029 return (GV_ERR_NOTFOUND); 1030 } 1031 1032 void 1033 gv_setup_objects(struct gv_softc *sc) 1034 { 1035 struct g_provider *pp; 1036 struct gv_volume *v; 1037 struct gv_plex *p; 1038 struct gv_sd *s; 1039 struct gv_drive *d; 1040 1041 LIST_FOREACH(s, &sc->subdisks, sd) { 1042 d = gv_find_drive(sc, s->drive); 1043 if (d != NULL) 1044 gv_sd_to_drive(s, d); 1045 p = gv_find_plex(sc, s->plex); 1046 if (p != NULL) 1047 gv_sd_to_plex(s, p); 1048 gv_update_sd_state(s); 1049 } 1050 1051 LIST_FOREACH(p, &sc->plexes, plex) { 1052 gv_update_plex_config(p); 1053 v = gv_find_vol(sc, p->volume); 1054 if (v != NULL && p->vol_sc != v) { 1055 p->vol_sc = v; 1056 v->plexcount++; 1057 LIST_INSERT_HEAD(&v->plexes, p, in_volume); 
1058 } 1059 gv_update_plex_config(p); 1060 } 1061 1062 LIST_FOREACH(v, &sc->volumes, volume) { 1063 v->size = gv_vol_size(v); 1064 if (v->provider == NULL) { 1065 g_topology_lock(); 1066 pp = g_new_providerf(sc->geom, "gvinum/%s", v->name); 1067 pp->mediasize = v->size; 1068 pp->sectorsize = 512; /* XXX */ 1069 g_error_provider(pp, 0); 1070 v->provider = pp; 1071 pp->private = v; 1072 g_topology_unlock(); 1073 } else if (v->provider->mediasize != v->size) { 1074 g_topology_lock(); 1075 v->provider->mediasize = v->size; 1076 g_topology_unlock(); 1077 } 1078 v->flags &= ~GV_VOL_NEWBORN; 1079 gv_update_vol_state(v); 1080 } 1081 } 1082 1083 void 1084 gv_cleanup(struct gv_softc *sc) 1085 { 1086 struct gv_volume *v, *v2; 1087 struct gv_plex *p, *p2; 1088 struct gv_sd *s, *s2; 1089 struct gv_drive *d, *d2; 1090 struct gv_freelist *fl, *fl2; 1091 1092 mtx_lock(&sc->config_mtx); 1093 LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) { 1094 LIST_REMOVE(v, volume); 1095 g_free(v->wqueue); 1096 g_free(v); 1097 } 1098 LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) { 1099 LIST_REMOVE(p, plex); 1100 g_free(p->bqueue); 1101 g_free(p->rqueue); 1102 g_free(p->wqueue); 1103 g_free(p); 1104 } 1105 LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) { 1106 LIST_REMOVE(s, sd); 1107 g_free(s); 1108 } 1109 LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) { 1110 LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) { 1111 LIST_REMOVE(fl, freelist); 1112 g_free(fl); 1113 } 1114 LIST_REMOVE(d, drive); 1115 g_free(d->hdr); 1116 g_free(d); 1117 } 1118 mtx_destroy(&sc->config_mtx); 1119 } 1120 1121 /* General 'attach' routine. 
*/ 1122 int 1123 gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename) 1124 { 1125 struct gv_sd *s; 1126 struct gv_softc *sc; 1127 1128 g_topology_assert(); 1129 1130 sc = p->vinumconf; 1131 KASSERT(sc != NULL, ("NULL sc")); 1132 1133 if (p->vol_sc != NULL) { 1134 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", 1135 p->name, p->volume); 1136 return (GV_ERR_ISATTACHED); 1137 } 1138 1139 /* Stale all subdisks of this plex. */ 1140 LIST_FOREACH(s, &p->subdisks, in_plex) { 1141 if (s->state != GV_SD_STALE) 1142 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 1143 } 1144 /* Attach to volume. Make sure volume is not up and running. */ 1145 if (gv_provider_is_open(v->provider)) { 1146 G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy", 1147 p->name, v->name); 1148 return (GV_ERR_ISBUSY); 1149 } 1150 p->vol_sc = v; 1151 strlcpy(p->volume, v->name, sizeof(p->volume)); 1152 v->plexcount++; 1153 if (rename) { 1154 snprintf(p->name, sizeof(p->name), "%s.p%d", v->name, 1155 v->plexcount); 1156 } 1157 LIST_INSERT_HEAD(&v->plexes, p, in_volume); 1158 1159 /* Get plex up again. */ 1160 gv_update_vol_size(v, gv_vol_size(v)); 1161 gv_set_plex_state(p, GV_PLEX_UP, 0); 1162 gv_save_config(p->vinumconf); 1163 return (0); 1164 } 1165 1166 int 1167 gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename) 1168 { 1169 struct gv_sd *s2; 1170 int error, sdcount; 1171 1172 g_topology_assert(); 1173 1174 /* If subdisk is attached, don't do it. */ 1175 if (s->plex_sc != NULL) { 1176 G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s", 1177 s->name, s->plex); 1178 return (GV_ERR_ISATTACHED); 1179 } 1180 1181 gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE); 1182 /* First check that this subdisk has a correct offset. 
If none other 1183 * starts at the same, and it's correct module stripesize, it is */ 1184 if (offset != -1 && offset % p->stripesize != 0) 1185 return (GV_ERR_BADOFFSET); 1186 LIST_FOREACH(s2, &p->subdisks, in_plex) { 1187 if (s2->plex_offset == offset) 1188 return (GV_ERR_BADOFFSET); 1189 } 1190 1191 /* Attach the subdisk to the plex at given offset. */ 1192 s->plex_offset = offset; 1193 strlcpy(s->plex, p->name, sizeof(s->plex)); 1194 1195 sdcount = p->sdcount; 1196 error = gv_sd_to_plex(s, p); 1197 if (error) 1198 return (error); 1199 gv_update_plex_config(p); 1200 1201 if (rename) { 1202 snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex, 1203 p->sdcount); 1204 } 1205 if (p->vol_sc != NULL) 1206 gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc)); 1207 gv_save_config(p->vinumconf); 1208 /* We don't update the subdisk state since the user might have to 1209 * initiate a rebuild/sync first. */ 1210 return (0); 1211 } 1212 1213 /* Detach a plex from a volume. */ 1214 int 1215 gv_detach_plex(struct gv_plex *p, int flags) 1216 { 1217 struct gv_volume *v; 1218 1219 g_topology_assert(); 1220 v = p->vol_sc; 1221 1222 if (v == NULL) { 1223 G_VINUM_DEBUG(1, "unable to detach %s: already detached", 1224 p->name); 1225 return (0); /* Not an error. */ 1226 } 1227 1228 /* 1229 * Only proceed if forced or volume inactive. 1230 */ 1231 if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) || 1232 p->state == GV_PLEX_UP)) { 1233 G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy", 1234 p->name, p->volume); 1235 return (GV_ERR_ISBUSY); 1236 } 1237 v->plexcount--; 1238 /* Make sure someone don't read us when gone. */ 1239 v->last_read_plex = NULL; 1240 LIST_REMOVE(p, in_volume); 1241 p->vol_sc = NULL; 1242 memset(p->volume, 0, GV_MAXVOLNAME); 1243 gv_update_vol_size(v, gv_vol_size(v)); 1244 gv_save_config(p->vinumconf); 1245 return (0); 1246 } 1247 1248 /* Detach a subdisk from a plex. 
*/
int
gv_detach_sd(struct gv_sd *s, int flags)
{
	struct gv_plex *owner;
	int forced, plex_in_use;

	g_topology_assert();

	owner = s->plex_sc;
	if (owner == NULL) {
		G_VINUM_DEBUG(1, "unable to detach %s: already detached",
		    s->name);
		/* Not an error. */
		return (0);
	}

	/*
	 * The plex counts as in use when its state is above degraded, or
	 * when it is degraded and this very subdisk is up.  Such a plex is
	 * only touched when the caller forces the operation.
	 */
	forced = (flags & GV_FLAG_F) != 0;
	if (owner->state > GV_PLEX_DEGRADED)
		plex_in_use = 1;
	else if (owner->state == GV_PLEX_DEGRADED && s->state == GV_SD_UP)
		plex_in_use = 1;
	else
		plex_in_use = 0;

	if (plex_in_use && !forced) {
		G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
		    s->name, s->plex);
		return (GV_ERR_ISBUSY);
	}

	/* Unlink from the plex and forget which plex we belonged to. */
	LIST_REMOVE(s, in_plex);
	s->plex_sc = NULL;
	memset(s->plex, 0, GV_MAXPLEXNAME);

	/* Remember that the plex is now missing a subdisk. */
	owner->sddetached++;

	gv_save_config(s->vinumconf);
	return (0);
}