1 /*- 2 * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org> 3 * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 #include <sys/bio.h> 33 #include <sys/endian.h> 34 #include <sys/kernel.h> 35 #include <sys/kobj.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mutex.h> 40 #include <sys/systm.h> 41 #include <geom/geom.h> 42 #include "geom/raid/g_raid.h" 43 #include "g_raid_md_if.h" 44 45 static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata"); 46 47 #define PROMISE_MAX_DISKS 8 48 #define PROMISE_MAX_SUBDISKS 2 49 #define PROMISE_META_OFFSET 14 50 51 struct promise_raid_disk { 52 uint8_t flags; /* Subdisk status. */ 53 #define PROMISE_F_VALID 0x01 54 #define PROMISE_F_ONLINE 0x02 55 #define PROMISE_F_ASSIGNED 0x04 56 #define PROMISE_F_SPARE 0x08 57 #define PROMISE_F_DUPLICATE 0x10 58 #define PROMISE_F_REDIR 0x20 59 #define PROMISE_F_DOWN 0x40 60 #define PROMISE_F_READY 0x80 61 62 uint8_t number; /* Position in a volume. */ 63 uint8_t channel; /* ATA channel number. */ 64 uint8_t device; /* ATA device number. */ 65 uint64_t id __packed; /* Subdisk ID. */ 66 } __packed; 67 68 struct promise_raid_conf { 69 char promise_id[24]; 70 #define PROMISE_MAGIC "Promise Technology, Inc." 71 #define FREEBSD_MAGIC "FreeBSD ATA driver RAID " 72 73 uint32_t dummy_0; 74 uint64_t magic_0; 75 #define PROMISE_MAGIC0(x) (((uint64_t)(x.channel) << 48) | \ 76 ((uint64_t)(x.device != 0) << 56)) 77 uint16_t magic_1; 78 uint32_t magic_2; 79 uint8_t filler1[470]; 80 81 uint32_t integrity; 82 #define PROMISE_I_VALID 0x00000080 83 84 struct promise_raid_disk disk; /* This subdisk info. */ 85 uint32_t disk_offset; /* Subdisk offset. */ 86 uint32_t disk_sectors; /* Subdisk size */ 87 uint32_t rebuild_lba; /* Rebuild position. */ 88 uint16_t generation; /* Generation number. */ 89 uint8_t status; /* Volume status. */ 90 #define PROMISE_S_VALID 0x01 91 #define PROMISE_S_ONLINE 0x02 92 #define PROMISE_S_INITED 0x04 93 #define PROMISE_S_READY 0x08 94 #define PROMISE_S_DEGRADED 0x10 95 #define PROMISE_S_MARKED 0x20 96 #define PROMISE_S_MIGRATING 0x40 97 #define PROMISE_S_FUNCTIONAL 0x80 98 99 uint8_t type; /* Voluem type. */ 100 #define PROMISE_T_RAID0 0x00 101 #define PROMISE_T_RAID1 0x01 102 #define PROMISE_T_RAID3 0x02 103 #define PROMISE_T_RAID5 0x04 104 #define PROMISE_T_SPAN 0x08 105 #define PROMISE_T_JBOD 0x10 106 107 uint8_t total_disks; /* Disks in this volume. */ 108 uint8_t stripe_shift; /* Strip size. */ 109 uint8_t array_width; /* Number of RAID0 stripes. */ 110 uint8_t array_number; /* Global volume number. */ 111 uint32_t total_sectors; /* Volume size. */ 112 uint16_t cylinders; /* Volume geometry: C. */ 113 uint8_t heads; /* Volume geometry: H. */ 114 uint8_t sectors; /* Volume geometry: S. */ 115 uint64_t volume_id __packed; /* Volume ID, */ 116 struct promise_raid_disk disks[PROMISE_MAX_DISKS]; 117 /* Subdisks in this volume. */ 118 char name[32]; /* Volume label. */ 119 120 uint32_t filler2[8]; 121 uint32_t magic_3; /* Something related to rebuild. */ 122 uint64_t rebuild_lba64; /* Per-volume rebuild position. */ 123 uint32_t magic_4; 124 uint32_t magic_5; 125 uint32_t total_sectors_high; 126 uint32_t filler3[324]; 127 uint32_t checksum; 128 } __packed; 129 130 struct g_raid_md_promise_perdisk { 131 int pd_updated; 132 int pd_subdisks; 133 struct promise_raid_conf *pd_meta[PROMISE_MAX_SUBDISKS]; 134 }; 135 136 struct g_raid_md_promise_pervolume { 137 struct promise_raid_conf *pv_meta; 138 uint64_t pv_id; 139 uint16_t pv_generation; 140 int pv_disks_present; 141 int pv_started; 142 struct callout pv_start_co; /* STARTING state timer. */ 143 }; 144 145 static g_raid_md_create_t g_raid_md_create_promise; 146 static g_raid_md_taste_t g_raid_md_taste_promise; 147 static g_raid_md_event_t g_raid_md_event_promise; 148 static g_raid_md_volume_event_t g_raid_md_volume_event_promise; 149 static g_raid_md_ctl_t g_raid_md_ctl_promise; 150 static g_raid_md_write_t g_raid_md_write_promise; 151 static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise; 152 static g_raid_md_free_disk_t g_raid_md_free_disk_promise; 153 static g_raid_md_free_volume_t g_raid_md_free_volume_promise; 154 static g_raid_md_free_t g_raid_md_free_promise; 155 156 static kobj_method_t g_raid_md_promise_methods[] = { 157 KOBJMETHOD(g_raid_md_create, g_raid_md_create_promise), 158 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_promise), 159 KOBJMETHOD(g_raid_md_event, g_raid_md_event_promise), 160 KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_promise), 161 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_promise), 162 KOBJMETHOD(g_raid_md_write, g_raid_md_write_promise), 163 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_promise), 164 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_promise), 165 KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_promise), 166 KOBJMETHOD(g_raid_md_free, g_raid_md_free_promise), 167 { 0, 0 } 168 }; 169 170 static struct g_raid_md_class g_raid_md_promise_class = { 171 "Promise", 172 g_raid_md_promise_methods, 173 sizeof(struct g_raid_md_object), 174 .mdc_enable = 1, 175 .mdc_priority = 100 176 }; 177 178 179 static void 180 g_raid_md_promise_print(struct promise_raid_conf *meta) 181 { 182 int i; 183 184 if (g_raid_debug < 1) 185 return; 186 187 printf("********* ATA Promise Metadata *********\n"); 188 printf("promise_id <%.24s>\n", meta->promise_id); 189 printf("disk %02x %02x %02x %02x %016jx\n", 190 meta->disk.flags, meta->disk.number, meta->disk.channel, 191 meta->disk.device, meta->disk.id); 192 printf("disk_offset %u\n", meta->disk_offset); 193 printf("disk_sectors %u\n", meta->disk_sectors); 194 printf("rebuild_lba %u\n", meta->rebuild_lba); 195 printf("generation %u\n", meta->generation); 196 printf("status 0x%02x\n", meta->status); 197 printf("type %u\n", meta->type); 198 printf("total_disks %u\n", meta->total_disks); 199 printf("stripe_shift %u\n", meta->stripe_shift); 200 printf("array_width %u\n", meta->array_width); 201 printf("array_number %u\n", meta->array_number); 202 printf("total_sectors %u\n", meta->total_sectors); 203 printf("cylinders %u\n", meta->cylinders); 204 printf("heads %u\n", meta->heads); 205 printf("sectors %u\n", meta->sectors); 206 printf("volume_id 0x%016jx\n", meta->volume_id); 207 printf("disks:\n"); 208 for (i = 0; i < PROMISE_MAX_DISKS; i++ ) { 209 printf(" %02x %02x %02x %02x %016jx\n", 210 meta->disks[i].flags, meta->disks[i].number, 211 meta->disks[i].channel, meta->disks[i].device, 212 meta->disks[i].id); 213 } 214 printf("name <%.32s>\n", meta->name); 215 printf("magic_3 0x%08x\n", meta->magic_3); 216 printf("rebuild_lba64 %ju\n", meta->rebuild_lba64); 217 printf("magic_4 0x%08x\n", meta->magic_4); 218 printf("magic_5 0x%08x\n", meta->magic_5); 219 printf("total_sectors_high 0x%08x\n", meta->total_sectors_high); 220 printf("=================================================\n"); 221 } 222 223 static struct promise_raid_conf * 224 promise_meta_copy(struct promise_raid_conf *meta) 225 { 226 struct promise_raid_conf *nmeta; 227 228 nmeta = malloc(sizeof(*nmeta), M_MD_PROMISE, M_WAITOK); 229 memcpy(nmeta, meta, sizeof(*nmeta)); 230 return (nmeta); 231 } 232 233 static int 234 promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id) 235 { 236 int pos; 237 238 for (pos = 0; pos < meta->total_disks; pos++) { 239 if (meta->disks[pos].id == id) 240 return (pos); 241 } 242 return (-1); 243 } 244 245 static int 246 promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd, 247 uint32_t sectors, uint32_t *off, uint32_t *size) 248 { 249 uint32_t coff, csize; 250 int i, j; 251 252 sectors -= 131072; 253 *off = 0; 254 *size = 0; 255 coff = 0; 256 csize = sectors; 257 i = 0; 258 while (1) { 259 for (j = 0; j < nsd; j++) { 260 if (metaarr[j]->disk_offset >= coff) { 261 csize = MIN(csize, 262 metaarr[j]->disk_offset - coff); 263 } 264 } 265 if (csize > *size) { 266 *off = coff; 267 *size = csize; 268 } 269 if (i >= nsd) 270 break; 271 coff = metaarr[i]->disk_offset + metaarr[i]->disk_sectors; 272 csize = sectors - coff; 273 i++; 274 }; 275 return ((*size > 0) ? 1 : 0); 276 } 277 278 static int 279 promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos) 280 { 281 int disk_pos, width; 282 283 if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { 284 width = vol->v_disks_count / 2; 285 disk_pos = (md_disk_pos / width) + 286 (md_disk_pos % width) * width; 287 } else 288 disk_pos = md_disk_pos; 289 return (disk_pos); 290 } 291 292 static void 293 promise_meta_get_name(struct promise_raid_conf *meta, char *buf) 294 { 295 int i; 296 297 strncpy(buf, meta->name, 32); 298 buf[32] = 0; 299 for (i = 31; i >= 0; i--) { 300 if (buf[i] > 0x20) 301 break; 302 buf[i] = 0; 303 } 304 } 305 306 static void 307 promise_meta_put_name(struct promise_raid_conf *meta, char *buf) 308 { 309 310 memset(meta->name, 0x20, 32); 311 memcpy(meta->name, buf, MIN(strlen(buf), 32)); 312 } 313 314 static int 315 promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr) 316 { 317 struct g_provider *pp; 318 struct promise_raid_conf *meta; 319 char *buf; 320 int error, i, subdisks; 321 uint32_t checksum, *ptr; 322 323 pp = cp->provider; 324 subdisks = 0; 325 next: 326 /* Read metadata block. */ 327 buf = g_read_data(cp, pp->mediasize - pp->sectorsize * 328 (63 - subdisks * PROMISE_META_OFFSET), 329 pp->sectorsize * 4, &error); 330 if (buf == NULL) { 331 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 332 pp->name, error); 333 return (subdisks); 334 } 335 meta = (struct promise_raid_conf *)buf; 336 337 /* Check if this is an Promise RAID struct */ 338 if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) && 339 strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) { 340 if (subdisks == 0) 341 G_RAID_DEBUG(1, 342 "Promise signature check failed on %s", pp->name); 343 g_free(buf); 344 return (subdisks); 345 } 346 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK); 347 memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4)); 348 g_free(buf); 349 350 /* Check metadata checksum. */ 351 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 352 checksum += *ptr++; 353 if (checksum != meta->checksum) { 354 G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name); 355 free(meta, M_MD_PROMISE); 356 return (subdisks); 357 } 358 359 if ((meta->integrity & PROMISE_I_VALID) == 0) { 360 G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name); 361 free(meta, M_MD_PROMISE); 362 return (subdisks); 363 } 364 365 if (meta->total_disks > PROMISE_MAX_DISKS) { 366 G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)", 367 pp->name, meta->total_disks); 368 free(meta, M_MD_PROMISE); 369 return (subdisks); 370 } 371 372 /* Save this part and look for next. */ 373 *metaarr = meta; 374 metaarr++; 375 subdisks++; 376 if (subdisks < PROMISE_MAX_SUBDISKS) 377 goto next; 378 379 return (subdisks); 380 } 381 382 static int 383 promise_meta_write(struct g_consumer *cp, 384 struct promise_raid_conf **metaarr, int nsd) 385 { 386 struct g_provider *pp; 387 struct promise_raid_conf *meta; 388 char *buf; 389 int error, i, subdisk, fake; 390 uint32_t checksum, *ptr, off, size; 391 392 pp = cp->provider; 393 subdisk = 0; 394 fake = 0; 395 next: 396 buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO); 397 meta = NULL; 398 if (subdisk < nsd) { 399 meta = metaarr[subdisk]; 400 } else if (!fake && promise_meta_unused_range(metaarr, nsd, 401 cp->provider->mediasize / cp->provider->sectorsize, 402 &off, &size)) { 403 /* Optionally add record for unused space. */ 404 meta = (struct promise_raid_conf *)buf; 405 memcpy(&meta->promise_id[0], PROMISE_MAGIC, 406 sizeof(PROMISE_MAGIC) - 1); 407 meta->dummy_0 = 0x00020000; 408 meta->integrity = PROMISE_I_VALID; 409 meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID; 410 meta->disk.number = 0xff; 411 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 412 meta->disk_offset = off; 413 meta->disk_sectors = size; 414 meta->rebuild_lba = UINT32_MAX; 415 fake = 1; 416 } 417 if (meta != NULL) { 418 /* Recalculate checksum for case if metadata were changed. */ 419 meta->checksum = 0; 420 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 421 checksum += *ptr++; 422 meta->checksum = checksum; 423 memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta))); 424 } 425 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 426 (63 - subdisk * PROMISE_META_OFFSET), 427 buf, pp->sectorsize * 4); 428 if (error != 0) { 429 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 430 pp->name, error); 431 } 432 free(buf, M_MD_PROMISE); 433 434 subdisk++; 435 if (subdisk < PROMISE_MAX_SUBDISKS) 436 goto next; 437 438 return (error); 439 } 440 441 static int 442 promise_meta_erase(struct g_consumer *cp) 443 { 444 struct g_provider *pp; 445 char *buf; 446 int error, subdisk; 447 448 pp = cp->provider; 449 buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO); 450 for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) { 451 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 452 (63 - subdisk * PROMISE_META_OFFSET), 453 buf, 4 * pp->sectorsize); 454 if (error != 0) { 455 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", 456 pp->name, error); 457 } 458 } 459 free(buf, M_MD_PROMISE); 460 return (error); 461 } 462 463 static int 464 promise_meta_write_spare(struct g_consumer *cp) 465 { 466 struct promise_raid_conf *meta; 467 int error; 468 469 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); 470 memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); 471 meta->dummy_0 = 0x00020000; 472 meta->integrity = PROMISE_I_VALID; 473 meta->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE | PROMISE_F_VALID; 474 meta->disk.number = 0xff; 475 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 476 meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize; 477 meta->disk_sectors -= 131072; 478 meta->rebuild_lba = UINT32_MAX; 479 error = promise_meta_write(cp, &meta, 1); 480 free(meta, M_MD_PROMISE); 481 return (error); 482 } 483 484 static struct g_raid_volume * 485 g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id) 486 { 487 struct g_raid_volume *vol; 488 struct g_raid_md_promise_pervolume *pv; 489 490 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 491 pv = vol->v_md_data; 492 if (pv->pv_id == id) 493 break; 494 } 495 return (vol); 496 } 497 498 static int 499 g_raid_md_promise_purge_volumes(struct g_raid_softc *sc) 500 { 501 struct g_raid_volume *vol, *tvol; 502 struct g_raid_md_promise_pervolume *pv; 503 int i, res; 504 505 res = 0; 506 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { 507 pv = vol->v_md_data; 508 if (!pv->pv_started || vol->v_stopping) 509 continue; 510 for (i = 0; i < vol->v_disks_count; i++) { 511 if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) 512 break; 513 } 514 if (i >= vol->v_disks_count) { 515 g_raid_destroy_volume(vol); 516 res = 1; 517 } 518 } 519 return (res); 520 } 521 522 static int 523 g_raid_md_promise_purge_disks(struct g_raid_softc *sc) 524 { 525 struct g_raid_disk *disk, *tdisk; 526 struct g_raid_volume *vol; 527 struct g_raid_md_promise_perdisk *pd; 528 int i, j, res; 529 530 res = 0; 531 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 532 if (disk->d_state == G_RAID_DISK_S_SPARE) 533 continue; 534 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 535 536 /* Scan for deleted volumes. */ 537 for (i = 0; i < pd->pd_subdisks; ) { 538 vol = g_raid_md_promise_get_volume(sc, 539 pd->pd_meta[i]->volume_id); 540 if (vol != NULL && !vol->v_stopping) { 541 i++; 542 continue; 543 } 544 free(pd->pd_meta[i], M_MD_PROMISE); 545 for (j = i; j < pd->pd_subdisks - 1; j++) 546 pd->pd_meta[j] = pd->pd_meta[j + 1]; 547 pd->pd_meta[pd->pd_subdisks - 1] = NULL; 548 pd->pd_subdisks--; 549 pd->pd_updated = 1; 550 } 551 552 /* If there is no metadata left - erase and delete disk. */ 553 if (pd->pd_subdisks == 0) { 554 promise_meta_erase(disk->d_consumer); 555 g_raid_destroy_disk(disk); 556 res = 1; 557 } 558 } 559 return (res); 560 } 561 562 static int 563 g_raid_md_promise_supported(int level, int qual, int disks, int force) 564 { 565 566 if (disks > PROMISE_MAX_DISKS) 567 return (0); 568 switch (level) { 569 case G_RAID_VOLUME_RL_RAID0: 570 if (disks < 1) 571 return (0); 572 if (!force && disks < 2) 573 return (0); 574 break; 575 case G_RAID_VOLUME_RL_RAID1: 576 if (disks < 1) 577 return (0); 578 if (!force && (disks != 2)) 579 return (0); 580 break; 581 case G_RAID_VOLUME_RL_RAID1E: 582 if (disks < 2) 583 return (0); 584 if (disks % 2 != 0) 585 return (0); 586 if (!force && (disks != 4)) 587 return (0); 588 break; 589 case G_RAID_VOLUME_RL_SINGLE: 590 if (disks != 1) 591 return (0); 592 break; 593 case G_RAID_VOLUME_RL_CONCAT: 594 if (disks < 2) 595 return (0); 596 break; 597 case G_RAID_VOLUME_RL_RAID5: 598 if (disks < 3) 599 return (0); 600 if (qual != G_RAID_VOLUME_RLQ_R5LA) 601 return (0); 602 break; 603 default: 604 return (0); 605 } 606 if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) 607 return (0); 608 return (1); 609 } 610 611 static int 612 g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn, 613 struct g_raid_volume *vol) 614 { 615 struct g_raid_softc *sc; 616 struct g_raid_subdisk *sd; 617 struct g_raid_md_promise_perdisk *pd; 618 struct g_raid_md_promise_pervolume *pv; 619 struct promise_raid_conf *meta; 620 off_t size; 621 int disk_pos, md_disk_pos, i, resurrection = 0; 622 uint32_t eoff, esize; 623 624 sc = disk->d_softc; 625 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 626 627 pv = vol->v_md_data; 628 meta = pv->pv_meta; 629 630 if (sdn >= 0) { 631 /* Find disk position in metadata by it's serial. */ 632 md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id); 633 /* For RAID0+1 we need to translate order. */ 634 disk_pos = promise_meta_translate_disk(vol, md_disk_pos); 635 } else { 636 md_disk_pos = -1; 637 disk_pos = -1; 638 } 639 if (disk_pos < 0) { 640 G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s", 641 g_raid_get_diskname(disk), vol->v_name); 642 /* Failed stale disk is useless for us. */ 643 if (sdn >= 0 && 644 pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) { 645 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); 646 return (0); 647 } 648 /* If we were given specific metadata subdisk - erase it. */ 649 if (sdn >= 0) { 650 free(pd->pd_meta[sdn], M_MD_PROMISE); 651 for (i = sdn; i < pd->pd_subdisks - 1; i++) 652 pd->pd_meta[i] = pd->pd_meta[i + 1]; 653 pd->pd_meta[pd->pd_subdisks - 1] = NULL; 654 pd->pd_subdisks--; 655 } 656 /* If we are in the start process, that's all for now. */ 657 if (!pv->pv_started) 658 goto nofit; 659 /* 660 * If we have already started - try to get use of the disk. 661 * Try to replace OFFLINE disks first, then FAILED. 662 */ 663 promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks, 664 disk->d_consumer->provider->mediasize / 665 disk->d_consumer->provider->sectorsize, 666 &eoff, &esize); 667 if (esize == 0) { 668 G_RAID_DEBUG1(1, sc, "No free space on disk %s", 669 g_raid_get_diskname(disk)); 670 goto nofit; 671 } 672 size = INT64_MAX; 673 for (i = 0; i < vol->v_disks_count; i++) { 674 sd = &vol->v_subdisks[i]; 675 if (sd->sd_state != G_RAID_SUBDISK_S_NONE) 676 size = sd->sd_size; 677 if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && 678 (disk_pos < 0 || 679 vol->v_subdisks[i].sd_state < sd->sd_state)) 680 disk_pos = i; 681 } 682 if (disk_pos >= 0 && 683 vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 684 (off_t)esize * 512 < size) { 685 G_RAID_DEBUG1(1, sc, "Disk %s free space " 686 "is too small (%ju < %ju)", 687 g_raid_get_diskname(disk), 688 (off_t)esize * 512, size); 689 disk_pos = -1; 690 } 691 if (disk_pos >= 0) { 692 if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) 693 esize = size / 512; 694 /* For RAID0+1 we need to translate order. */ 695 md_disk_pos = promise_meta_translate_disk(vol, disk_pos); 696 } else { 697 nofit: 698 if (pd->pd_subdisks == 0) { 699 g_raid_change_disk_state(disk, 700 G_RAID_DISK_S_SPARE); 701 } 702 return (0); 703 } 704 G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", 705 g_raid_get_diskname(disk), disk_pos, vol->v_name); 706 resurrection = 1; 707 } 708 709 sd = &vol->v_subdisks[disk_pos]; 710 711 if (resurrection && sd->sd_disk != NULL) { 712 g_raid_change_disk_state(sd->sd_disk, 713 G_RAID_DISK_S_STALE_FAILED); 714 TAILQ_REMOVE(&sd->sd_disk->d_subdisks, 715 sd, sd_next); 716 } 717 vol->v_subdisks[disk_pos].sd_disk = disk; 718 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 719 720 /* Welcome the new disk. */ 721 if (resurrection) 722 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 723 else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) 724 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 725 else 726 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 727 728 if (resurrection) { 729 sd->sd_offset = (off_t)eoff * 512; 730 sd->sd_size = (off_t)esize * 512; 731 } else { 732 sd->sd_offset = (off_t)pd->pd_meta[sdn]->disk_offset * 512; 733 sd->sd_size = (off_t)pd->pd_meta[sdn]->disk_sectors * 512; 734 } 735 736 if (resurrection) { 737 /* Stale disk, almost same as new. */ 738 g_raid_change_subdisk_state(sd, 739 G_RAID_SUBDISK_S_NEW); 740 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) { 741 /* Failed disk. */ 742 g_raid_change_subdisk_state(sd, 743 G_RAID_SUBDISK_S_FAILED); 744 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) { 745 /* Rebuilding disk. */ 746 g_raid_change_subdisk_state(sd, 747 G_RAID_SUBDISK_S_REBUILD); 748 if (pd->pd_meta[sdn]->generation != meta->generation) 749 sd->sd_rebuild_pos = 0; 750 else { 751 sd->sd_rebuild_pos = 752 (off_t)pd->pd_meta[sdn]->rebuild_lba * 512; 753 } 754 } else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) { 755 /* Rebuilding disk. */ 756 g_raid_change_subdisk_state(sd, 757 G_RAID_SUBDISK_S_NEW); 758 } else if (pd->pd_meta[sdn]->generation != meta->generation || 759 (meta->status & PROMISE_S_MARKED)) { 760 /* Stale disk or dirty volume (unclean shutdown). */ 761 g_raid_change_subdisk_state(sd, 762 G_RAID_SUBDISK_S_STALE); 763 } else { 764 /* Up to date disk. */ 765 g_raid_change_subdisk_state(sd, 766 G_RAID_SUBDISK_S_ACTIVE); 767 } 768 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 769 G_RAID_EVENT_SUBDISK); 770 771 return (resurrection); 772 } 773 774 static void 775 g_raid_md_promise_refill(struct g_raid_softc *sc) 776 { 777 struct g_raid_volume *vol; 778 struct g_raid_subdisk *sd; 779 struct g_raid_disk *disk; 780 struct g_raid_md_object *md; 781 struct g_raid_md_promise_perdisk *pd; 782 struct g_raid_md_promise_pervolume *pv; 783 int update, updated, i, bad; 784 785 md = sc->sc_md; 786 restart: 787 updated = 0; 788 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 789 pv = vol->v_md_data; 790 if (!pv->pv_started || vol->v_stopping) 791 continue; 792 793 /* Search for subdisk that needs replacement. */ 794 bad = 0; 795 for (i = 0; i < vol->v_disks_count; i++) { 796 sd = &vol->v_subdisks[i]; 797 if (sd->sd_state == G_RAID_SUBDISK_S_NONE || 798 sd->sd_state == G_RAID_SUBDISK_S_FAILED) 799 bad = 1; 800 } 801 if (!bad) 802 continue; 803 804 G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " 805 "trying to refill.", vol->v_name); 806 807 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 808 /* Skip failed. */ 809 if (disk->d_state < G_RAID_DISK_S_SPARE) 810 continue; 811 /* Skip already used by this volume. */ 812 for (i = 0; i < vol->v_disks_count; i++) { 813 sd = &vol->v_subdisks[i]; 814 if (sd->sd_disk == disk) 815 break; 816 } 817 if (i < vol->v_disks_count) 818 continue; 819 820 /* Try to use disk if it has empty extents. */ 821 pd = disk->d_md_data; 822 if (pd->pd_subdisks < PROMISE_MAX_SUBDISKS) { 823 update = 824 g_raid_md_promise_start_disk(disk, -1, vol); 825 } else 826 update = 0; 827 if (update) { 828 updated = 1; 829 g_raid_md_write_promise(md, vol, NULL, disk); 830 break; 831 } 832 } 833 } 834 if (updated) 835 goto restart; 836 } 837 838 static void 839 g_raid_md_promise_start(struct g_raid_volume *vol) 840 { 841 struct g_raid_softc *sc; 842 struct g_raid_subdisk *sd; 843 struct g_raid_disk *disk; 844 struct g_raid_md_object *md; 845 struct g_raid_md_promise_perdisk *pd; 846 struct g_raid_md_promise_pervolume *pv; 847 struct promise_raid_conf *meta; 848 int i; 849 850 sc = vol->v_softc; 851 md = sc->sc_md; 852 pv = vol->v_md_data; 853 meta = pv->pv_meta; 854 855 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 856 if (meta->type == PROMISE_T_RAID0) 857 vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; 858 else if (meta->type == PROMISE_T_RAID1) { 859 if (meta->array_width == 1) 860 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 861 else 862 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 863 } else if (meta->type == PROMISE_T_RAID3) 864 vol->v_raid_level = G_RAID_VOLUME_RL_RAID3; 865 else if (meta->type == PROMISE_T_RAID5) { 866 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 867 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; 868 } else if (meta->type == PROMISE_T_SPAN) 869 vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; 870 else if (meta->type == PROMISE_T_JBOD) 871 vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; 872 else 873 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 874 vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ 875 vol->v_disks_count = meta->total_disks; 876 vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ 877 if (meta->total_sectors_high < 256) /* If value looks sane. */ 878 vol->v_mediasize |= 879 ((off_t)meta->total_sectors_high << 32) * 512; //ZZZ 880 vol->v_sectorsize = 512; //ZZZ 881 for (i = 0; i < vol->v_disks_count; i++) { 882 sd = &vol->v_subdisks[i]; 883 sd->sd_offset = (off_t)meta->disk_offset * 512; //ZZZ 884 sd->sd_size = (off_t)meta->disk_sectors * 512; //ZZZ 885 } 886 g_raid_start_volume(vol); 887 888 /* Make all disks found till the moment take their places. */ 889 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 890 pd = disk->d_md_data; 891 for (i = 0; i < pd->pd_subdisks; i++) { 892 if (pd->pd_meta[i]->volume_id == meta->volume_id) 893 g_raid_md_promise_start_disk(disk, i, vol); 894 } 895 } 896 897 pv->pv_started = 1; 898 callout_stop(&pv->pv_start_co); 899 G_RAID_DEBUG1(0, sc, "Volume started."); 900 g_raid_md_write_promise(md, vol, NULL, NULL); 901 902 /* Pickup any STALE/SPARE disks to refill array if needed. */ 903 g_raid_md_promise_refill(sc); 904 905 g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); 906 } 907 908 static void 909 g_raid_promise_go(void *arg) 910 { 911 struct g_raid_volume *vol; 912 struct g_raid_softc *sc; 913 struct g_raid_md_promise_pervolume *pv; 914 915 vol = arg; 916 pv = vol->v_md_data; 917 sc = vol->v_softc; 918 if (!pv->pv_started) { 919 G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); 920 g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, 921 G_RAID_EVENT_VOLUME); 922 } 923 } 924 925 static void 926 g_raid_md_promise_new_disk(struct g_raid_disk *disk) 927 { 928 struct g_raid_softc *sc; 929 struct g_raid_md_object *md; 930 struct promise_raid_conf *pdmeta; 931 struct g_raid_md_promise_perdisk *pd; 932 struct g_raid_md_promise_pervolume *pv; 933 struct g_raid_volume *vol; 934 int i; 935 char buf[33]; 936 937 sc = disk->d_softc; 938 md = sc->sc_md; 939 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 940 941 if (pd->pd_subdisks == 0) { 942 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 943 g_raid_md_promise_refill(sc); 944 return; 945 } 946 947 for (i = 0; i < pd->pd_subdisks; i++) { 948 pdmeta = pd->pd_meta[i]; 949 950 /* Look for volume with matching ID. */ 951 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 952 if (vol == NULL) { 953 promise_meta_get_name(pdmeta, buf); 954 vol = g_raid_create_volume(sc, buf, pdmeta->array_number); 955 pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); 956 pv->pv_id = pdmeta->volume_id; 957 vol->v_md_data = pv; 958 callout_init(&pv->pv_start_co, 1); 959 callout_reset(&pv->pv_start_co, 960 g_raid_start_timeout * hz, 961 g_raid_promise_go, vol); 962 } else 963 pv = vol->v_md_data; 964 965 /* If we haven't started yet - check metadata freshness. */ 966 if (pv->pv_meta == NULL || !pv->pv_started) { 967 if (pv->pv_meta == NULL || 968 ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) { 969 G_RAID_DEBUG1(1, sc, "Newer disk"); 970 if (pv->pv_meta != NULL) 971 free(pv->pv_meta, M_MD_PROMISE); 972 pv->pv_meta = promise_meta_copy(pdmeta); 973 pv->pv_generation = pv->pv_meta->generation; 974 pv->pv_disks_present = 1; 975 } else if (pdmeta->generation == pv->pv_generation) { 976 pv->pv_disks_present++; 977 G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", 978 pv->pv_disks_present, 979 pv->pv_meta->total_disks); 980 } else { 981 G_RAID_DEBUG1(1, sc, "Older disk"); 982 } 983 } 984 } 985 986 for (i = 0; i < pd->pd_subdisks; i++) { 987 pdmeta = pd->pd_meta[i]; 988 989 /* Look for volume with matching ID. */ 990 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 991 if (vol == NULL) 992 continue; 993 pv = vol->v_md_data; 994 995 if (pv->pv_started) { 996 if (g_raid_md_promise_start_disk(disk, i, vol)) 997 g_raid_md_write_promise(md, vol, NULL, NULL); 998 } else { 999 /* If we collected all needed disks - start array. */ 1000 if (pv->pv_disks_present == pv->pv_meta->total_disks) 1001 g_raid_md_promise_start(vol); 1002 } 1003 } 1004 } 1005 1006 static int 1007 g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp, 1008 struct g_geom **gp) 1009 { 1010 struct g_geom *geom; 1011 struct g_raid_softc *sc; 1012 1013 /* Search for existing node. */ 1014 LIST_FOREACH(geom, &mp->geom, geom) { 1015 sc = geom->softc; 1016 if (sc == NULL) 1017 continue; 1018 if (sc->sc_stopping != 0) 1019 continue; 1020 if (sc->sc_md->mdo_class != md->mdo_class) 1021 continue; 1022 break; 1023 } 1024 if (geom != NULL) { 1025 *gp = geom; 1026 return (G_RAID_MD_TASTE_EXISTING); 1027 } 1028 1029 /* Create new one if not found. */ 1030 sc = g_raid_create_node(mp, "Promise", md); 1031 if (sc == NULL) 1032 return (G_RAID_MD_TASTE_FAIL); 1033 md->mdo_softc = sc; 1034 *gp = sc->sc_geom; 1035 return (G_RAID_MD_TASTE_NEW); 1036 } 1037 1038 static int 1039 g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp, 1040 struct g_consumer *cp, struct g_geom **gp) 1041 { 1042 struct g_consumer *rcp; 1043 struct g_provider *pp; 1044 struct g_raid_softc *sc; 1045 struct g_raid_disk *disk; 1046 struct promise_raid_conf *meta, *metaarr[4]; 1047 struct g_raid_md_promise_perdisk *pd; 1048 struct g_geom *geom; 1049 int error, i, j, result, len, subdisks; 1050 char name[16]; 1051 uint16_t vendor; 1052 1053 G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name); 1054 pp = cp->provider; 1055 1056 /* Read metadata from device. */ 1057 meta = NULL; 1058 vendor = 0xffff; 1059 if (g_access(cp, 1, 0, 0) != 0) 1060 return (G_RAID_MD_TASTE_FAIL); 1061 g_topology_unlock(); 1062 len = 2; 1063 if (pp->geom->rank == 1) 1064 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); 1065 subdisks = promise_meta_read(cp, metaarr); 1066 g_topology_lock(); 1067 g_access(cp, -1, 0, 0); 1068 if (subdisks == 0) { 1069 if (g_raid_aggressive_spare) { 1070 if (vendor == 0x105a || vendor == 0x1002) { 1071 G_RAID_DEBUG(1, 1072 "No Promise metadata, forcing spare."); 1073 goto search; 1074 } else { 1075 G_RAID_DEBUG(1, 1076 "Promise/ATI vendor mismatch " 1077 "0x%04x != 0x105a/0x1002", 1078 vendor); 1079 } 1080 } 1081 return (G_RAID_MD_TASTE_FAIL); 1082 } 1083 1084 /* Metadata valid. Print it. */ 1085 for (i = 0; i < subdisks; i++) 1086 g_raid_md_promise_print(metaarr[i]); 1087 1088 /* Purge meaningless (empty/spare) records. */ 1089 for (i = 0; i < subdisks; ) { 1090 if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) { 1091 i++; 1092 continue; 1093 } 1094 free(metaarr[i], M_MD_PROMISE); 1095 for (j = i; j < subdisks - 1; j++) 1096 metaarr[i] = metaarr[j + 1]; 1097 metaarr[subdisks - 1] = NULL; 1098 subdisks--; 1099 } 1100 1101 search: 1102 /* Search for matching node. */ 1103 sc = NULL; 1104 LIST_FOREACH(geom, &mp->geom, geom) { 1105 sc = geom->softc; 1106 if (sc == NULL) 1107 continue; 1108 if (sc->sc_stopping != 0) 1109 continue; 1110 if (sc->sc_md->mdo_class != md->mdo_class) 1111 continue; 1112 break; 1113 } 1114 1115 /* Found matching node. */ 1116 if (geom != NULL) { 1117 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 1118 result = G_RAID_MD_TASTE_EXISTING; 1119 1120 } else { /* Not found matching node -- create one. */ 1121 result = G_RAID_MD_TASTE_NEW; 1122 snprintf(name, sizeof(name), "Promise"); 1123 sc = g_raid_create_node(mp, name, md); 1124 md->mdo_softc = sc; 1125 geom = sc->sc_geom; 1126 } 1127 1128 rcp = g_new_consumer(geom); 1129 g_attach(rcp, pp); 1130 if (g_access(rcp, 1, 1, 1) != 0) 1131 ; //goto fail1; 1132 1133 g_topology_unlock(); 1134 sx_xlock(&sc->sc_lock); 1135 1136 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1137 pd->pd_subdisks = subdisks; 1138 for (i = 0; i < subdisks; i++) 1139 pd->pd_meta[i] = metaarr[i]; 1140 disk = g_raid_create_disk(sc); 1141 disk->d_md_data = (void *)pd; 1142 disk->d_consumer = rcp; 1143 rcp->private = disk; 1144 1145 /* Read kernel dumping information. */ 1146 disk->d_kd.offset = 0; 1147 disk->d_kd.length = OFF_MAX; 1148 len = sizeof(disk->d_kd); 1149 error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd); 1150 if (disk->d_kd.di.dumper == NULL) 1151 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 1152 rcp->provider->name, error); 1153 1154 g_raid_md_promise_new_disk(disk); 1155 1156 sx_xunlock(&sc->sc_lock); 1157 g_topology_lock(); 1158 *gp = geom; 1159 return (result); 1160 } 1161 1162 static int 1163 g_raid_md_event_promise(struct g_raid_md_object *md, 1164 struct g_raid_disk *disk, u_int event) 1165 { 1166 struct g_raid_softc *sc; 1167 1168 sc = md->mdo_softc; 1169 if (disk == NULL) 1170 return (-1); 1171 switch (event) { 1172 case G_RAID_DISK_E_DISCONNECTED: 1173 /* Delete disk. */ 1174 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 1175 g_raid_destroy_disk(disk); 1176 g_raid_md_promise_purge_volumes(sc); 1177 1178 /* Write updated metadata to all disks. */ 1179 g_raid_md_write_promise(md, NULL, NULL, NULL); 1180 1181 /* Check if anything left. */ 1182 if (g_raid_ndisks(sc, -1) == 0) 1183 g_raid_destroy_node(sc, 0); 1184 else 1185 g_raid_md_promise_refill(sc); 1186 return (0); 1187 } 1188 return (-2); 1189 } 1190 1191 static int 1192 g_raid_md_volume_event_promise(struct g_raid_md_object *md, 1193 struct g_raid_volume *vol, u_int event) 1194 { 1195 struct g_raid_md_promise_pervolume *pv; 1196 1197 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1198 switch (event) { 1199 case G_RAID_VOLUME_E_STARTMD: 1200 if (!pv->pv_started) 1201 g_raid_md_promise_start(vol); 1202 return (0); 1203 } 1204 return (-2); 1205 } 1206 1207 static int 1208 g_raid_md_ctl_promise(struct g_raid_md_object *md, 1209 struct gctl_req *req) 1210 { 1211 struct g_raid_softc *sc; 1212 struct g_raid_volume *vol, *vol1; 1213 struct g_raid_subdisk *sd; 1214 struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS]; 1215 struct g_raid_md_promise_perdisk *pd; 1216 struct g_raid_md_promise_pervolume *pv; 1217 struct g_consumer *cp; 1218 struct g_provider *pp; 1219 char arg[16]; 1220 const char *nodename, *verb, *volname, *levelname, *diskname; 1221 char *tmp; 1222 int *nargs, *force; 1223 off_t size, sectorsize, strip; 1224 intmax_t *sizearg, *striparg; 1225 uint32_t offs[PROMISE_MAX_DISKS], esize; 1226 int numdisks, i, len, level, qual; 1227 int error; 1228 1229 sc = md->mdo_softc; 1230 verb = gctl_get_param(req, "verb", NULL); 1231 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1232 error = 0; 1233 if (strcmp(verb, "label") == 0) { 1234 1235 if (*nargs < 4) { 1236 gctl_error(req, "Invalid number of arguments."); 1237 return (-1); 1238 } 1239 volname = gctl_get_asciiparam(req, "arg1"); 1240 if (volname == NULL) { 1241 gctl_error(req, "No volume name."); 1242 return (-2); 1243 } 1244 levelname = gctl_get_asciiparam(req, "arg2"); 1245 if (levelname == NULL) { 1246 gctl_error(req, "No RAID level."); 1247 return (-3); 1248 } 1249 if (strcasecmp(levelname, "RAID5") == 0) 1250 levelname = "RAID5-LA"; 1251 if (g_raid_volume_str2level(levelname, &level, &qual)) { 1252 gctl_error(req, "Unknown RAID level '%s'.", levelname); 1253 return (-4); 1254 } 1255 numdisks = *nargs - 3; 1256 force = gctl_get_paraml(req, "force", sizeof(*force)); 1257 if (!g_raid_md_promise_supported(level, qual, numdisks, 1258 force ? *force : 0)) { 1259 gctl_error(req, "Unsupported RAID level " 1260 "(0x%02x/0x%02x), or number of disks (%d).", 1261 level, qual, numdisks); 1262 return (-5); 1263 } 1264 1265 /* Search for disks, connect them and probe. */ 1266 size = INT64_MAX; 1267 sectorsize = 0; 1268 bzero(disks, sizeof(disks)); 1269 bzero(offs, sizeof(offs)); 1270 for (i = 0; i < numdisks; i++) { 1271 snprintf(arg, sizeof(arg), "arg%d", i + 3); 1272 diskname = gctl_get_asciiparam(req, arg); 1273 if (diskname == NULL) { 1274 gctl_error(req, "No disk name (%s).", arg); 1275 error = -6; 1276 break; 1277 } 1278 if (strcmp(diskname, "NONE") == 0) 1279 continue; 1280 1281 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1282 if (disk->d_consumer != NULL && 1283 disk->d_consumer->provider != NULL && 1284 strcmp(disk->d_consumer->provider->name, 1285 diskname) == 0) 1286 break; 1287 } 1288 if (disk != NULL) { 1289 if (disk->d_state != G_RAID_DISK_S_ACTIVE) { 1290 gctl_error(req, "Disk '%s' is in a " 1291 "wrong state (%s).", diskname, 1292 g_raid_disk_state2str(disk->d_state)); 1293 error = -7; 1294 break; 1295 } 1296 pd = disk->d_md_data; 1297 if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) { 1298 gctl_error(req, "Disk '%s' already " 1299 "used by %d volumes.", 1300 diskname, pd->pd_subdisks); 1301 error = -7; 1302 break; 1303 } 1304 pp = disk->d_consumer->provider; 1305 disks[i] = disk; 1306 promise_meta_unused_range(pd->pd_meta, 1307 pd->pd_subdisks, 1308 pp->mediasize / pp->sectorsize, 1309 &offs[i], &esize); 1310 size = MIN(size, (off_t)esize * pp->sectorsize); 1311 sectorsize = MAX(sectorsize, pp->sectorsize); 1312 continue; 1313 } 1314 1315 g_topology_lock(); 1316 cp = g_raid_open_consumer(sc, diskname); 1317 if (cp == NULL) { 1318 gctl_error(req, "Can't open disk '%s'.", 1319 diskname); 1320 g_topology_unlock(); 1321 error = -8; 1322 break; 1323 } 1324 pp = cp->provider; 1325 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1326 disk = g_raid_create_disk(sc); 1327 disk->d_md_data = (void *)pd; 1328 disk->d_consumer = cp; 1329 disks[i] = disk; 1330 cp->private = disk; 1331 g_topology_unlock(); 1332 1333 if (pp->mediasize / pp->sectorsize > UINT32_MAX) { 1334 gctl_error(req, 1335 "Disk '%s' is too big.", diskname); 1336 error = -8; 1337 break; 1338 } 1339 1340 /* Read kernel dumping information. */ 1341 disk->d_kd.offset = 0; 1342 disk->d_kd.length = OFF_MAX; 1343 len = sizeof(disk->d_kd); 1344 g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd); 1345 if (disk->d_kd.di.dumper == NULL) 1346 G_RAID_DEBUG1(2, sc, 1347 "Dumping not supported by %s.", 1348 cp->provider->name); 1349 1350 /* Reserve some space for metadata. */ 1351 size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize); 1352 sectorsize = MAX(sectorsize, pp->sectorsize); 1353 } 1354 if (error != 0) { 1355 for (i = 0; i < numdisks; i++) { 1356 if (disks[i] != NULL && 1357 disks[i]->d_state == G_RAID_DISK_S_NONE) 1358 g_raid_destroy_disk(disks[i]); 1359 } 1360 return (error); 1361 } 1362 1363 if (sectorsize <= 0) { 1364 gctl_error(req, "Can't get sector size."); 1365 return (-8); 1366 } 1367 1368 /* Handle size argument. */ 1369 len = sizeof(*sizearg); 1370 sizearg = gctl_get_param(req, "size", &len); 1371 if (sizearg != NULL && len == sizeof(*sizearg) && 1372 *sizearg > 0) { 1373 if (*sizearg > size) { 1374 gctl_error(req, "Size too big %lld > %lld.", 1375 (long long)*sizearg, (long long)size); 1376 return (-9); 1377 } 1378 size = *sizearg; 1379 } 1380 1381 /* Handle strip argument. */ 1382 strip = 131072; 1383 len = sizeof(*striparg); 1384 striparg = gctl_get_param(req, "strip", &len); 1385 if (striparg != NULL && len == sizeof(*striparg) && 1386 *striparg > 0) { 1387 if (*striparg < sectorsize) { 1388 gctl_error(req, "Strip size too small."); 1389 return (-10); 1390 } 1391 if (*striparg % sectorsize != 0) { 1392 gctl_error(req, "Incorrect strip size."); 1393 return (-11); 1394 } 1395 strip = *striparg; 1396 } 1397 1398 /* Round size down to strip or sector. */ 1399 if (level == G_RAID_VOLUME_RL_RAID1 || 1400 level == G_RAID_VOLUME_RL_SINGLE || 1401 level == G_RAID_VOLUME_RL_CONCAT) 1402 size -= (size % sectorsize); 1403 else if (level == G_RAID_VOLUME_RL_RAID1E && 1404 (numdisks & 1) != 0) 1405 size -= (size % (2 * strip)); 1406 else 1407 size -= (size % strip); 1408 if (size <= 0) { 1409 gctl_error(req, "Size too small."); 1410 return (-13); 1411 } 1412 if (size > 0xffffffffllu * sectorsize) { 1413 gctl_error(req, "Size too big."); 1414 return (-14); 1415 } 1416 1417 /* We have all we need, create things: volume, ... */ 1418 pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); 1419 arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0); 1420 pv->pv_generation = 0; 1421 pv->pv_started = 1; 1422 vol = g_raid_create_volume(sc, volname, -1); 1423 vol->v_md_data = pv; 1424 vol->v_raid_level = level; 1425 vol->v_raid_level_qualifier = qual; 1426 vol->v_strip_size = strip; 1427 vol->v_disks_count = numdisks; 1428 if (level == G_RAID_VOLUME_RL_RAID0 || 1429 level == G_RAID_VOLUME_RL_CONCAT || 1430 level == G_RAID_VOLUME_RL_SINGLE) 1431 vol->v_mediasize = size * numdisks; 1432 else if (level == G_RAID_VOLUME_RL_RAID1) 1433 vol->v_mediasize = size; 1434 else if (level == G_RAID_VOLUME_RL_RAID3 || 1435 level == G_RAID_VOLUME_RL_RAID5) 1436 vol->v_mediasize = size * (numdisks - 1); 1437 else { /* RAID1E */ 1438 vol->v_mediasize = ((size * numdisks) / strip / 2) * 1439 strip; 1440 } 1441 vol->v_sectorsize = sectorsize; 1442 g_raid_start_volume(vol); 1443 1444 /* , and subdisks. */ 1445 for (i = 0; i < numdisks; i++) { 1446 disk = disks[i]; 1447 sd = &vol->v_subdisks[i]; 1448 sd->sd_disk = disk; 1449 sd->sd_offset = (off_t)offs[i] * 512; 1450 sd->sd_size = size; 1451 if (disk == NULL) 1452 continue; 1453 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1454 g_raid_change_disk_state(disk, 1455 G_RAID_DISK_S_ACTIVE); 1456 g_raid_change_subdisk_state(sd, 1457 G_RAID_SUBDISK_S_ACTIVE); 1458 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1459 G_RAID_EVENT_SUBDISK); 1460 } 1461 1462 /* Write metadata based on created entities. */ 1463 G_RAID_DEBUG1(0, sc, "Array started."); 1464 g_raid_md_write_promise(md, vol, NULL, NULL); 1465 1466 /* Pickup any STALE/SPARE disks to refill array if needed. */ 1467 g_raid_md_promise_refill(sc); 1468 1469 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1470 G_RAID_EVENT_VOLUME); 1471 return (0); 1472 } 1473 if (strcmp(verb, "add") == 0) { 1474 1475 gctl_error(req, "`add` command is not applicable, " 1476 "use `label` instead."); 1477 return (-99); 1478 } 1479 if (strcmp(verb, "delete") == 0) { 1480 1481 nodename = gctl_get_asciiparam(req, "arg0"); 1482 if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) 1483 nodename = NULL; 1484 1485 /* Full node destruction. */ 1486 if (*nargs == 1 && nodename != NULL) { 1487 /* Check if some volume is still open. */ 1488 force = gctl_get_paraml(req, "force", sizeof(*force)); 1489 if (force != NULL && *force == 0 && 1490 g_raid_nopens(sc) != 0) { 1491 gctl_error(req, "Some volume is still open."); 1492 return (-4); 1493 } 1494 1495 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1496 if (disk->d_consumer) 1497 promise_meta_erase(disk->d_consumer); 1498 } 1499 g_raid_destroy_node(sc, 0); 1500 return (0); 1501 } 1502 1503 /* Destroy specified volume. If it was last - all node. */ 1504 if (*nargs > 2) { 1505 gctl_error(req, "Invalid number of arguments."); 1506 return (-1); 1507 } 1508 volname = gctl_get_asciiparam(req, 1509 nodename != NULL ? "arg1" : "arg0"); 1510 if (volname == NULL) { 1511 gctl_error(req, "No volume name."); 1512 return (-2); 1513 } 1514 1515 /* Search for volume. */ 1516 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1517 if (strcmp(vol->v_name, volname) == 0) 1518 break; 1519 pp = vol->v_provider; 1520 if (pp == NULL) 1521 continue; 1522 if (strcmp(pp->name, volname) == 0) 1523 break; 1524 if (strncmp(pp->name, "raid/", 5) == 0 && 1525 strcmp(pp->name + 5, volname) == 0) 1526 break; 1527 } 1528 if (vol == NULL) { 1529 i = strtol(volname, &tmp, 10); 1530 if (verb != volname && tmp[0] == 0) { 1531 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1532 if (vol->v_global_id == i) 1533 break; 1534 } 1535 } 1536 } 1537 if (vol == NULL) { 1538 gctl_error(req, "Volume '%s' not found.", volname); 1539 return (-3); 1540 } 1541 1542 /* Check if volume is still open. */ 1543 force = gctl_get_paraml(req, "force", sizeof(*force)); 1544 if (force != NULL && *force == 0 && 1545 vol->v_provider_open != 0) { 1546 gctl_error(req, "Volume is still open."); 1547 return (-4); 1548 } 1549 1550 /* Destroy volume and potentially node. */ 1551 i = 0; 1552 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) 1553 i++; 1554 if (i >= 2) { 1555 g_raid_destroy_volume(vol); 1556 g_raid_md_promise_purge_disks(sc); 1557 g_raid_md_write_promise(md, NULL, NULL, NULL); 1558 } else { 1559 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1560 if (disk->d_consumer) 1561 promise_meta_erase(disk->d_consumer); 1562 } 1563 g_raid_destroy_node(sc, 0); 1564 } 1565 return (0); 1566 } 1567 if (strcmp(verb, "remove") == 0 || 1568 strcmp(verb, "fail") == 0) { 1569 if (*nargs < 2) { 1570 gctl_error(req, "Invalid number of arguments."); 1571 return (-1); 1572 } 1573 for (i = 1; i < *nargs; i++) { 1574 snprintf(arg, sizeof(arg), "arg%d", i); 1575 diskname = gctl_get_asciiparam(req, arg); 1576 if (diskname == NULL) { 1577 gctl_error(req, "No disk name (%s).", arg); 1578 error = -2; 1579 break; 1580 } 1581 if (strncmp(diskname, "/dev/", 5) == 0) 1582 diskname += 5; 1583 1584 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1585 if (disk->d_consumer != NULL && 1586 disk->d_consumer->provider != NULL && 1587 strcmp(disk->d_consumer->provider->name, 1588 diskname) == 0) 1589 break; 1590 } 1591 if (disk == NULL) { 1592 gctl_error(req, "Disk '%s' not found.", 1593 diskname); 1594 error = -3; 1595 break; 1596 } 1597 1598 if (strcmp(verb, "fail") == 0) { 1599 g_raid_md_fail_disk_promise(md, NULL, disk); 1600 continue; 1601 } 1602 1603 /* Erase metadata on deleting disk and destroy it. */ 1604 promise_meta_erase(disk->d_consumer); 1605 g_raid_destroy_disk(disk); 1606 } 1607 g_raid_md_promise_purge_volumes(sc); 1608 1609 /* Write updated metadata to remaining disks. */ 1610 g_raid_md_write_promise(md, NULL, NULL, NULL); 1611 1612 /* Check if anything left. */ 1613 if (g_raid_ndisks(sc, -1) == 0) 1614 g_raid_destroy_node(sc, 0); 1615 else 1616 g_raid_md_promise_refill(sc); 1617 return (error); 1618 } 1619 if (strcmp(verb, "insert") == 0) { 1620 if (*nargs < 2) { 1621 gctl_error(req, "Invalid number of arguments."); 1622 return (-1); 1623 } 1624 for (i = 1; i < *nargs; i++) { 1625 /* Get disk name. */ 1626 snprintf(arg, sizeof(arg), "arg%d", i); 1627 diskname = gctl_get_asciiparam(req, arg); 1628 if (diskname == NULL) { 1629 gctl_error(req, "No disk name (%s).", arg); 1630 error = -3; 1631 break; 1632 } 1633 1634 /* Try to find provider with specified name. */ 1635 g_topology_lock(); 1636 cp = g_raid_open_consumer(sc, diskname); 1637 if (cp == NULL) { 1638 gctl_error(req, "Can't open disk '%s'.", 1639 diskname); 1640 g_topology_unlock(); 1641 error = -4; 1642 break; 1643 } 1644 pp = cp->provider; 1645 g_topology_unlock(); 1646 1647 if (pp->mediasize / pp->sectorsize > UINT32_MAX) { 1648 gctl_error(req, 1649 "Disk '%s' is too big.", diskname); 1650 g_raid_kill_consumer(sc, cp); 1651 error = -8; 1652 break; 1653 } 1654 1655 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1656 1657 disk = g_raid_create_disk(sc); 1658 disk->d_consumer = cp; 1659 disk->d_md_data = (void *)pd; 1660 cp->private = disk; 1661 1662 /* Read kernel dumping information. */ 1663 disk->d_kd.offset = 0; 1664 disk->d_kd.length = OFF_MAX; 1665 len = sizeof(disk->d_kd); 1666 g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd); 1667 if (disk->d_kd.di.dumper == NULL) 1668 G_RAID_DEBUG1(2, sc, 1669 "Dumping not supported by %s.", 1670 cp->provider->name); 1671 1672 /* Welcome the "new" disk. */ 1673 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 1674 promise_meta_write_spare(cp); 1675 g_raid_md_promise_refill(sc); 1676 } 1677 return (error); 1678 } 1679 return (-100); 1680 } 1681 1682 static int 1683 g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol, 1684 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1685 { 1686 struct g_raid_softc *sc; 1687 struct g_raid_volume *vol; 1688 struct g_raid_subdisk *sd; 1689 struct g_raid_disk *disk; 1690 struct g_raid_md_promise_perdisk *pd; 1691 struct g_raid_md_promise_pervolume *pv; 1692 struct promise_raid_conf *meta; 1693 off_t rebuild_lba64; 1694 int i, j, pos, rebuild; 1695 1696 sc = md->mdo_softc; 1697 1698 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 1699 return (0); 1700 1701 /* Generate new per-volume metadata for affected volumes. */ 1702 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1703 if (vol->v_stopping) 1704 continue; 1705 1706 /* Skip volumes not related to specified targets. */ 1707 if (tvol != NULL && vol != tvol) 1708 continue; 1709 if (tsd != NULL && vol != tsd->sd_volume) 1710 continue; 1711 if (tdisk != NULL) { 1712 for (i = 0; i < vol->v_disks_count; i++) { 1713 if (vol->v_subdisks[i].sd_disk == tdisk) 1714 break; 1715 } 1716 if (i >= vol->v_disks_count) 1717 continue; 1718 } 1719 1720 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1721 pv->pv_generation++; 1722 1723 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); 1724 if (pv->pv_meta != NULL) 1725 memcpy(meta, pv->pv_meta, sizeof(*meta)); 1726 memcpy(meta->promise_id, PROMISE_MAGIC, 1727 sizeof(PROMISE_MAGIC) - 1); 1728 meta->dummy_0 = 0x00020000; 1729 meta->integrity = PROMISE_I_VALID; 1730 1731 meta->generation = pv->pv_generation; 1732 meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE | 1733 PROMISE_S_INITED | PROMISE_S_READY; 1734 if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED) 1735 meta->status |= PROMISE_S_DEGRADED; 1736 if (vol->v_dirty) 1737 meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED! */ 1738 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 || 1739 vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) 1740 meta->type = PROMISE_T_RAID0; 1741 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1742 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1743 meta->type = PROMISE_T_RAID1; 1744 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) 1745 meta->type = PROMISE_T_RAID3; 1746 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) 1747 meta->type = PROMISE_T_RAID5; 1748 else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) 1749 meta->type = PROMISE_T_SPAN; 1750 else 1751 meta->type = PROMISE_T_JBOD; 1752 meta->total_disks = vol->v_disks_count; 1753 meta->stripe_shift = ffs(vol->v_strip_size / 1024); 1754 meta->array_width = vol->v_disks_count; 1755 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1756 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1757 meta->array_width /= 2; 1758 meta->array_number = vol->v_global_id; 1759 meta->total_sectors = vol->v_mediasize / vol->v_sectorsize; 1760 meta->total_sectors_high = 1761 (vol->v_mediasize / vol->v_sectorsize) >> 32; 1762 meta->cylinders = meta->total_sectors / (255 * 63) - 1; 1763 meta->heads = 254; 1764 meta->sectors = 63; 1765 meta->volume_id = pv->pv_id; 1766 rebuild_lba64 = UINT64_MAX; 1767 rebuild = 0; 1768 for (i = 0; i < vol->v_disks_count; i++) { 1769 sd = &vol->v_subdisks[i]; 1770 /* For RAID0+1 we need to translate order. */ 1771 pos = promise_meta_translate_disk(vol, i); 1772 meta->disks[pos].flags = PROMISE_F_VALID | 1773 PROMISE_F_ASSIGNED; 1774 if (sd->sd_state == G_RAID_SUBDISK_S_NONE) { 1775 meta->disks[pos].flags |= 0; 1776 } else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) { 1777 meta->disks[pos].flags |= 1778 PROMISE_F_DOWN | PROMISE_F_REDIR; 1779 } else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) { 1780 meta->disks[pos].flags |= 1781 PROMISE_F_ONLINE | PROMISE_F_REDIR; 1782 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { 1783 rebuild_lba64 = MIN(rebuild_lba64, 1784 sd->sd_rebuild_pos / 512); 1785 } else 1786 rebuild_lba64 = 0; 1787 rebuild = 1; 1788 } else { 1789 meta->disks[pos].flags |= PROMISE_F_ONLINE; 1790 if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) { 1791 meta->status |= PROMISE_S_MARKED; 1792 if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { 1793 rebuild_lba64 = MIN(rebuild_lba64, 1794 sd->sd_rebuild_pos / 512); 1795 } else 1796 rebuild_lba64 = 0; 1797 } 1798 } 1799 if (pv->pv_meta != NULL) { 1800 meta->disks[pos].id = pv->pv_meta->disks[pos].id; 1801 } else { 1802 meta->disks[pos].number = i * 2; 1803 arc4rand(&meta->disks[pos].id, 1804 sizeof(meta->disks[pos].id), 0); 1805 } 1806 } 1807 promise_meta_put_name(meta, vol->v_name); 1808 1809 /* Try to mimic AMD BIOS rebuild/resync behavior. */ 1810 if (rebuild_lba64 != UINT64_MAX) { 1811 if (rebuild) 1812 meta->magic_3 = 0x03040010UL; /* Rebuild? */ 1813 else 1814 meta->magic_3 = 0x03040008UL; /* Resync? */ 1815 /* Translate from per-disk to per-volume LBA. */ 1816 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1817 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { 1818 rebuild_lba64 *= meta->array_width; 1819 } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || 1820 vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) { 1821 rebuild_lba64 *= meta->array_width - 1; 1822 } else 1823 rebuild_lba64 = 0; 1824 } else 1825 meta->magic_3 = 0x03000000UL; 1826 meta->rebuild_lba64 = rebuild_lba64; 1827 meta->magic_4 = 0x04010101UL; 1828 1829 /* Replace per-volume metadata with new. */ 1830 if (pv->pv_meta != NULL) 1831 free(pv->pv_meta, M_MD_PROMISE); 1832 pv->pv_meta = meta; 1833 1834 /* Copy new metadata to the disks, adding or replacing old. */ 1835 for (i = 0; i < vol->v_disks_count; i++) { 1836 sd = &vol->v_subdisks[i]; 1837 disk = sd->sd_disk; 1838 if (disk == NULL) 1839 continue; 1840 /* For RAID0+1 we need to translate order. */ 1841 pos = promise_meta_translate_disk(vol, i); 1842 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1843 for (j = 0; j < pd->pd_subdisks; j++) { 1844 if (pd->pd_meta[j]->volume_id == meta->volume_id) 1845 break; 1846 } 1847 if (j == pd->pd_subdisks) 1848 pd->pd_subdisks++; 1849 if (pd->pd_meta[j] != NULL) 1850 free(pd->pd_meta[j], M_MD_PROMISE); 1851 pd->pd_meta[j] = promise_meta_copy(meta); 1852 pd->pd_meta[j]->disk = meta->disks[pos]; 1853 pd->pd_meta[j]->disk.number = pos; 1854 pd->pd_meta[j]->disk_offset = sd->sd_offset / 512; 1855 pd->pd_meta[j]->disk_sectors = sd->sd_size / 512; 1856 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { 1857 pd->pd_meta[j]->rebuild_lba = 1858 sd->sd_rebuild_pos / 512; 1859 } else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD) 1860 pd->pd_meta[j]->rebuild_lba = 0; 1861 else 1862 pd->pd_meta[j]->rebuild_lba = UINT32_MAX; 1863 pd->pd_updated = 1; 1864 } 1865 } 1866 1867 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1868 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1869 if (disk->d_state != G_RAID_DISK_S_ACTIVE) 1870 continue; 1871 if (!pd->pd_updated) 1872 continue; 1873 G_RAID_DEBUG(1, "Writing Promise metadata to %s", 1874 g_raid_get_diskname(disk)); 1875 for (i = 0; i < pd->pd_subdisks; i++) 1876 g_raid_md_promise_print(pd->pd_meta[i]); 1877 promise_meta_write(disk->d_consumer, 1878 pd->pd_meta, pd->pd_subdisks); 1879 pd->pd_updated = 0; 1880 } 1881 1882 return (0); 1883 } 1884 1885 static int 1886 g_raid_md_fail_disk_promise(struct g_raid_md_object *md, 1887 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1888 { 1889 struct g_raid_softc *sc; 1890 struct g_raid_md_promise_perdisk *pd; 1891 struct g_raid_subdisk *sd; 1892 int i, pos; 1893 1894 sc = md->mdo_softc; 1895 pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data; 1896 1897 /* We can't fail disk that is not a part of array now. */ 1898 if (tdisk->d_state != G_RAID_DISK_S_ACTIVE) 1899 return (-1); 1900 1901 /* 1902 * Mark disk as failed in metadata and try to write that metadata 1903 * to the disk itself to prevent it's later resurrection as STALE. 1904 */ 1905 if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL) 1906 G_RAID_DEBUG(1, "Writing Promise metadata to %s", 1907 g_raid_get_diskname(tdisk)); 1908 for (i = 0; i < pd->pd_subdisks; i++) { 1909 pd->pd_meta[i]->disk.flags |= 1910 PROMISE_F_DOWN | PROMISE_F_REDIR; 1911 pos = pd->pd_meta[i]->disk.number; 1912 if (pos >= 0 && pos < PROMISE_MAX_DISKS) { 1913 pd->pd_meta[i]->disks[pos].flags |= 1914 PROMISE_F_DOWN | PROMISE_F_REDIR; 1915 } 1916 g_raid_md_promise_print(pd->pd_meta[i]); 1917 } 1918 if (tdisk->d_consumer != NULL) 1919 promise_meta_write(tdisk->d_consumer, 1920 pd->pd_meta, pd->pd_subdisks); 1921 1922 /* Change states. */ 1923 g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); 1924 TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { 1925 g_raid_change_subdisk_state(sd, 1926 G_RAID_SUBDISK_S_FAILED); 1927 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, 1928 G_RAID_EVENT_SUBDISK); 1929 } 1930 1931 /* Write updated metadata to remaining disks. */ 1932 g_raid_md_write_promise(md, NULL, NULL, tdisk); 1933 1934 g_raid_md_promise_refill(sc); 1935 return (0); 1936 } 1937 1938 static int 1939 g_raid_md_free_disk_promise(struct g_raid_md_object *md, 1940 struct g_raid_disk *disk) 1941 { 1942 struct g_raid_md_promise_perdisk *pd; 1943 int i; 1944 1945 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1946 for (i = 0; i < pd->pd_subdisks; i++) { 1947 if (pd->pd_meta[i] != NULL) { 1948 free(pd->pd_meta[i], M_MD_PROMISE); 1949 pd->pd_meta[i] = NULL; 1950 } 1951 } 1952 free(pd, M_MD_PROMISE); 1953 disk->d_md_data = NULL; 1954 return (0); 1955 } 1956 1957 static int 1958 g_raid_md_free_volume_promise(struct g_raid_md_object *md, 1959 struct g_raid_volume *vol) 1960 { 1961 struct g_raid_md_promise_pervolume *pv; 1962 1963 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1964 if (pv && pv->pv_meta != NULL) { 1965 free(pv->pv_meta, M_MD_PROMISE); 1966 pv->pv_meta = NULL; 1967 } 1968 if (pv && !pv->pv_started) { 1969 pv->pv_started = 1; 1970 callout_stop(&pv->pv_start_co); 1971 } 1972 free(pv, M_MD_PROMISE); 1973 vol->v_md_data = NULL; 1974 return (0); 1975 } 1976 1977 static int 1978 g_raid_md_free_promise(struct g_raid_md_object *md) 1979 { 1980 1981 return (0); 1982 } 1983 1984 G_RAID_MD_DECLARE(promise, "Promise"); 1985