/*-
 * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kobj.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <geom/geom.h>
#include "geom/raid/g_raid.h"
#include "g_raid_md_if.h"

static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata");

#define	PROMISE_MAX_DISKS	8
#define	PROMISE_MAX_SUBDISKS	2
#define	PROMISE_META_OFFSET	14

struct promise_raid_disk {
	uint8_t		flags;			/* Subdisk status. */
#define PROMISE_F_VALID		0x01
#define PROMISE_F_ONLINE	0x02
#define PROMISE_F_ASSIGNED	0x04
#define PROMISE_F_SPARE		0x08
#define PROMISE_F_DUPLICATE	0x10
#define PROMISE_F_REDIR		0x20
#define PROMISE_F_DOWN		0x40
#define PROMISE_F_READY		0x80

	uint8_t		number;			/* Position in a volume. */
	uint8_t		channel;		/* ATA channel number. */
	uint8_t		device;			/* ATA device number. */
	uint64_t	id __packed;		/* Subdisk ID. */
} __packed;

struct promise_raid_conf {
	char		promise_id[24];
#define PROMISE_MAGIC		"Promise Technology, Inc."
#define FREEBSD_MAGIC		"FreeBSD ATA driver RAID "

	uint32_t	dummy_0;
	uint64_t	magic_0;
#define PROMISE_MAGIC0(x)	(((uint64_t)(x.channel) << 48) | \
				((uint64_t)(x.device != 0) << 56))
	uint16_t	magic_1;
	uint32_t	magic_2;
	uint8_t		filler1[470];

	uint32_t	integrity;
#define PROMISE_I_VALID		0x00000080

	struct promise_raid_disk	disk;	/* This subdisk info. */
	uint32_t	disk_offset;		/* Subdisk offset. */
	uint32_t	disk_sectors;		/* Subdisk size. */
	uint32_t	rebuild_lba;		/* Rebuild position. */
	uint16_t	generation;		/* Generation number. */
	uint8_t		status;			/* Volume status. */
#define PROMISE_S_VALID		0x01
#define PROMISE_S_ONLINE	0x02
#define PROMISE_S_INITED	0x04
#define PROMISE_S_READY		0x08
#define PROMISE_S_DEGRADED	0x10
#define PROMISE_S_MARKED	0x20
#define PROMISE_S_MIGRATING	0x40
#define PROMISE_S_FUNCTIONAL	0x80

	uint8_t		type;			/* Volume type. */
#define PROMISE_T_RAID0		0x00
#define PROMISE_T_RAID1		0x01
#define PROMISE_T_RAID3		0x02
#define PROMISE_T_RAID5		0x04
#define PROMISE_T_SPAN		0x08
#define PROMISE_T_JBOD		0x10

	uint8_t		total_disks;		/* Disks in this volume. */
	uint8_t		stripe_shift;		/* Strip size. */
	uint8_t		array_width;		/* Number of RAID0 stripes. */
	uint8_t		array_number;		/* Global volume number. */
	uint32_t	total_sectors;		/* Volume size. */
	uint16_t	cylinders;		/* Volume geometry: C. */
	uint8_t		heads;			/* Volume geometry: H. */
	uint8_t		sectors;		/* Volume geometry: S. */
	uint64_t	volume_id __packed;	/* Volume ID. */
	struct promise_raid_disk	disks[PROMISE_MAX_DISKS];
						/* Subdisks in this volume. */
	char		name[32];		/* Volume label. */

	uint32_t	filler2[8];
	uint32_t	magic_3;	/* Something related to rebuild. */
	uint64_t	rebuild_lba64;	/* Per-volume rebuild position. */
	uint32_t	magic_4;
	uint32_t	magic_5;
	uint32_t	filler3[325];
	uint32_t	checksum;
} __packed;

struct g_raid_md_promise_perdisk {
	int		 pd_updated;
	int		 pd_subdisks;
	struct promise_raid_conf	*pd_meta[PROMISE_MAX_SUBDISKS];
};

struct g_raid_md_promise_pervolume {
	struct promise_raid_conf	*pv_meta;
	uint64_t		 pv_id;
	uint16_t		 pv_generation;
	int			 pv_disks_present;
	int			 pv_started;
	struct callout		 pv_start_co;	/* STARTING state timer. */
};

static g_raid_md_create_t g_raid_md_create_promise;
static g_raid_md_taste_t g_raid_md_taste_promise;
static g_raid_md_event_t g_raid_md_event_promise;
static g_raid_md_volume_event_t g_raid_md_volume_event_promise;
static g_raid_md_ctl_t g_raid_md_ctl_promise;
static g_raid_md_write_t g_raid_md_write_promise;
static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise;
static g_raid_md_free_disk_t g_raid_md_free_disk_promise;
static g_raid_md_free_volume_t g_raid_md_free_volume_promise;
static g_raid_md_free_t g_raid_md_free_promise;

static kobj_method_t g_raid_md_promise_methods[] = {
	KOBJMETHOD(g_raid_md_create,	g_raid_md_create_promise),
	KOBJMETHOD(g_raid_md_taste,	g_raid_md_taste_promise),
	KOBJMETHOD(g_raid_md_event,	g_raid_md_event_promise),
	KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_promise),
	KOBJMETHOD(g_raid_md_ctl,	g_raid_md_ctl_promise),
	KOBJMETHOD(g_raid_md_write,	g_raid_md_write_promise),
	KOBJMETHOD(g_raid_md_fail_disk,	g_raid_md_fail_disk_promise),
	KOBJMETHOD(g_raid_md_free_disk,	g_raid_md_free_disk_promise),
	KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_promise),
	KOBJMETHOD(g_raid_md_free,	g_raid_md_free_promise),
	{ 0, 0 }
};

static struct g_raid_md_class g_raid_md_promise_class = {
	"Promise",
	g_raid_md_promise_methods,
	sizeof(struct g_raid_md_object),
	.mdc_priority = 100
};

static void
g_raid_md_promise_print(struct promise_raid_conf *meta)
{
	int i;

	if (g_raid_debug < 1)
		return;

	printf("********* ATA Promise Metadata *********\n");
	printf("promise_id <%.24s>\n", meta->promise_id);
	printf("disk %02x %02x %02x %02x %016jx\n",
	    meta->disk.flags, meta->disk.number, meta->disk.channel,
	    meta->disk.device, meta->disk.id);
	printf("disk_offset %u\n", meta->disk_offset);
	printf("disk_sectors %u\n", meta->disk_sectors);
	printf("rebuild_lba %u\n", meta->rebuild_lba);
	printf("generation %u\n", meta->generation);
	printf("status 0x%02x\n", meta->status);
	printf("type %u\n", meta->type);
	printf("total_disks %u\n", meta->total_disks);
	printf("stripe_shift %u\n", meta->stripe_shift);
	printf("array_width %u\n", meta->array_width);
	printf("array_number %u\n", meta->array_number);
	printf("total_sectors %u\n", meta->total_sectors);
	printf("cylinders %u\n", meta->cylinders);
	printf("heads %u\n", meta->heads);
	printf("sectors %u\n", meta->sectors);
	printf("volume_id 0x%016jx\n", meta->volume_id);
	printf("disks:\n");
	for (i = 0; i < PROMISE_MAX_DISKS; i++) {
		printf(" %02x %02x %02x %02x %016jx\n",
		    meta->disks[i].flags, meta->disks[i].number,
		    meta->disks[i].channel, meta->disks[i].device,
		    meta->disks[i].id);
	}
	printf("name <%.32s>\n", meta->name);
	printf("magic_3 0x%08x\n", meta->magic_3);
	printf("rebuild_lba64 %ju\n", meta->rebuild_lba64);
	printf("magic_4 0x%08x\n", meta->magic_4);
	printf("magic_5 0x%08x\n", meta->magic_5);
	printf("=================================================\n");
}

static struct promise_raid_conf *
promise_meta_copy(struct promise_raid_conf *meta)
{
	struct promise_raid_conf *nmeta;

	nmeta = malloc(sizeof(*nmeta), M_MD_PROMISE, M_WAITOK);
	memcpy(nmeta, meta, sizeof(*nmeta));
	return (nmeta);
}

static int
promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id)
{
	int pos;

	for (pos = 0; pos < meta->total_disks; pos++) {
		if (meta->disks[pos].id == id)
			return (pos);
	}
	return (-1);
}

static int
promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd,
    uint32_t sectors, uint32_t *off, uint32_t *size)
{
	uint32_t coff, csize;
	int i, j;

	sectors -= 131072;
	*off = 0;
	*size = 0;
	coff = 0;
	csize = sectors;
	i = 0;
	while (1) {
		for (j = 0; j < nsd; j++) {
			if (metaarr[j]->disk_offset >= coff) {
				csize = MIN(csize,
				    metaarr[j]->disk_offset - coff);
			}
		}
		if (csize > *size) {
			*off = coff;
			*size = csize;
		}
		if (i >= nsd)
			break;
		coff = metaarr[i]->disk_offset + metaarr[i]->disk_sectors;
		csize = sectors - coff;
		i++;
	}
	return ((*size > 0) ? 1 : 0);
}

static int
promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos)
{
	int disk_pos, width;

	if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
		width = vol->v_disks_count / 2;
		disk_pos = (md_disk_pos / width) +
		    (md_disk_pos % width) * width;
	} else
		disk_pos = md_disk_pos;
	return (disk_pos);
}

static void
promise_meta_get_name(struct promise_raid_conf *meta, char *buf)
{
	int i;

	strncpy(buf, meta->name, 32);
	buf[32] = 0;
	for (i = 31; i >= 0; i--) {
		if (buf[i] > 0x20)
			break;
		buf[i] = 0;
	}
}

static void
promise_meta_put_name(struct promise_raid_conf *meta, char *buf)
{

	memset(meta->name, 0x20, 32);
	memcpy(meta->name, buf, MIN(strlen(buf), 32));
}

static int
promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr)
{
	struct g_provider *pp;
	struct promise_raid_conf *meta;
	char *buf;
	int error, i, subdisks;
	uint32_t checksum, *ptr;

	pp = cp->provider;
	subdisks = 0;
next:
	/* Read metadata block. */
	buf = g_read_data(cp, pp->mediasize - pp->sectorsize *
	    (63 - subdisks * PROMISE_META_OFFSET),
	    pp->sectorsize * 4, &error);
	if (buf == NULL) {
		G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).",
		    pp->name, error);
		return (subdisks);
	}
	meta = (struct promise_raid_conf *)buf;

	/* Check if this is a Promise RAID struct. */
	if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) &&
	    strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) {
		if (subdisks == 0)
			G_RAID_DEBUG(1,
			    "Promise signature check failed on %s", pp->name);
		g_free(buf);
		return (subdisks);
	}
	meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK);
	memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4));
	g_free(buf);

	/* Check metadata checksum. */
	for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++)
		checksum += *ptr++;
	if (checksum != meta->checksum) {
		G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name);
		free(meta, M_MD_PROMISE);
		return (subdisks);
	}

	if ((meta->integrity & PROMISE_I_VALID) == 0) {
		G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name);
		free(meta, M_MD_PROMISE);
		return (subdisks);
	}

	if (meta->total_disks > PROMISE_MAX_DISKS) {
		G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)",
		    pp->name, meta->total_disks);
		free(meta, M_MD_PROMISE);
		return (subdisks);
	}

	/* Save this part and look for next. */
	*metaarr = meta;
	metaarr++;
	subdisks++;
	if (subdisks < PROMISE_MAX_SUBDISKS)
		goto next;

	return (subdisks);
}

static int
promise_meta_write(struct g_consumer *cp,
    struct promise_raid_conf **metaarr, int nsd)
{
	struct g_provider *pp;
	struct promise_raid_conf *meta;
	char *buf;
	int error, i, subdisk, fake;
	uint32_t checksum, *ptr, off, size;

	pp = cp->provider;
	subdisk = 0;
	fake = 0;
next:
	buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO);
	meta = NULL;
	if (subdisk < nsd) {
		meta = metaarr[subdisk];
	} else if (!fake && promise_meta_unused_range(metaarr, nsd,
	    cp->provider->mediasize / cp->provider->sectorsize,
	    &off, &size)) {
		/* Optionally add record for unused space. */
		meta = (struct promise_raid_conf *)buf;
		memcpy(&meta->promise_id[0], PROMISE_MAGIC,
		    sizeof(PROMISE_MAGIC) - 1);
		meta->dummy_0 = 0x00020000;
		meta->integrity = PROMISE_I_VALID;
		meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID;
		meta->disk.number = 0xff;
		arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
		meta->disk_offset = off;
		meta->disk_sectors = size;
		meta->rebuild_lba = UINT32_MAX;
		fake = 1;
	}
	if (meta != NULL) {
		/* Recalculate checksum in case the metadata was changed. */
		meta->checksum = 0;
		for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++)
			checksum += *ptr++;
		meta->checksum = checksum;
		memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta)));
	}
	error = g_write_data(cp, pp->mediasize - pp->sectorsize *
	    (63 - subdisk * PROMISE_META_OFFSET),
	    buf, pp->sectorsize * 4);
	if (error != 0) {
		G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).",
		    pp->name, error);
	}
	free(buf, M_MD_PROMISE);

	subdisk++;
	if (subdisk < PROMISE_MAX_SUBDISKS)
		goto next;

	return (error);
}

static int
promise_meta_erase(struct g_consumer *cp)
{
	struct g_provider *pp;
	char *buf;
	int error, subdisk;

	pp = cp->provider;
	buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO);
	for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) {
		error = g_write_data(cp, pp->mediasize - pp->sectorsize *
		    (63 - subdisk * PROMISE_META_OFFSET),
		    buf, 4 * pp->sectorsize);
		if (error != 0) {
			G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).",
			    pp->name, error);
		}
	}
	free(buf, M_MD_PROMISE);
	return (error);
}

static int
promise_meta_write_spare(struct g_consumer *cp)
{
	struct promise_raid_conf *meta;
	int error;

	meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO);
	memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1);
	meta->dummy_0 = 0x00020000;
	meta->integrity = PROMISE_I_VALID;
	meta->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE | PROMISE_F_VALID;
	meta->disk.number = 0xff;
	arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0);
	meta->disk_sectors = cp->provider->mediasize / cp->provider->sectorsize;
	meta->disk_sectors -= 131072;
	meta->rebuild_lba = UINT32_MAX;
	error = promise_meta_write(cp, &meta, 1);
	free(meta, M_MD_PROMISE);
	return (error);
}

static struct g_raid_volume *
g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id)
{
	struct g_raid_volume *vol;
	struct g_raid_md_promise_pervolume *pv;

	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = vol->v_md_data;
		if (pv->pv_id == id)
			break;
	}
	return (vol);
}

static int
g_raid_md_promise_purge_volumes(struct g_raid_softc *sc)
{
	struct g_raid_volume *vol, *tvol;
	struct g_raid_md_promise_pervolume *pv;
	int i, res;

	res = 0;
	TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) {
		pv = vol->v_md_data;
		if (!pv->pv_started || vol->v_stopping)
			continue;
		for (i = 0; i < vol->v_disks_count; i++) {
			if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE)
				break;
		}
		if (i >= vol->v_disks_count) {
			g_raid_destroy_volume(vol);
			res = 1;
		}
	}
	return (res);
}

static int
g_raid_md_promise_purge_disks(struct g_raid_softc *sc)
{
	struct g_raid_disk *disk, *tdisk;
	struct g_raid_volume *vol;
	struct g_raid_md_promise_perdisk *pd;
	int i, j, res;

	res = 0;
	TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) {
		if (disk->d_state == G_RAID_DISK_S_SPARE)
			continue;
		pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;

		/* Scan for deleted volumes. */
		for (i = 0; i < pd->pd_subdisks; ) {
			vol = g_raid_md_promise_get_volume(sc,
			    pd->pd_meta[i]->volume_id);
			if (vol != NULL && !vol->v_stopping) {
				i++;
				continue;
			}
			free(pd->pd_meta[i], M_MD_PROMISE);
			for (j = i; j < pd->pd_subdisks - 1; j++)
				pd->pd_meta[j] = pd->pd_meta[j + 1];
			pd->pd_meta[PROMISE_MAX_SUBDISKS - 1] = NULL;
			pd->pd_subdisks--;
			pd->pd_updated = 1;
		}

		/* If there is no metadata left - erase and delete disk. */
		if (pd->pd_subdisks == 0) {
			promise_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
			res = 1;
		}
	}
	return (res);
}

static int
g_raid_md_promise_supported(int level, int qual, int disks, int force)
{

	if (disks > PROMISE_MAX_DISKS)
		return (0);
	switch (level) {
	case G_RAID_VOLUME_RL_RAID0:
		if (disks < 1)
			return (0);
		if (!force && disks < 2)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID1:
		if (disks < 1)
			return (0);
		if (!force && (disks != 2))
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID1E:
		if (disks < 2)
			return (0);
		if (disks % 2 != 0)
			return (0);
		if (!force && (disks != 4))
			return (0);
		break;
	case G_RAID_VOLUME_RL_SINGLE:
		if (disks != 1)
			return (0);
		break;
	case G_RAID_VOLUME_RL_CONCAT:
		if (disks < 2)
			return (0);
		break;
	case G_RAID_VOLUME_RL_RAID5:
		if (disks < 3)
			return (0);
		break;
	default:
		return (0);
	}
	if (qual != G_RAID_VOLUME_RLQ_NONE)
		return (0);
	return (1);
}

static int
g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn,
    struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct promise_raid_conf *meta;
	off_t size;
	int disk_pos, md_disk_pos, i, resurrection = 0;
	uint32_t eoff, esize;

	sc = disk->d_softc;
	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;

	pv = vol->v_md_data;
	meta = pv->pv_meta;

	if (sdn >= 0) {
		/* Find disk position in metadata by its serial. */
		md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id);
		/* For RAID0+1 we need to translate order. */
		disk_pos = promise_meta_translate_disk(vol, md_disk_pos);
	} else {
		md_disk_pos = -1;
		disk_pos = -1;
	}
	if (disk_pos < 0) {
		G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s",
		    g_raid_get_diskname(disk), vol->v_name);
		/* Failed stale disk is useless for us. */
		if (sdn >= 0 &&
		    pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) {
			g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED);
			return (0);
		}
		/* If we were given specific metadata subdisk - erase it. */
		if (sdn >= 0) {
			free(pd->pd_meta[sdn], M_MD_PROMISE);
			for (i = sdn; i < pd->pd_subdisks - 1; i++)
				pd->pd_meta[i] = pd->pd_meta[i + 1];
			pd->pd_meta[PROMISE_MAX_SUBDISKS - 1] = NULL;
			pd->pd_subdisks--;
		}
		/* If we are in the start process, that's all for now. */
		if (!pv->pv_started)
			goto nofit;
		/*
		 * If we have already started - try to make use of the disk.
		 * Try to replace OFFLINE disks first, then FAILED.
		 */
		promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks,
		    disk->d_consumer->provider->mediasize /
		    disk->d_consumer->provider->sectorsize,
		    &eoff, &esize);
		if (esize == 0) {
			G_RAID_DEBUG1(1, sc, "No free space on disk %s",
			    g_raid_get_diskname(disk));
			goto nofit;
		}
		size = INT64_MAX;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			if (sd->sd_state != G_RAID_SUBDISK_S_NONE)
				size = sd->sd_size;
			if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED &&
			    (disk_pos < 0 ||
			     vol->v_subdisks[i].sd_state <
			     vol->v_subdisks[disk_pos].sd_state))
				disk_pos = i;
		}
		if (disk_pos >= 0 &&
		    vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT &&
		    (off_t)esize * 512 < size) {
			G_RAID_DEBUG1(1, sc, "Disk %s free space "
			    "is too small (%ju < %ju)",
			    g_raid_get_diskname(disk),
			    (off_t)esize * 512, size);
			disk_pos = -1;
		}
		if (disk_pos >= 0) {
			if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT)
				esize = size / 512;
			/* For RAID0+1 we need to translate order. */
			md_disk_pos = promise_meta_translate_disk(vol, disk_pos);
		} else {
nofit:
			if (pd->pd_subdisks == 0) {
				g_raid_change_disk_state(disk,
				    G_RAID_DISK_S_SPARE);
			}
			return (0);
		}
		G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s",
		    g_raid_get_diskname(disk), disk_pos, vol->v_name);
		resurrection = 1;
	}

	sd = &vol->v_subdisks[disk_pos];

	if (resurrection && sd->sd_disk != NULL) {
		g_raid_change_disk_state(sd->sd_disk,
		    G_RAID_DISK_S_STALE_FAILED);
		TAILQ_REMOVE(&sd->sd_disk->d_subdisks,
		    sd, sd_next);
	}
	vol->v_subdisks[disk_pos].sd_disk = disk;
	TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);

	/* Welcome the new disk. */
	if (resurrection)
		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);
	else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN)
		g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED);
	else
		g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE);

	if (resurrection) {
		sd->sd_offset = (off_t)eoff * 512;
		sd->sd_size = (off_t)esize * 512;
	} else {
		sd->sd_offset = (off_t)pd->pd_meta[sdn]->disk_offset * 512;
		sd->sd_size = (off_t)pd->pd_meta[sdn]->disk_sectors * 512;
	}

	if (resurrection) {
		/* Stale disk, almost same as new. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_NEW);
	} else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) {
		/* Failed disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_FAILED);
	} else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) {
		/* Rebuilding disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_REBUILD);
		if (pd->pd_meta[sdn]->generation != meta->generation)
			sd->sd_rebuild_pos = 0;
		else {
			sd->sd_rebuild_pos =
			    (off_t)pd->pd_meta[sdn]->rebuild_lba * 512;
		}
	} else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) {
		/* Rebuilding disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_NEW);
	} else if (pd->pd_meta[sdn]->generation != meta->generation ||
	    (meta->status & PROMISE_S_MARKED)) {
		/* Stale disk or dirty volume (unclean shutdown). */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_STALE);
	} else {
		/* Up to date disk. */
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_ACTIVE);
	}
	g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
	    G_RAID_EVENT_SUBDISK);

	return (resurrection);
}

static void
g_raid_md_promise_refill(struct g_raid_softc *sc)
{
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_object *md;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	int update, updated, i, bad;

	md = sc->sc_md;
restart:
	updated = 0;
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		pv = vol->v_md_data;
		if (!pv->pv_started || vol->v_stopping)
			continue;

		/* Search for subdisk that needs replacement. */
		bad = 0;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			if (sd->sd_state == G_RAID_SUBDISK_S_NONE ||
			    sd->sd_state == G_RAID_SUBDISK_S_FAILED)
				bad = 1;
		}
		if (!bad)
			continue;

		G_RAID_DEBUG1(1, sc, "Volume %s is not complete, "
		    "trying to refill.", vol->v_name);

		TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
			/* Skip failed. */
			if (disk->d_state < G_RAID_DISK_S_SPARE)
				continue;
			/* Skip already used by this volume. */
			for (i = 0; i < vol->v_disks_count; i++) {
				sd = &vol->v_subdisks[i];
				if (sd->sd_disk == disk)
					break;
			}
			if (i < vol->v_disks_count)
				continue;

			/* Try to use disk if it has empty extents. */
			pd = disk->d_md_data;
			if (pd->pd_subdisks < PROMISE_MAX_SUBDISKS) {
				update =
				    g_raid_md_promise_start_disk(disk, -1, vol);
			} else
				update = 0;
			if (update) {
				updated = 1;
				g_raid_md_write_promise(md, vol, NULL, disk);
				break;
			}
		}
	}
	if (updated)
		goto restart;
}

static void
g_raid_md_promise_start(struct g_raid_volume *vol)
{
	struct g_raid_softc *sc;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_object *md;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct promise_raid_conf *meta;
	int i;

	sc = vol->v_softc;
	md = sc->sc_md;
	pv = vol->v_md_data;
	meta = pv->pv_meta;

	if (meta->type == PROMISE_T_RAID0)
		vol->v_raid_level = G_RAID_VOLUME_RL_RAID0;
	else if (meta->type == PROMISE_T_RAID1) {
		if (meta->array_width == 1)
			vol->v_raid_level = G_RAID_VOLUME_RL_RAID1;
		else
			vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E;
	} else if (meta->type == PROMISE_T_RAID3)
		vol->v_raid_level = G_RAID_VOLUME_RL_RAID3;
	else if (meta->type == PROMISE_T_RAID5)
		vol->v_raid_level = G_RAID_VOLUME_RL_RAID5;
	else if (meta->type == PROMISE_T_SPAN)
		vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT;
	else if (meta->type == PROMISE_T_JBOD)
		vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE;
	else
		vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN;
	vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
	vol->v_strip_size = 512 << meta->stripe_shift;		//ZZZ
	vol->v_disks_count = meta->total_disks;
	vol->v_mediasize = (off_t)meta->total_sectors * 512;	//ZZZ
	vol->v_sectorsize = 512;				//ZZZ
	for (i = 0; i < vol->v_disks_count; i++) {
		sd = &vol->v_subdisks[i];
		sd->sd_offset = (off_t)meta->disk_offset * 512;	//ZZZ
		sd->sd_size = (off_t)meta->disk_sectors * 512;	//ZZZ
	}
	g_raid_start_volume(vol);

	/* Make all disks found so far take their places. */
	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = disk->d_md_data;
		for (i = 0; i < pd->pd_subdisks; i++) {
			if (pd->pd_meta[i]->volume_id == meta->volume_id)
				g_raid_md_promise_start_disk(disk, i, vol);
		}
	}

	pv->pv_started = 1;
	callout_stop(&pv->pv_start_co);
	G_RAID_DEBUG1(0, sc, "Volume started.");
	g_raid_md_write_promise(md, vol, NULL, NULL);

	/* Pickup any STALE/SPARE disks to refill array if needed. */
	g_raid_md_promise_refill(sc);

	g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME);
}

static void
g_raid_promise_go(void *arg)
{
	struct g_raid_volume *vol;
	struct g_raid_softc *sc;
	struct g_raid_md_promise_pervolume *pv;

	vol = arg;
	pv = vol->v_md_data;
	sc = vol->v_softc;
	if (!pv->pv_started) {
		G_RAID_DEBUG1(0, sc, "Force volume start due to timeout.");
		g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD,
		    G_RAID_EVENT_VOLUME);
	}
}

static void
g_raid_md_promise_new_disk(struct g_raid_disk *disk)
{
	struct g_raid_softc *sc;
	struct g_raid_md_object *md;
	struct promise_raid_conf *pdmeta;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct g_raid_volume *vol;
	int i;
	char buf[33];

	sc = disk->d_softc;
	md = sc->sc_md;
	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;

	if (pd->pd_subdisks == 0) {
		g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
		g_raid_md_promise_refill(sc);
		return;
	}

	for (i = 0; i < pd->pd_subdisks; i++) {
		pdmeta = pd->pd_meta[i];

		/* Look for volume with matching ID. */
		vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
		if (vol == NULL) {
			promise_meta_get_name(pdmeta, buf);
			vol = g_raid_create_volume(sc, buf, pdmeta->array_number);
			pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO);
			pv->pv_id = pdmeta->volume_id;
			vol->v_md_data = pv;
			callout_init(&pv->pv_start_co, 1);
			callout_reset(&pv->pv_start_co,
			    g_raid_start_timeout * hz,
			    g_raid_promise_go, vol);
		} else
			pv = vol->v_md_data;

		/* If we haven't started yet - check metadata freshness. */
		if (pv->pv_meta == NULL || !pv->pv_started) {
			if (pv->pv_meta == NULL ||
			    ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) {
				G_RAID_DEBUG1(1, sc, "Newer disk");
				if (pv->pv_meta != NULL)
					free(pv->pv_meta, M_MD_PROMISE);
				pv->pv_meta = promise_meta_copy(pdmeta);
				pv->pv_generation = pv->pv_meta->generation;
				pv->pv_disks_present = 1;
			} else if (pdmeta->generation == pv->pv_generation) {
				pv->pv_disks_present++;
				G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)",
				    pv->pv_disks_present,
				    pv->pv_meta->total_disks);
			} else {
				G_RAID_DEBUG1(1, sc, "Older disk");
			}
		}
	}

	for (i = 0; i < pd->pd_subdisks; i++) {
		pdmeta = pd->pd_meta[i];

		/* Look for volume with matching ID. */
		vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id);
		if (vol == NULL)
			continue;
		pv = vol->v_md_data;

		if (pv->pv_started) {
			if (g_raid_md_promise_start_disk(disk, i, vol))
				g_raid_md_write_promise(md, vol, NULL, NULL);
		} else {
			/* If we collected all needed disks - start array. */
			if (pv->pv_disks_present == pv->pv_meta->total_disks)
				g_raid_md_promise_start(vol);
		}
	}
}

static int
g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp,
    struct g_geom **gp)
{
	struct g_geom *geom;
	struct g_raid_softc *sc;

	/* Search for existing node. */
	LIST_FOREACH(geom, &mp->geom, geom) {
		sc = geom->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_stopping != 0)
			continue;
		if (sc->sc_md->mdo_class != md->mdo_class)
			continue;
		break;
	}
	if (geom != NULL) {
		*gp = geom;
		return (G_RAID_MD_TASTE_EXISTING);
	}

	/* Create new one if not found. */
	sc = g_raid_create_node(mp, "Promise", md);
	if (sc == NULL)
		return (G_RAID_MD_TASTE_FAIL);
	md->mdo_softc = sc;
	*gp = sc->sc_geom;
	return (G_RAID_MD_TASTE_NEW);
}

static int
g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp,
    struct g_consumer *cp, struct g_geom **gp)
{
	struct g_consumer *rcp;
	struct g_provider *pp;
	struct g_raid_softc *sc;
	struct g_raid_disk *disk;
	struct promise_raid_conf *meta, *metaarr[4];
	struct g_raid_md_promise_perdisk *pd;
	struct g_geom *geom;
	int error, i, j, result, len, subdisks;
	char name[16];
	uint16_t vendor;

	G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name);
	pp = cp->provider;

	/* Read metadata from device. */
	meta = NULL;
	vendor = 0xffff;
	if (g_access(cp, 1, 0, 0) != 0)
		return (G_RAID_MD_TASTE_FAIL);
	g_topology_unlock();
	len = 2;
	if (pp->geom->rank == 1)
		g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor);
	subdisks = promise_meta_read(cp, metaarr);
	g_topology_lock();
	g_access(cp, -1, 0, 0);
	if (subdisks == 0) {
		if (g_raid_aggressive_spare) {
			if (vendor == 0x105a || vendor == 0x1002) {
				G_RAID_DEBUG(1,
				    "No Promise metadata, forcing spare.");
				goto search;
			} else {
				G_RAID_DEBUG(1,
				    "Promise/ATI vendor mismatch "
				    "0x%04x != 0x105a/0x1002",
				    vendor);
			}
		}
		return (G_RAID_MD_TASTE_FAIL);
	}

	/* Metadata valid. Print it. */
	for (i = 0; i < subdisks; i++)
		g_raid_md_promise_print(metaarr[i]);

	/* Purge meaningless (empty/spare) records. */
	for (i = 0; i < subdisks; ) {
		if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) {
			i++;
			continue;
		}
		free(metaarr[i], M_MD_PROMISE);
		for (j = i; j < subdisks - 1; j++)
			metaarr[j] = metaarr[j + 1];
		metaarr[PROMISE_MAX_SUBDISKS - 1] = NULL;
		subdisks--;
	}

search:
	/* Search for matching node. */
	sc = NULL;
	LIST_FOREACH(geom, &mp->geom, geom) {
		sc = geom->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_stopping != 0)
			continue;
		if (sc->sc_md->mdo_class != md->mdo_class)
			continue;
		break;
	}

	/* Found matching node. */
	if (geom != NULL) {
		G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name);
		result = G_RAID_MD_TASTE_EXISTING;

	} else { /* No matching node found - create one. */
		result = G_RAID_MD_TASTE_NEW;
		snprintf(name, sizeof(name), "Promise");
		sc = g_raid_create_node(mp, name, md);
		md->mdo_softc = sc;
		geom = sc->sc_geom;
	}

	rcp = g_new_consumer(geom);
	g_attach(rcp, pp);
	if (g_access(rcp, 1, 1, 1) != 0)
		; //goto fail1;

	g_topology_unlock();
	sx_xlock(&sc->sc_lock);

	pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
	pd->pd_subdisks = subdisks;
	for (i = 0; i < subdisks; i++)
		pd->pd_meta[i] = metaarr[i];
	disk = g_raid_create_disk(sc);
	disk->d_md_data = (void *)pd;
	disk->d_consumer = rcp;
	rcp->private = disk;

	/* Read kernel dumping information. */
	disk->d_kd.offset = 0;
	disk->d_kd.length = OFF_MAX;
	len = sizeof(disk->d_kd);
	error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd);
	if (disk->d_kd.di.dumper == NULL)
		G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.",
		    rcp->provider->name, error);

	g_raid_md_promise_new_disk(disk);

	sx_xunlock(&sc->sc_lock);
	g_topology_lock();
	*gp = geom;
	return (result);
}

static int
g_raid_md_event_promise(struct g_raid_md_object *md,
    struct g_raid_disk *disk, u_int event)
{
	struct g_raid_softc *sc;

	sc = md->mdo_softc;
	if (disk == NULL)
		return (-1);
	switch (event) {
	case G_RAID_DISK_E_DISCONNECTED:
		/* Delete disk. */
		g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE);
		g_raid_destroy_disk(disk);
		g_raid_md_promise_purge_volumes(sc);

		/* Write updated metadata to all disks. */
		g_raid_md_write_promise(md, NULL, NULL, NULL);

		/* Check if anything left. */
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_promise_refill(sc);
		return (0);
	}
	return (-2);
}

static int
g_raid_md_volume_event_promise(struct g_raid_md_object *md,
    struct g_raid_volume *vol, u_int event)
{
	struct g_raid_md_promise_pervolume *pv;

	pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
	switch (event) {
	case G_RAID_VOLUME_E_STARTMD:
		if (!pv->pv_started)
			g_raid_md_promise_start(vol);
		return (0);
	}
	return (-2);
}

static int
g_raid_md_ctl_promise(struct g_raid_md_object *md,
    struct gctl_req *req)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol, *vol1;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS];
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct g_consumer *cp;
	struct g_provider *pp;
	char arg[16];
	const char *verb, *volname, *levelname, *diskname;
	char *tmp;
	int *nargs, *force;
	off_t size, sectorsize, strip;
	intmax_t *sizearg, *striparg;
	uint32_t offs[PROMISE_MAX_DISKS], esize;
	int numdisks, i, len, level, qual;
	int error;

	sc = md->mdo_softc;
	verb = gctl_get_param(req, "verb", NULL);
	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	error = 0;
	if (strcmp(verb, "label") == 0) {

		if (*nargs < 4) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}
		levelname = gctl_get_asciiparam(req, "arg2");
		if (levelname == NULL) {
			gctl_error(req, "No RAID level.");
			return (-3);
		}
		if (g_raid_volume_str2level(levelname, &level, &qual)) {
			gctl_error(req, "Unknown RAID level '%s'.", levelname);
			return (-4);
		}
		numdisks = *nargs - 3;
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (!g_raid_md_promise_supported(level, qual, numdisks,
		    force ? *force : 0)) {
			gctl_error(req, "Unsupported RAID level "
			    "(0x%02x/0x%02x), or number of disks (%d).",
			    level, qual, numdisks);
			return (-5);
		}

		/* Search for disks, connect them and probe. */
		size = INT64_MAX;
		sectorsize = 0;
		bzero(disks, sizeof(disks));
		bzero(offs, sizeof(offs));
		for (i = 0; i < numdisks; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i + 3);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -6;
				break;
			}
			if (strcmp(diskname, "NONE") == 0)
				continue;

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk != NULL) {
				if (disk->d_state != G_RAID_DISK_S_ACTIVE) {
					gctl_error(req, "Disk '%s' is in a "
					    "wrong state (%s).", diskname,
					    g_raid_disk_state2str(disk->d_state));
					error = -7;
					break;
				}
				pd = disk->d_md_data;
				if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) {
					gctl_error(req, "Disk '%s' already "
					    "used by %d volumes.",
					    diskname, pd->pd_subdisks);
					error = -7;
					break;
				}
				pp = disk->d_consumer->provider;
				disks[i] = disk;
				promise_meta_unused_range(pd->pd_meta,
				    pd->pd_subdisks,
				    pp->mediasize / pp->sectorsize,
				    &offs[i], &esize);
				size = MIN(size, (off_t)esize * pp->sectorsize);
				sectorsize = MAX(sectorsize, pp->sectorsize);
				continue;
			}

			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -8;
				break;
			}
			pp = cp->provider;
			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);
			disk = g_raid_create_disk(sc);
			disk->d_md_data = (void *)pd;
			disk->d_consumer = cp;
			disks[i] = disk;
			cp->private = disk;
			g_topology_unlock();

			/* Read kernel dumping information. */
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Reserve some space for metadata. */
			size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize);
			sectorsize = MAX(sectorsize, pp->sectorsize);
		}
		if (error != 0) {
			for (i = 0; i < numdisks; i++) {
				if (disks[i] != NULL &&
				    disks[i]->d_state == G_RAID_DISK_S_NONE)
					g_raid_destroy_disk(disks[i]);
			}
			return (error);
		}

		if (sectorsize <= 0) {
			gctl_error(req, "Can't get sector size.");
			return (-8);
		}

		/* Handle size argument. */
		len = sizeof(*sizearg);
		sizearg = gctl_get_param(req, "size", &len);
		if (sizearg != NULL && len == sizeof(*sizearg) &&
		    *sizearg > 0) {
			if (*sizearg > size) {
				gctl_error(req, "Size too big %lld > %lld.",
				    (long long)*sizearg, (long long)size);
				return (-9);
			}
			size = *sizearg;
		}

		/* Handle strip argument. */
		strip = 131072;
		len = sizeof(*striparg);
		striparg = gctl_get_param(req, "strip", &len);
		if (striparg != NULL && len == sizeof(*striparg) &&
		    *striparg > 0) {
			if (*striparg < sectorsize) {
				gctl_error(req, "Strip size too small.");
				return (-10);
			}
			if (*striparg % sectorsize != 0) {
				gctl_error(req, "Incorrect strip size.");
				return (-11);
			}
			strip = *striparg;
		}

		/* Round size down to strip or sector. */
		if (level == G_RAID_VOLUME_RL_RAID1 ||
		    level == G_RAID_VOLUME_RL_SINGLE ||
		    level == G_RAID_VOLUME_RL_CONCAT)
			size -= (size % sectorsize);
		else if (level == G_RAID_VOLUME_RL_RAID1E &&
		    (numdisks & 1) != 0)
			size -= (size % (2 * strip));
		else
			size -= (size % strip);
		if (size <= 0) {
			gctl_error(req, "Size too small.");
			return (-13);
		}
		if (size > 0xffffffffllu * sectorsize) {
			gctl_error(req, "Size too big.");
			return (-14);
		}

		/* We have all we need, create things: volume, ... */
		pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO);
		arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0);
		pv->pv_generation = 0;
		pv->pv_started = 1;
		vol = g_raid_create_volume(sc, volname, -1);
		vol->v_md_data = pv;
		vol->v_raid_level = level;
		vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE;
		vol->v_strip_size = strip;
		vol->v_disks_count = numdisks;
		if (level == G_RAID_VOLUME_RL_RAID0 ||
		    level == G_RAID_VOLUME_RL_CONCAT ||
		    level == G_RAID_VOLUME_RL_SINGLE)
			vol->v_mediasize = size * numdisks;
		else if (level == G_RAID_VOLUME_RL_RAID1)
			vol->v_mediasize = size;
		else if (level == G_RAID_VOLUME_RL_RAID3 ||
		    level == G_RAID_VOLUME_RL_RAID5)
			vol->v_mediasize = size * (numdisks - 1);
		else { /* RAID1E */
			vol->v_mediasize = ((size * numdisks) / strip / 2) *
			    strip;
		}
		vol->v_sectorsize = sectorsize;
		g_raid_start_volume(vol);

		/* , and subdisks. */
		for (i = 0; i < numdisks; i++) {
			disk = disks[i];
			sd = &vol->v_subdisks[i];
			sd->sd_disk = disk;
			sd->sd_offset = (off_t)offs[i] * 512;
			sd->sd_size = size;
			if (disk == NULL)
				continue;
			TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next);
			g_raid_change_disk_state(disk,
			    G_RAID_DISK_S_ACTIVE);
			g_raid_change_subdisk_state(sd,
			    G_RAID_SUBDISK_S_ACTIVE);
			g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW,
			    G_RAID_EVENT_SUBDISK);
		}

		/* Write metadata based on created entities. */
		G_RAID_DEBUG1(0, sc, "Array started.");
		g_raid_md_write_promise(md, vol, NULL, NULL);

		/* Pickup any STALE/SPARE disks to refill array if needed. */
		g_raid_md_promise_refill(sc);

		g_raid_event_send(vol, G_RAID_VOLUME_E_START,
		    G_RAID_EVENT_VOLUME);
		return (0);
	}
	if (strcmp(verb, "add") == 0) {

		gctl_error(req, "`add` command is not applicable, "
		    "use `label` instead.");
		return (-99);
	}
	if (strcmp(verb, "delete") == 0) {

		/* Full node destruction. */
		if (*nargs == 1) {
			/* Check if some volume is still open. */
			force = gctl_get_paraml(req, "force", sizeof(*force));
			if (force != NULL && *force == 0 &&
			    g_raid_nopens(sc) != 0) {
				gctl_error(req, "Some volume is still open.");
				return (-4);
			}

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					promise_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
			return (0);
		}

		/* Destroy specified volume. If it was the last one - destroy the whole node. */
		if (*nargs != 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		volname = gctl_get_asciiparam(req, "arg1");
		if (volname == NULL) {
			gctl_error(req, "No volume name.");
			return (-2);
		}

		/* Search for volume. */
		TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
			if (strcmp(vol->v_name, volname) == 0)
				break;
		}
		if (vol == NULL) {
			i = strtol(volname, &tmp, 10);
			if (verb != volname && tmp[0] == 0) {
				TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
					if (vol->v_global_id == i)
						break;
				}
			}
		}
		if (vol == NULL) {
			gctl_error(req, "Volume '%s' not found.", volname);
			return (-3);
		}

		/* Check if volume is still open. */
		force = gctl_get_paraml(req, "force", sizeof(*force));
		if (force != NULL && *force == 0 &&
		    vol->v_provider_open != 0) {
			gctl_error(req, "Volume is still open.");
			return (-4);
		}

		/* Destroy volume and potentially node. */
		i = 0;
		TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next)
			i++;
		if (i >= 2) {
			g_raid_destroy_volume(vol);
			g_raid_md_promise_purge_disks(sc);
			g_raid_md_write_promise(md, NULL, NULL, NULL);
		} else {
			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer)
					promise_meta_erase(disk->d_consumer);
			}
			g_raid_destroy_node(sc, 0);
		}
		return (0);
	}
	if (strcmp(verb, "remove") == 0 ||
	    strcmp(verb, "fail") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -2;
				break;
			}
			if (strncmp(diskname, "/dev/", 5) == 0)
				diskname += 5;

			TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
				if (disk->d_consumer != NULL &&
				    disk->d_consumer->provider != NULL &&
				    strcmp(disk->d_consumer->provider->name,
				     diskname) == 0)
					break;
			}
			if (disk == NULL) {
				gctl_error(req, "Disk '%s' not found.",
				    diskname);
				error = -3;
				break;
			}

			if (strcmp(verb, "fail") == 0) {
				g_raid_md_fail_disk_promise(md, NULL, disk);
				continue;
			}

			/* Erase metadata on the disk being deleted and destroy it. */
			promise_meta_erase(disk->d_consumer);
			g_raid_destroy_disk(disk);
		}
		g_raid_md_promise_purge_volumes(sc);

		/* Write updated metadata to remaining disks. */
		g_raid_md_write_promise(md, NULL, NULL, NULL);

		/* Check if anything left. */
		if (g_raid_ndisks(sc, -1) == 0)
			g_raid_destroy_node(sc, 0);
		else
			g_raid_md_promise_refill(sc);
		return (error);
	}
	if (strcmp(verb, "insert") == 0) {
		if (*nargs < 2) {
			gctl_error(req, "Invalid number of arguments.");
			return (-1);
		}
		for (i = 1; i < *nargs; i++) {
			/* Get disk name. */
			snprintf(arg, sizeof(arg), "arg%d", i);
			diskname = gctl_get_asciiparam(req, arg);
			if (diskname == NULL) {
				gctl_error(req, "No disk name (%s).", arg);
				error = -3;
				break;
			}

			/* Try to find provider with specified name. */
			g_topology_lock();
			cp = g_raid_open_consumer(sc, diskname);
			if (cp == NULL) {
				gctl_error(req, "Can't open disk '%s'.",
				    diskname);
				g_topology_unlock();
				error = -4;
				break;
			}
			g_topology_unlock();

			pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO);

			disk = g_raid_create_disk(sc);
			disk->d_consumer = cp;
			disk->d_md_data = (void *)pd;
			cp->private = disk;

			/* Read kernel dumping information. */
			disk->d_kd.offset = 0;
			disk->d_kd.length = OFF_MAX;
			len = sizeof(disk->d_kd);
			g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd);
			if (disk->d_kd.di.dumper == NULL)
				G_RAID_DEBUG1(2, sc,
				    "Dumping not supported by %s.",
				    cp->provider->name);

			/* Welcome the "new" disk. */
			g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE);
			promise_meta_write_spare(cp);
			g_raid_md_promise_refill(sc);
		}
		return (error);
	}
	return (-100);
}

static int
g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_volume *vol;
	struct g_raid_subdisk *sd;
	struct g_raid_disk *disk;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_md_promise_pervolume *pv;
	struct promise_raid_conf *meta;
	off_t rebuild_lba64;
	int i, j, pos, rebuild;

	sc = md->mdo_softc;

	if (sc->sc_stopping == G_RAID_DESTROY_HARD)
		return (0);

	/* Generate new per-volume metadata for affected volumes. */
	TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) {
		if (vol->v_stopping)
			continue;

		/* Skip volumes not related to specified targets. */
		if (tvol != NULL && vol != tvol)
			continue;
		if (tsd != NULL && vol != tsd->sd_volume)
			continue;
		if (tdisk != NULL) {
			for (i = 0; i < vol->v_disks_count; i++) {
				if (vol->v_subdisks[i].sd_disk == tdisk)
					break;
			}
			if (i >= vol->v_disks_count)
				continue;
		}

		pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
		pv->pv_generation++;

		meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO);
		if (pv->pv_meta != NULL)
			memcpy(meta, pv->pv_meta, sizeof(*meta));
		memcpy(meta->promise_id, PROMISE_MAGIC,
		    sizeof(PROMISE_MAGIC) - 1);
		meta->dummy_0 = 0x00020000;
		meta->integrity = PROMISE_I_VALID;

		meta->generation = pv->pv_generation;
		meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE |
		    PROMISE_S_INITED | PROMISE_S_READY;
		if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED)
			meta->status |= PROMISE_S_DEGRADED;
		if (vol->v_dirty)
			meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED! */
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE)
			meta->type = PROMISE_T_RAID0;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
			meta->type = PROMISE_T_RAID1;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3)
			meta->type = PROMISE_T_RAID3;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5)
			meta->type = PROMISE_T_RAID5;
		else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT)
			meta->type = PROMISE_T_SPAN;
		else
			meta->type = PROMISE_T_JBOD;
		meta->total_disks = vol->v_disks_count;
		meta->stripe_shift = ffs(vol->v_strip_size / 1024);
		meta->array_width = vol->v_disks_count;
		if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
		    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E)
			meta->array_width /= 2;
		meta->array_number = vol->v_global_id;
		meta->total_sectors = vol->v_mediasize / vol->v_sectorsize;
		meta->cylinders = meta->total_sectors / (255 * 63) - 1;
		meta->heads = 254;
		meta->sectors = 63;
		meta->volume_id = pv->pv_id;
		rebuild_lba64 = UINT64_MAX;
		rebuild = 0;
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			/* For RAID0+1 we need to translate order. */
			pos = promise_meta_translate_disk(vol, i);
			meta->disks[pos].flags = PROMISE_F_VALID |
			    PROMISE_F_ASSIGNED;
			if (sd->sd_state == G_RAID_SUBDISK_S_NONE) {
				meta->disks[pos].flags |= 0;
			} else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) {
				meta->disks[pos].flags |=
				    PROMISE_F_DOWN | PROMISE_F_REDIR;
			} else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) {
				meta->disks[pos].flags |=
				    PROMISE_F_ONLINE | PROMISE_F_REDIR;
				if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
					rebuild_lba64 = MIN(rebuild_lba64,
					    sd->sd_rebuild_pos / 512);
				} else
					rebuild_lba64 = 0;
				rebuild = 1;
			} else {
				meta->disks[pos].flags |= PROMISE_F_ONLINE;
				if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) {
					meta->status |= PROMISE_S_MARKED;
					if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) {
						rebuild_lba64 = MIN(rebuild_lba64,
						    sd->sd_rebuild_pos / 512);
					} else
						rebuild_lba64 = 0;
				}
			}
			if (pv->pv_meta != NULL) {
				meta->disks[pos].id = pv->pv_meta->disks[pos].id;
			} else {
				meta->disks[pos].number = i * 2;
				arc4rand(&meta->disks[pos].id,
				    sizeof(meta->disks[pos].id), 0);
			}
		}
		promise_meta_put_name(meta, vol->v_name);

		/* Try to mimic AMD BIOS rebuild/resync behavior. */
		if (rebuild_lba64 != UINT64_MAX) {
			if (rebuild)
				meta->magic_3 = 0x03040010UL; /* Rebuild? */
			else
				meta->magic_3 = 0x03040008UL; /* Resync? */
			/* Translate from per-disk to per-volume LBA. */
			if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 ||
			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) {
				rebuild_lba64 *= meta->array_width;
			} else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 ||
			    vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) {
				rebuild_lba64 *= meta->array_width - 1;
			} else
				rebuild_lba64 = 0;
		} else
			meta->magic_3 = 0x03000000UL;
		meta->rebuild_lba64 = rebuild_lba64;
		meta->magic_4 = 0x04010101UL;

		/* Replace per-volume metadata with new. */
		if (pv->pv_meta != NULL)
			free(pv->pv_meta, M_MD_PROMISE);
		pv->pv_meta = meta;

		/* Copy new metadata to the disks, adding or replacing old. */
		for (i = 0; i < vol->v_disks_count; i++) {
			sd = &vol->v_subdisks[i];
			disk = sd->sd_disk;
			if (disk == NULL)
				continue;
			/* For RAID0+1 we need to translate order. */
			pos = promise_meta_translate_disk(vol, i);
			pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
			for (j = 0; j < pd->pd_subdisks; j++) {
				if (pd->pd_meta[j]->volume_id == meta->volume_id)
					break;
			}
			if (j == pd->pd_subdisks)
				pd->pd_subdisks++;
			if (pd->pd_meta[j] != NULL)
				free(pd->pd_meta[j], M_MD_PROMISE);
			pd->pd_meta[j] = promise_meta_copy(meta);
			pd->pd_meta[j]->disk = meta->disks[pos];
			pd->pd_meta[j]->disk.number = pos;
			pd->pd_meta[j]->disk_offset = sd->sd_offset / 512;
			pd->pd_meta[j]->disk_sectors = sd->sd_size / 512;
			if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) {
				pd->pd_meta[j]->rebuild_lba =
				    sd->sd_rebuild_pos / 512;
			} else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD)
				pd->pd_meta[j]->rebuild_lba = 0;
			else
				pd->pd_meta[j]->rebuild_lba = UINT32_MAX;
			pd->pd_updated = 1;
		}
	}

	TAILQ_FOREACH(disk, &sc->sc_disks, d_next) {
		pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
		if (disk->d_state != G_RAID_DISK_S_ACTIVE)
			continue;
		if (!pd->pd_updated)
			continue;
		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
		    g_raid_get_diskname(disk));
		for (i = 0; i < pd->pd_subdisks; i++)
			g_raid_md_promise_print(pd->pd_meta[i]);
		promise_meta_write(disk->d_consumer,
		    pd->pd_meta, pd->pd_subdisks);
		pd->pd_updated = 0;
	}

	return (0);
}

static int
g_raid_md_fail_disk_promise(struct g_raid_md_object *md,
    struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk)
{
	struct g_raid_softc *sc;
	struct g_raid_md_promise_perdisk *pd;
	struct g_raid_subdisk *sd;
	int i, pos;

	sc = md->mdo_softc;
	pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data;

	/* We can't fail a disk that is not part of the array now. */
	if (tdisk->d_state != G_RAID_DISK_S_ACTIVE)
		return (-1);

	/*
	 * Mark disk as failed in metadata and try to write that metadata
	 * to the disk itself to prevent its later resurrection as STALE.
	 */
	if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL)
		G_RAID_DEBUG(1, "Writing Promise metadata to %s",
		    g_raid_get_diskname(tdisk));
	for (i = 0; i < pd->pd_subdisks; i++) {
		pd->pd_meta[i]->disk.flags |=
		    PROMISE_F_DOWN | PROMISE_F_REDIR;
		pos = pd->pd_meta[i]->disk.number;
		if (pos >= 0 && pos < PROMISE_MAX_DISKS) {
			pd->pd_meta[i]->disks[pos].flags |=
			    PROMISE_F_DOWN | PROMISE_F_REDIR;
		}
		g_raid_md_promise_print(pd->pd_meta[i]);
	}
	if (tdisk->d_consumer != NULL)
		promise_meta_write(tdisk->d_consumer,
		    pd->pd_meta, pd->pd_subdisks);

	/* Change states. */
	g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED);
	TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) {
		g_raid_change_subdisk_state(sd,
		    G_RAID_SUBDISK_S_FAILED);
		g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED,
		    G_RAID_EVENT_SUBDISK);
	}

	/* Write updated metadata to remaining disks. */
	g_raid_md_write_promise(md, NULL, NULL, tdisk);

	g_raid_md_promise_refill(sc);
	return (0);
}

static int
g_raid_md_free_disk_promise(struct g_raid_md_object *md,
    struct g_raid_disk *disk)
{
	struct g_raid_md_promise_perdisk *pd;
	int i;

	pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data;
	for (i = 0; i < pd->pd_subdisks; i++) {
		if (pd->pd_meta[i] != NULL) {
			free(pd->pd_meta[i], M_MD_PROMISE);
			pd->pd_meta[i] = NULL;
		}
	}
	free(pd, M_MD_PROMISE);
	disk->d_md_data = NULL;
	return (0);
}

static int
g_raid_md_free_volume_promise(struct g_raid_md_object *md,
    struct g_raid_volume *vol)
{
	struct g_raid_md_promise_pervolume *pv;

	pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data;
	if (pv && pv->pv_meta != NULL) {
		free(pv->pv_meta, M_MD_PROMISE);
		pv->pv_meta = NULL;
	}
	if (pv && !pv->pv_started) {
		pv->pv_started = 1;
		callout_stop(&pv->pv_start_co);
	}
	return (0);
}

static int
g_raid_md_free_promise(struct g_raid_md_object *md)
{

	return (0);
}

G_RAID_MD_DECLARE(g_raid_md_promise);