1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org> 5 * Copyright (c) 2000 - 2008 Søren Schmidt <sos@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/endian.h> 33 #include <sys/kernel.h> 34 #include <sys/kobj.h> 35 #include <sys/limits.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/mutex.h> 39 #include <sys/systm.h> 40 #include <geom/geom.h> 41 #include <geom/geom_dbg.h> 42 #include "geom/raid/g_raid.h" 43 #include "g_raid_md_if.h" 44 45 static MALLOC_DEFINE(M_MD_PROMISE, "md_promise_data", "GEOM_RAID Promise metadata"); 46 47 #define PROMISE_MAX_DISKS 8 48 #define PROMISE_MAX_SUBDISKS 2 49 #define PROMISE_META_OFFSET 14 50 51 struct promise_raid_disk { 52 uint8_t flags; /* Subdisk status. */ 53 #define PROMISE_F_VALID 0x01 54 #define PROMISE_F_ONLINE 0x02 55 #define PROMISE_F_ASSIGNED 0x04 56 #define PROMISE_F_SPARE 0x08 57 #define PROMISE_F_DUPLICATE 0x10 58 #define PROMISE_F_REDIR 0x20 59 #define PROMISE_F_DOWN 0x40 60 #define PROMISE_F_READY 0x80 61 62 uint8_t number; /* Position in a volume. */ 63 uint8_t channel; /* ATA channel number. */ 64 uint8_t device; /* ATA device number. */ 65 uint64_t id __packed; /* Subdisk ID. */ 66 } __packed; 67 68 struct promise_raid_conf { 69 char promise_id[24]; 70 #define PROMISE_MAGIC "Promise Technology, Inc." 71 #define FREEBSD_MAGIC "FreeBSD ATA driver RAID " 72 73 uint32_t dummy_0; 74 uint64_t magic_0; 75 #define PROMISE_MAGIC0(x) (((uint64_t)(x.channel) << 48) | \ 76 ((uint64_t)(x.device != 0) << 56)) 77 uint16_t magic_1; 78 uint32_t magic_2; 79 uint8_t filler1[470]; 80 81 uint32_t integrity; 82 #define PROMISE_I_VALID 0x00000080 83 84 struct promise_raid_disk disk; /* This subdisk info. */ 85 uint32_t disk_offset; /* Subdisk offset. */ 86 uint32_t disk_sectors; /* Subdisk size */ 87 uint32_t disk_rebuild; /* Rebuild position. */ 88 uint16_t generation; /* Generation number. */ 89 uint8_t status; /* Volume status. */ 90 #define PROMISE_S_VALID 0x01 91 #define PROMISE_S_ONLINE 0x02 92 #define PROMISE_S_INITED 0x04 93 #define PROMISE_S_READY 0x08 94 #define PROMISE_S_DEGRADED 0x10 95 #define PROMISE_S_MARKED 0x20 96 #define PROMISE_S_MIGRATING 0x40 97 #define PROMISE_S_FUNCTIONAL 0x80 98 99 uint8_t type; /* Voluem type. */ 100 #define PROMISE_T_RAID0 0x00 101 #define PROMISE_T_RAID1 0x01 102 #define PROMISE_T_RAID3 0x02 103 #define PROMISE_T_RAID5 0x04 104 #define PROMISE_T_SPAN 0x08 105 #define PROMISE_T_JBOD 0x10 106 107 uint8_t total_disks; /* Disks in this volume. */ 108 uint8_t stripe_shift; /* Strip size. */ 109 uint8_t array_width; /* Number of RAID0 stripes. */ 110 uint8_t array_number; /* Global volume number. */ 111 uint32_t total_sectors; /* Volume size. */ 112 uint16_t cylinders; /* Volume geometry: C. */ 113 uint8_t heads; /* Volume geometry: H. */ 114 uint8_t sectors; /* Volume geometry: S. */ 115 uint64_t volume_id __packed; /* Volume ID, */ 116 struct promise_raid_disk disks[PROMISE_MAX_DISKS]; 117 /* Subdisks in this volume. */ 118 char name[32]; /* Volume label. */ 119 120 uint32_t filler2[8]; 121 uint32_t magic_3; /* Something related to rebuild. */ 122 uint64_t rebuild_lba64; /* Per-volume rebuild position. */ 123 uint32_t magic_4; 124 uint32_t magic_5; 125 uint32_t total_sectors_high; 126 uint8_t magic_6; 127 uint8_t sector_size; 128 uint16_t magic_7; 129 uint32_t magic_8[31]; 130 uint32_t backup_time; 131 uint16_t magic_9; 132 uint32_t disk_offset_high; 133 uint32_t disk_sectors_high; 134 uint32_t disk_rebuild_high; 135 uint16_t magic_10; 136 uint32_t magic_11[3]; 137 uint32_t filler3[284]; 138 uint32_t checksum; 139 } __packed; 140 141 struct g_raid_md_promise_perdisk { 142 int pd_updated; 143 int pd_subdisks; 144 struct promise_raid_conf *pd_meta[PROMISE_MAX_SUBDISKS]; 145 }; 146 147 struct g_raid_md_promise_pervolume { 148 struct promise_raid_conf *pv_meta; 149 uint64_t pv_id; 150 uint16_t pv_generation; 151 int pv_disks_present; 152 int pv_started; 153 struct callout pv_start_co; /* STARTING state timer. */ 154 }; 155 156 static g_raid_md_create_t g_raid_md_create_promise; 157 static g_raid_md_taste_t g_raid_md_taste_promise; 158 static g_raid_md_event_t g_raid_md_event_promise; 159 static g_raid_md_volume_event_t g_raid_md_volume_event_promise; 160 static g_raid_md_ctl_t g_raid_md_ctl_promise; 161 static g_raid_md_write_t g_raid_md_write_promise; 162 static g_raid_md_fail_disk_t g_raid_md_fail_disk_promise; 163 static g_raid_md_free_disk_t g_raid_md_free_disk_promise; 164 static g_raid_md_free_volume_t g_raid_md_free_volume_promise; 165 static g_raid_md_free_t g_raid_md_free_promise; 166 167 static kobj_method_t g_raid_md_promise_methods[] = { 168 KOBJMETHOD(g_raid_md_create, g_raid_md_create_promise), 169 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_promise), 170 KOBJMETHOD(g_raid_md_event, g_raid_md_event_promise), 171 KOBJMETHOD(g_raid_md_volume_event, g_raid_md_volume_event_promise), 172 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_promise), 173 KOBJMETHOD(g_raid_md_write, g_raid_md_write_promise), 174 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_promise), 175 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_promise), 176 KOBJMETHOD(g_raid_md_free_volume, g_raid_md_free_volume_promise), 177 KOBJMETHOD(g_raid_md_free, g_raid_md_free_promise), 178 { 0, 0 } 179 }; 180 181 static struct g_raid_md_class g_raid_md_promise_class = { 182 "Promise", 183 g_raid_md_promise_methods, 184 sizeof(struct g_raid_md_object), 185 .mdc_enable = 1, 186 .mdc_priority = 100 187 }; 188 189 static void 190 g_raid_md_promise_print(struct promise_raid_conf *meta) 191 { 192 int i; 193 194 if (g_raid_debug < 1) 195 return; 196 197 printf("********* ATA Promise Metadata *********\n"); 198 printf("promise_id <%.24s>\n", meta->promise_id); 199 printf("disk %02x %02x %02x %02x %016jx\n", 200 meta->disk.flags, meta->disk.number, meta->disk.channel, 201 meta->disk.device, meta->disk.id); 202 printf("disk_offset %u\n", meta->disk_offset); 203 printf("disk_sectors %u\n", meta->disk_sectors); 204 printf("disk_rebuild %u\n", meta->disk_rebuild); 205 printf("generation %u\n", meta->generation); 206 printf("status 0x%02x\n", meta->status); 207 printf("type %u\n", meta->type); 208 printf("total_disks %u\n", meta->total_disks); 209 printf("stripe_shift %u\n", meta->stripe_shift); 210 printf("array_width %u\n", meta->array_width); 211 printf("array_number %u\n", meta->array_number); 212 printf("total_sectors %u\n", meta->total_sectors); 213 printf("cylinders %u\n", meta->cylinders); 214 printf("heads %u\n", meta->heads); 215 printf("sectors %u\n", meta->sectors); 216 printf("volume_id 0x%016jx\n", meta->volume_id); 217 printf("disks:\n"); 218 for (i = 0; i < PROMISE_MAX_DISKS; i++ ) { 219 printf(" %02x %02x %02x %02x %016jx\n", 220 meta->disks[i].flags, meta->disks[i].number, 221 meta->disks[i].channel, meta->disks[i].device, 222 meta->disks[i].id); 223 } 224 printf("name <%.32s>\n", meta->name); 225 printf("magic_3 0x%08x\n", meta->magic_3); 226 printf("rebuild_lba64 %ju\n", meta->rebuild_lba64); 227 printf("magic_4 0x%08x\n", meta->magic_4); 228 printf("magic_5 0x%08x\n", meta->magic_5); 229 printf("total_sectors_high 0x%08x\n", meta->total_sectors_high); 230 printf("sector_size %u\n", meta->sector_size); 231 printf("backup_time %d\n", meta->backup_time); 232 printf("disk_offset_high 0x%08x\n", meta->disk_offset_high); 233 printf("disk_sectors_high 0x%08x\n", meta->disk_sectors_high); 234 printf("disk_rebuild_high 0x%08x\n", meta->disk_rebuild_high); 235 printf("=================================================\n"); 236 } 237 238 static struct promise_raid_conf * 239 promise_meta_copy(struct promise_raid_conf *meta) 240 { 241 struct promise_raid_conf *nmeta; 242 243 nmeta = malloc(sizeof(*nmeta), M_MD_PROMISE, M_WAITOK); 244 memcpy(nmeta, meta, sizeof(*nmeta)); 245 return (nmeta); 246 } 247 248 static int 249 promise_meta_find_disk(struct promise_raid_conf *meta, uint64_t id) 250 { 251 int pos; 252 253 for (pos = 0; pos < meta->total_disks; pos++) { 254 if (meta->disks[pos].id == id) 255 return (pos); 256 } 257 return (-1); 258 } 259 260 static int 261 promise_meta_unused_range(struct promise_raid_conf **metaarr, int nsd, 262 off_t sectors, off_t *off, off_t *size) 263 { 264 off_t coff, csize, tmp; 265 int i, j; 266 267 sectors -= 131072; 268 *off = 0; 269 *size = 0; 270 coff = 0; 271 csize = sectors; 272 i = 0; 273 while (1) { 274 for (j = 0; j < nsd; j++) { 275 tmp = ((off_t)metaarr[j]->disk_offset_high << 32) + 276 metaarr[j]->disk_offset; 277 if (tmp >= coff) 278 csize = MIN(csize, tmp - coff); 279 } 280 if (csize > *size) { 281 *off = coff; 282 *size = csize; 283 } 284 if (i >= nsd) 285 break; 286 coff = ((off_t)metaarr[i]->disk_offset_high << 32) + 287 metaarr[i]->disk_offset + 288 ((off_t)metaarr[i]->disk_sectors_high << 32) + 289 metaarr[i]->disk_sectors; 290 csize = sectors - coff; 291 i++; 292 } 293 return ((*size > 0) ? 1 : 0); 294 } 295 296 static int 297 promise_meta_translate_disk(struct g_raid_volume *vol, int md_disk_pos) 298 { 299 int disk_pos, width; 300 301 if (md_disk_pos >= 0 && vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { 302 width = vol->v_disks_count / 2; 303 disk_pos = (md_disk_pos / width) + 304 (md_disk_pos % width) * width; 305 } else 306 disk_pos = md_disk_pos; 307 return (disk_pos); 308 } 309 310 static void 311 promise_meta_get_name(struct promise_raid_conf *meta, char *buf) 312 { 313 int i; 314 315 strncpy(buf, meta->name, 32); 316 buf[32] = 0; 317 for (i = 31; i >= 0; i--) { 318 if (buf[i] > 0x20) 319 break; 320 buf[i] = 0; 321 } 322 } 323 324 static void 325 promise_meta_put_name(struct promise_raid_conf *meta, char *buf) 326 { 327 328 memset(meta->name, 0x20, 32); 329 memcpy(meta->name, buf, MIN(strlen(buf), 32)); 330 } 331 332 static int 333 promise_meta_read(struct g_consumer *cp, struct promise_raid_conf **metaarr) 334 { 335 struct g_provider *pp; 336 struct promise_raid_conf *meta; 337 char *buf; 338 int error, i, subdisks; 339 uint32_t checksum, *ptr; 340 341 pp = cp->provider; 342 subdisks = 0; 343 344 if (pp->sectorsize * 4 < sizeof(*meta)) 345 return (subdisks); 346 if (pp->sectorsize * 4 > maxphys) { 347 G_RAID_DEBUG(1, "%s: Blocksize is too big.", pp->name); 348 return (subdisks); 349 } 350 next: 351 /* Read metadata block. */ 352 buf = g_read_data(cp, pp->mediasize - pp->sectorsize * 353 (63 - subdisks * PROMISE_META_OFFSET), 354 pp->sectorsize * 4, &error); 355 if (buf == NULL) { 356 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 357 pp->name, error); 358 return (subdisks); 359 } 360 meta = (struct promise_raid_conf *)buf; 361 362 /* Check if this is an Promise RAID struct */ 363 if (strncmp(meta->promise_id, PROMISE_MAGIC, strlen(PROMISE_MAGIC)) && 364 strncmp(meta->promise_id, FREEBSD_MAGIC, strlen(FREEBSD_MAGIC))) { 365 if (subdisks == 0) 366 G_RAID_DEBUG(1, 367 "Promise signature check failed on %s", pp->name); 368 g_free(buf); 369 return (subdisks); 370 } 371 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK); 372 memcpy(meta, buf, MIN(sizeof(*meta), pp->sectorsize * 4)); 373 g_free(buf); 374 375 /* Check metadata checksum. */ 376 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 377 checksum += *ptr++; 378 if (checksum != meta->checksum) { 379 G_RAID_DEBUG(1, "Promise checksum check failed on %s", pp->name); 380 free(meta, M_MD_PROMISE); 381 return (subdisks); 382 } 383 384 if ((meta->integrity & PROMISE_I_VALID) == 0) { 385 G_RAID_DEBUG(1, "Promise metadata is invalid on %s", pp->name); 386 free(meta, M_MD_PROMISE); 387 return (subdisks); 388 } 389 390 if (meta->total_disks > PROMISE_MAX_DISKS) { 391 G_RAID_DEBUG(1, "Wrong number of disks on %s (%d)", 392 pp->name, meta->total_disks); 393 free(meta, M_MD_PROMISE); 394 return (subdisks); 395 } 396 397 /* Remove filler garbage from fields used in newer metadata. */ 398 if (meta->disk_offset_high == 0x8b8c8d8e && 399 meta->disk_sectors_high == 0x8788898a && 400 meta->disk_rebuild_high == 0x83848586) { 401 meta->disk_offset_high = 0; 402 meta->disk_sectors_high = 0; 403 if (meta->disk_rebuild == UINT32_MAX) 404 meta->disk_rebuild_high = UINT32_MAX; 405 else 406 meta->disk_rebuild_high = 0; 407 if (meta->total_sectors_high == 0x15161718) { 408 meta->total_sectors_high = 0; 409 meta->backup_time = 0; 410 if (meta->rebuild_lba64 == 0x2122232425262728) 411 meta->rebuild_lba64 = UINT64_MAX; 412 } 413 } 414 if (meta->sector_size < 1 || meta->sector_size > 8) 415 meta->sector_size = 1; 416 417 /* Save this part and look for next. */ 418 *metaarr = meta; 419 metaarr++; 420 subdisks++; 421 if (subdisks < PROMISE_MAX_SUBDISKS) 422 goto next; 423 424 return (subdisks); 425 } 426 427 static int 428 promise_meta_write(struct g_consumer *cp, 429 struct promise_raid_conf **metaarr, int nsd) 430 { 431 struct g_provider *pp; 432 struct promise_raid_conf *meta; 433 char *buf; 434 off_t off, size; 435 int error, i, subdisk, fake; 436 uint32_t checksum, *ptr; 437 438 pp = cp->provider; 439 subdisk = 0; 440 fake = 0; 441 next: 442 buf = malloc(pp->sectorsize * 4, M_MD_PROMISE, M_WAITOK | M_ZERO); 443 meta = NULL; 444 if (subdisk < nsd) { 445 meta = metaarr[subdisk]; 446 } else if (!fake && promise_meta_unused_range(metaarr, nsd, 447 cp->provider->mediasize / cp->provider->sectorsize, 448 &off, &size)) { 449 /* Optionally add record for unused space. */ 450 meta = (struct promise_raid_conf *)buf; 451 memcpy(&meta->promise_id[0], PROMISE_MAGIC, 452 sizeof(PROMISE_MAGIC) - 1); 453 meta->dummy_0 = 0x00020000; 454 meta->integrity = PROMISE_I_VALID; 455 meta->disk.flags = PROMISE_F_ONLINE | PROMISE_F_VALID; 456 meta->disk.number = 0xff; 457 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 458 meta->disk_offset_high = off >> 32; 459 meta->disk_offset = (uint32_t)off; 460 meta->disk_sectors_high = size >> 32; 461 meta->disk_sectors = (uint32_t)size; 462 meta->disk_rebuild_high = UINT32_MAX; 463 meta->disk_rebuild = UINT32_MAX; 464 fake = 1; 465 } 466 if (meta != NULL) { 467 /* Recalculate checksum for case if metadata were changed. */ 468 meta->checksum = 0; 469 for (checksum = 0, ptr = (uint32_t *)meta, i = 0; i < 511; i++) 470 checksum += *ptr++; 471 meta->checksum = checksum; 472 memcpy(buf, meta, MIN(pp->sectorsize * 4, sizeof(*meta))); 473 } 474 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 475 (63 - subdisk * PROMISE_META_OFFSET), 476 buf, pp->sectorsize * 4); 477 if (error != 0) { 478 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 479 pp->name, error); 480 } 481 free(buf, M_MD_PROMISE); 482 483 subdisk++; 484 if (subdisk < PROMISE_MAX_SUBDISKS) 485 goto next; 486 487 return (error); 488 } 489 490 static int 491 promise_meta_erase(struct g_consumer *cp) 492 { 493 struct g_provider *pp; 494 char *buf; 495 int error, subdisk; 496 497 pp = cp->provider; 498 buf = malloc(4 * pp->sectorsize, M_MD_PROMISE, M_WAITOK | M_ZERO); 499 for (subdisk = 0; subdisk < PROMISE_MAX_SUBDISKS; subdisk++) { 500 error = g_write_data(cp, pp->mediasize - pp->sectorsize * 501 (63 - subdisk * PROMISE_META_OFFSET), 502 buf, 4 * pp->sectorsize); 503 if (error != 0) { 504 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", 505 pp->name, error); 506 } 507 } 508 free(buf, M_MD_PROMISE); 509 return (error); 510 } 511 512 static int 513 promise_meta_write_spare(struct g_consumer *cp) 514 { 515 struct promise_raid_conf *meta; 516 off_t tmp; 517 int error; 518 519 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); 520 memcpy(&meta->promise_id[0], PROMISE_MAGIC, sizeof(PROMISE_MAGIC) - 1); 521 meta->dummy_0 = 0x00020000; 522 meta->integrity = PROMISE_I_VALID; 523 meta->disk.flags = PROMISE_F_SPARE | PROMISE_F_ONLINE | PROMISE_F_VALID; 524 meta->disk.number = 0xff; 525 arc4rand(&meta->disk.id, sizeof(meta->disk.id), 0); 526 tmp = cp->provider->mediasize / cp->provider->sectorsize - 131072; 527 meta->disk_sectors_high = tmp >> 32; 528 meta->disk_sectors = (uint32_t)tmp; 529 meta->disk_rebuild_high = UINT32_MAX; 530 meta->disk_rebuild = UINT32_MAX; 531 error = promise_meta_write(cp, &meta, 1); 532 free(meta, M_MD_PROMISE); 533 return (error); 534 } 535 536 static struct g_raid_volume * 537 g_raid_md_promise_get_volume(struct g_raid_softc *sc, uint64_t id) 538 { 539 struct g_raid_volume *vol; 540 struct g_raid_md_promise_pervolume *pv; 541 542 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 543 pv = vol->v_md_data; 544 if (pv->pv_id == id) 545 break; 546 } 547 return (vol); 548 } 549 550 static int 551 g_raid_md_promise_purge_volumes(struct g_raid_softc *sc) 552 { 553 struct g_raid_volume *vol, *tvol; 554 struct g_raid_md_promise_pervolume *pv; 555 int i, res; 556 557 res = 0; 558 TAILQ_FOREACH_SAFE(vol, &sc->sc_volumes, v_next, tvol) { 559 pv = vol->v_md_data; 560 if (!pv->pv_started || vol->v_stopping) 561 continue; 562 for (i = 0; i < vol->v_disks_count; i++) { 563 if (vol->v_subdisks[i].sd_state != G_RAID_SUBDISK_S_NONE) 564 break; 565 } 566 if (i >= vol->v_disks_count) { 567 g_raid_destroy_volume(vol); 568 res = 1; 569 } 570 } 571 return (res); 572 } 573 574 static int 575 g_raid_md_promise_purge_disks(struct g_raid_softc *sc) 576 { 577 struct g_raid_disk *disk, *tdisk; 578 struct g_raid_volume *vol; 579 struct g_raid_md_promise_perdisk *pd; 580 int i, j, res; 581 582 res = 0; 583 TAILQ_FOREACH_SAFE(disk, &sc->sc_disks, d_next, tdisk) { 584 if (disk->d_state == G_RAID_DISK_S_SPARE) 585 continue; 586 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 587 588 /* Scan for deleted volumes. */ 589 for (i = 0; i < pd->pd_subdisks; ) { 590 vol = g_raid_md_promise_get_volume(sc, 591 pd->pd_meta[i]->volume_id); 592 if (vol != NULL && !vol->v_stopping) { 593 i++; 594 continue; 595 } 596 free(pd->pd_meta[i], M_MD_PROMISE); 597 for (j = i; j < pd->pd_subdisks - 1; j++) 598 pd->pd_meta[j] = pd->pd_meta[j + 1]; 599 pd->pd_meta[pd->pd_subdisks - 1] = NULL; 600 pd->pd_subdisks--; 601 pd->pd_updated = 1; 602 } 603 604 /* If there is no metadata left - erase and delete disk. */ 605 if (pd->pd_subdisks == 0) { 606 promise_meta_erase(disk->d_consumer); 607 g_raid_destroy_disk(disk); 608 res = 1; 609 } 610 } 611 return (res); 612 } 613 614 static int 615 g_raid_md_promise_supported(int level, int qual, int disks, int force) 616 { 617 618 if (disks > PROMISE_MAX_DISKS) 619 return (0); 620 switch (level) { 621 case G_RAID_VOLUME_RL_RAID0: 622 if (disks < 1) 623 return (0); 624 if (!force && disks < 2) 625 return (0); 626 break; 627 case G_RAID_VOLUME_RL_RAID1: 628 if (disks < 1) 629 return (0); 630 if (!force && (disks != 2)) 631 return (0); 632 break; 633 case G_RAID_VOLUME_RL_RAID1E: 634 if (disks < 2) 635 return (0); 636 if (disks % 2 != 0) 637 return (0); 638 if (!force && (disks != 4)) 639 return (0); 640 break; 641 case G_RAID_VOLUME_RL_SINGLE: 642 if (disks != 1) 643 return (0); 644 break; 645 case G_RAID_VOLUME_RL_CONCAT: 646 if (disks < 2) 647 return (0); 648 break; 649 case G_RAID_VOLUME_RL_RAID5: 650 if (disks < 3) 651 return (0); 652 if (qual != G_RAID_VOLUME_RLQ_R5LA) 653 return (0); 654 break; 655 default: 656 return (0); 657 } 658 if (level != G_RAID_VOLUME_RL_RAID5 && qual != G_RAID_VOLUME_RLQ_NONE) 659 return (0); 660 return (1); 661 } 662 663 static int 664 g_raid_md_promise_start_disk(struct g_raid_disk *disk, int sdn, 665 struct g_raid_volume *vol) 666 { 667 struct g_raid_softc *sc; 668 struct g_raid_subdisk *sd; 669 struct g_raid_md_promise_perdisk *pd; 670 struct g_raid_md_promise_pervolume *pv; 671 struct promise_raid_conf *meta; 672 off_t eoff, esize, size; 673 int disk_pos, md_disk_pos, i, resurrection = 0; 674 675 sc = disk->d_softc; 676 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 677 678 pv = vol->v_md_data; 679 meta = pv->pv_meta; 680 681 if (sdn >= 0) { 682 /* Find disk position in metadata by its serial. */ 683 md_disk_pos = promise_meta_find_disk(meta, pd->pd_meta[sdn]->disk.id); 684 /* For RAID0+1 we need to translate order. */ 685 disk_pos = promise_meta_translate_disk(vol, md_disk_pos); 686 } else { 687 md_disk_pos = -1; 688 disk_pos = -1; 689 } 690 if (disk_pos < 0) { 691 G_RAID_DEBUG1(1, sc, "Disk %s is not part of the volume %s", 692 g_raid_get_diskname(disk), vol->v_name); 693 /* Failed stale disk is useless for us. */ 694 if (sdn >= 0 && 695 pd->pd_meta[sdn]->disk.flags & PROMISE_F_DOWN) { 696 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE_FAILED); 697 return (0); 698 } 699 /* If we were given specific metadata subdisk - erase it. */ 700 if (sdn >= 0) { 701 free(pd->pd_meta[sdn], M_MD_PROMISE); 702 for (i = sdn; i < pd->pd_subdisks - 1; i++) 703 pd->pd_meta[i] = pd->pd_meta[i + 1]; 704 pd->pd_meta[pd->pd_subdisks - 1] = NULL; 705 pd->pd_subdisks--; 706 } 707 /* If we are in the start process, that's all for now. */ 708 if (!pv->pv_started) 709 goto nofit; 710 /* 711 * If we have already started - try to get use of the disk. 712 * Try to replace OFFLINE disks first, then FAILED. 713 */ 714 promise_meta_unused_range(pd->pd_meta, pd->pd_subdisks, 715 disk->d_consumer->provider->mediasize / 716 disk->d_consumer->provider->sectorsize, 717 &eoff, &esize); 718 if (esize == 0) { 719 G_RAID_DEBUG1(1, sc, "No free space on disk %s", 720 g_raid_get_diskname(disk)); 721 goto nofit; 722 } 723 size = INT64_MAX; 724 for (i = 0; i < vol->v_disks_count; i++) { 725 sd = &vol->v_subdisks[i]; 726 if (sd->sd_state != G_RAID_SUBDISK_S_NONE) 727 size = sd->sd_size; 728 if (sd->sd_state <= G_RAID_SUBDISK_S_FAILED && 729 (disk_pos < 0 || 730 vol->v_subdisks[i].sd_state < sd->sd_state)) 731 disk_pos = i; 732 } 733 if (disk_pos >= 0 && 734 vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT && 735 (off_t)esize * 512 < size) { 736 G_RAID_DEBUG1(1, sc, "Disk %s free space " 737 "is too small (%ju < %ju)", 738 g_raid_get_diskname(disk), 739 (off_t)esize * 512, size); 740 disk_pos = -1; 741 } 742 if (disk_pos >= 0) { 743 if (vol->v_raid_level != G_RAID_VOLUME_RL_CONCAT) 744 esize = size / 512; 745 /* For RAID0+1 we need to translate order. */ 746 md_disk_pos = promise_meta_translate_disk(vol, disk_pos); 747 } else { 748 nofit: 749 if (pd->pd_subdisks == 0) { 750 g_raid_change_disk_state(disk, 751 G_RAID_DISK_S_SPARE); 752 } 753 return (0); 754 } 755 G_RAID_DEBUG1(1, sc, "Disk %s takes pos %d in the volume %s", 756 g_raid_get_diskname(disk), disk_pos, vol->v_name); 757 resurrection = 1; 758 } 759 760 sd = &vol->v_subdisks[disk_pos]; 761 762 if (resurrection && sd->sd_disk != NULL) { 763 g_raid_change_disk_state(sd->sd_disk, 764 G_RAID_DISK_S_STALE_FAILED); 765 TAILQ_REMOVE(&sd->sd_disk->d_subdisks, 766 sd, sd_next); 767 } 768 vol->v_subdisks[disk_pos].sd_disk = disk; 769 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 770 771 /* Welcome the new disk. */ 772 if (resurrection) 773 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 774 else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) 775 g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 776 else 777 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 778 779 if (resurrection) { 780 sd->sd_offset = (off_t)eoff * 512; 781 sd->sd_size = (off_t)esize * 512; 782 } else { 783 sd->sd_offset = (((off_t)pd->pd_meta[sdn]->disk_offset_high 784 << 32) + pd->pd_meta[sdn]->disk_offset) * 512; 785 sd->sd_size = (((off_t)pd->pd_meta[sdn]->disk_sectors_high 786 << 32) + pd->pd_meta[sdn]->disk_sectors) * 512; 787 } 788 789 if (resurrection) { 790 /* Stale disk, almost same as new. */ 791 g_raid_change_subdisk_state(sd, 792 G_RAID_SUBDISK_S_NEW); 793 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_DOWN) { 794 /* Failed disk. */ 795 g_raid_change_subdisk_state(sd, 796 G_RAID_SUBDISK_S_FAILED); 797 } else if (meta->disks[md_disk_pos].flags & PROMISE_F_REDIR) { 798 /* Rebuilding disk. */ 799 g_raid_change_subdisk_state(sd, 800 G_RAID_SUBDISK_S_REBUILD); 801 if (pd->pd_meta[sdn]->generation != meta->generation) 802 sd->sd_rebuild_pos = 0; 803 else { 804 sd->sd_rebuild_pos = 805 (((off_t)pd->pd_meta[sdn]->disk_rebuild_high << 32) + 806 pd->pd_meta[sdn]->disk_rebuild) * 512; 807 } 808 } else if (!(meta->disks[md_disk_pos].flags & PROMISE_F_ONLINE)) { 809 /* Rebuilding disk. */ 810 g_raid_change_subdisk_state(sd, 811 G_RAID_SUBDISK_S_NEW); 812 } else if (pd->pd_meta[sdn]->generation != meta->generation || 813 (meta->status & PROMISE_S_MARKED)) { 814 /* Stale disk or dirty volume (unclean shutdown). */ 815 g_raid_change_subdisk_state(sd, 816 G_RAID_SUBDISK_S_STALE); 817 } else { 818 /* Up to date disk. */ 819 g_raid_change_subdisk_state(sd, 820 G_RAID_SUBDISK_S_ACTIVE); 821 } 822 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 823 G_RAID_EVENT_SUBDISK); 824 825 return (resurrection); 826 } 827 828 static void 829 g_raid_md_promise_refill(struct g_raid_softc *sc) 830 { 831 struct g_raid_volume *vol; 832 struct g_raid_subdisk *sd; 833 struct g_raid_disk *disk; 834 struct g_raid_md_object *md; 835 struct g_raid_md_promise_perdisk *pd; 836 struct g_raid_md_promise_pervolume *pv; 837 int update, updated, i, bad; 838 839 md = sc->sc_md; 840 restart: 841 updated = 0; 842 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 843 pv = vol->v_md_data; 844 if (!pv->pv_started || vol->v_stopping) 845 continue; 846 847 /* Search for subdisk that needs replacement. */ 848 bad = 0; 849 for (i = 0; i < vol->v_disks_count; i++) { 850 sd = &vol->v_subdisks[i]; 851 if (sd->sd_state == G_RAID_SUBDISK_S_NONE || 852 sd->sd_state == G_RAID_SUBDISK_S_FAILED) 853 bad = 1; 854 } 855 if (!bad) 856 continue; 857 858 G_RAID_DEBUG1(1, sc, "Volume %s is not complete, " 859 "trying to refill.", vol->v_name); 860 861 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 862 /* Skip failed. */ 863 if (disk->d_state < G_RAID_DISK_S_SPARE) 864 continue; 865 /* Skip already used by this volume. */ 866 for (i = 0; i < vol->v_disks_count; i++) { 867 sd = &vol->v_subdisks[i]; 868 if (sd->sd_disk == disk) 869 break; 870 } 871 if (i < vol->v_disks_count) 872 continue; 873 874 /* Try to use disk if it has empty extents. */ 875 pd = disk->d_md_data; 876 if (pd->pd_subdisks < PROMISE_MAX_SUBDISKS) { 877 update = 878 g_raid_md_promise_start_disk(disk, -1, vol); 879 } else 880 update = 0; 881 if (update) { 882 updated = 1; 883 g_raid_md_write_promise(md, vol, NULL, disk); 884 break; 885 } 886 } 887 } 888 if (updated) 889 goto restart; 890 } 891 892 static void 893 g_raid_md_promise_start(struct g_raid_volume *vol) 894 { 895 struct g_raid_softc *sc; 896 struct g_raid_subdisk *sd; 897 struct g_raid_disk *disk; 898 struct g_raid_md_object *md; 899 struct g_raid_md_promise_perdisk *pd; 900 struct g_raid_md_promise_pervolume *pv; 901 struct promise_raid_conf *meta; 902 u_int i; 903 904 sc = vol->v_softc; 905 md = sc->sc_md; 906 pv = vol->v_md_data; 907 meta = pv->pv_meta; 908 909 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 910 if (meta->type == PROMISE_T_RAID0) 911 vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; 912 else if (meta->type == PROMISE_T_RAID1) { 913 if (meta->array_width == 1) 914 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 915 else 916 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 917 } else if (meta->type == PROMISE_T_RAID3) 918 vol->v_raid_level = G_RAID_VOLUME_RL_RAID3; 919 else if (meta->type == PROMISE_T_RAID5) { 920 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 921 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_R5LA; 922 } else if (meta->type == PROMISE_T_SPAN) 923 vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; 924 else if (meta->type == PROMISE_T_JBOD) 925 vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; 926 else 927 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 928 vol->v_strip_size = 512 << meta->stripe_shift; //ZZZ 929 vol->v_disks_count = meta->total_disks; 930 vol->v_mediasize = (off_t)meta->total_sectors * 512; //ZZZ 931 if (meta->total_sectors_high < 256) /* If value looks sane. */ 932 vol->v_mediasize += 933 ((off_t)meta->total_sectors_high << 32) * 512; //ZZZ 934 vol->v_sectorsize = 512 * meta->sector_size; 935 for (i = 0; i < vol->v_disks_count; i++) { 936 sd = &vol->v_subdisks[i]; 937 sd->sd_offset = (((off_t)meta->disk_offset_high << 32) + 938 meta->disk_offset) * 512; 939 sd->sd_size = (((off_t)meta->disk_sectors_high << 32) + 940 meta->disk_sectors) * 512; 941 } 942 g_raid_start_volume(vol); 943 944 /* Make all disks found till the moment take their places. */ 945 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 946 pd = disk->d_md_data; 947 for (i = 0; i < pd->pd_subdisks; i++) { 948 if (pd->pd_meta[i]->volume_id == meta->volume_id) 949 g_raid_md_promise_start_disk(disk, i, vol); 950 } 951 } 952 953 pv->pv_started = 1; 954 callout_stop(&pv->pv_start_co); 955 G_RAID_DEBUG1(0, sc, "Volume started."); 956 g_raid_md_write_promise(md, vol, NULL, NULL); 957 958 /* Pickup any STALE/SPARE disks to refill array if needed. */ 959 g_raid_md_promise_refill(sc); 960 961 g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); 962 } 963 964 static void 965 g_raid_promise_go(void *arg) 966 { 967 struct g_raid_volume *vol; 968 struct g_raid_softc *sc; 969 struct g_raid_md_promise_pervolume *pv; 970 971 vol = arg; 972 pv = vol->v_md_data; 973 sc = vol->v_softc; 974 if (!pv->pv_started) { 975 G_RAID_DEBUG1(0, sc, "Force volume start due to timeout."); 976 g_raid_event_send(vol, G_RAID_VOLUME_E_STARTMD, 977 G_RAID_EVENT_VOLUME); 978 } 979 } 980 981 static void 982 g_raid_md_promise_new_disk(struct g_raid_disk *disk) 983 { 984 struct g_raid_softc *sc; 985 struct g_raid_md_object *md; 986 struct promise_raid_conf *pdmeta; 987 struct g_raid_md_promise_perdisk *pd; 988 struct g_raid_md_promise_pervolume *pv; 989 struct g_raid_volume *vol; 990 int i; 991 char buf[33]; 992 993 sc = disk->d_softc; 994 md = sc->sc_md; 995 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 996 997 if (pd->pd_subdisks == 0) { 998 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 999 g_raid_md_promise_refill(sc); 1000 return; 1001 } 1002 1003 for (i = 0; i < pd->pd_subdisks; i++) { 1004 pdmeta = pd->pd_meta[i]; 1005 1006 /* Look for volume with matching ID. */ 1007 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 1008 if (vol == NULL) { 1009 promise_meta_get_name(pdmeta, buf); 1010 vol = g_raid_create_volume(sc, buf, pdmeta->array_number); 1011 pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); 1012 pv->pv_id = pdmeta->volume_id; 1013 vol->v_md_data = pv; 1014 callout_init(&pv->pv_start_co, 1); 1015 callout_reset(&pv->pv_start_co, 1016 g_raid_start_timeout * hz, 1017 g_raid_promise_go, vol); 1018 } else 1019 pv = vol->v_md_data; 1020 1021 /* If we haven't started yet - check metadata freshness. */ 1022 if (pv->pv_meta == NULL || !pv->pv_started) { 1023 if (pv->pv_meta == NULL || 1024 ((int16_t)(pdmeta->generation - pv->pv_generation)) > 0) { 1025 G_RAID_DEBUG1(1, sc, "Newer disk"); 1026 if (pv->pv_meta != NULL) 1027 free(pv->pv_meta, M_MD_PROMISE); 1028 pv->pv_meta = promise_meta_copy(pdmeta); 1029 pv->pv_generation = pv->pv_meta->generation; 1030 pv->pv_disks_present = 1; 1031 } else if (pdmeta->generation == pv->pv_generation) { 1032 pv->pv_disks_present++; 1033 G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", 1034 pv->pv_disks_present, 1035 pv->pv_meta->total_disks); 1036 } else { 1037 G_RAID_DEBUG1(1, sc, "Older disk"); 1038 } 1039 } 1040 } 1041 1042 for (i = 0; i < pd->pd_subdisks; i++) { 1043 pdmeta = pd->pd_meta[i]; 1044 1045 /* Look for volume with matching ID. */ 1046 vol = g_raid_md_promise_get_volume(sc, pdmeta->volume_id); 1047 if (vol == NULL) 1048 continue; 1049 pv = vol->v_md_data; 1050 1051 if (pv->pv_started) { 1052 if (g_raid_md_promise_start_disk(disk, i, vol)) 1053 g_raid_md_write_promise(md, vol, NULL, NULL); 1054 } else { 1055 /* If we collected all needed disks - start array. */ 1056 if (pv->pv_disks_present == pv->pv_meta->total_disks) 1057 g_raid_md_promise_start(vol); 1058 } 1059 } 1060 } 1061 1062 static int 1063 g_raid_md_create_promise(struct g_raid_md_object *md, struct g_class *mp, 1064 struct g_geom **gp) 1065 { 1066 struct g_geom *geom; 1067 struct g_raid_softc *sc; 1068 1069 /* Search for existing node. */ 1070 LIST_FOREACH(geom, &mp->geom, geom) { 1071 sc = geom->softc; 1072 if (sc == NULL) 1073 continue; 1074 if (sc->sc_stopping != 0) 1075 continue; 1076 if (sc->sc_md->mdo_class != md->mdo_class) 1077 continue; 1078 break; 1079 } 1080 if (geom != NULL) { 1081 *gp = geom; 1082 return (G_RAID_MD_TASTE_EXISTING); 1083 } 1084 1085 /* Create new one if not found. */ 1086 sc = g_raid_create_node(mp, "Promise", md); 1087 if (sc == NULL) 1088 return (G_RAID_MD_TASTE_FAIL); 1089 md->mdo_softc = sc; 1090 *gp = sc->sc_geom; 1091 return (G_RAID_MD_TASTE_NEW); 1092 } 1093 1094 static int 1095 g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp, 1096 struct g_consumer *cp, struct g_geom **gp) 1097 { 1098 struct g_consumer *rcp; 1099 struct g_provider *pp; 1100 struct g_raid_softc *sc; 1101 struct g_raid_disk *disk; 1102 struct promise_raid_conf *metaarr[4]; 1103 struct g_raid_md_promise_perdisk *pd; 1104 struct g_geom *geom; 1105 int i, j, result, len, subdisks; 1106 char name[16]; 1107 uint16_t vendor; 1108 1109 G_RAID_DEBUG(1, "Tasting Promise on %s", cp->provider->name); 1110 pp = cp->provider; 1111 1112 /* Read metadata from device. */ 1113 g_topology_unlock(); 1114 vendor = 0xffff; 1115 len = sizeof(vendor); 1116 if (pp->geom->rank == 1) 1117 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); 1118 subdisks = promise_meta_read(cp, metaarr); 1119 g_topology_lock(); 1120 if (subdisks == 0) { 1121 if (g_raid_aggressive_spare) { 1122 if (vendor == 0x105a || vendor == 0x1002) { 1123 G_RAID_DEBUG(1, 1124 "No Promise metadata, forcing spare."); 1125 goto search; 1126 } else { 1127 G_RAID_DEBUG(1, 1128 "Promise/ATI vendor mismatch " 1129 "0x%04x != 0x105a/0x1002", 1130 vendor); 1131 } 1132 } 1133 return (G_RAID_MD_TASTE_FAIL); 1134 } 1135 1136 /* Metadata valid. Print it. */ 1137 for (i = 0; i < subdisks; i++) 1138 g_raid_md_promise_print(metaarr[i]); 1139 1140 /* Purge meaningless (empty/spare) records. */ 1141 for (i = 0; i < subdisks; ) { 1142 if (metaarr[i]->disk.flags & PROMISE_F_ASSIGNED) { 1143 i++; 1144 continue; 1145 } 1146 free(metaarr[i], M_MD_PROMISE); 1147 for (j = i; j < subdisks - 1; j++) 1148 metaarr[i] = metaarr[j + 1]; 1149 metaarr[subdisks - 1] = NULL; 1150 subdisks--; 1151 } 1152 1153 search: 1154 /* Search for matching node. */ 1155 sc = NULL; 1156 LIST_FOREACH(geom, &mp->geom, geom) { 1157 sc = geom->softc; 1158 if (sc == NULL) 1159 continue; 1160 if (sc->sc_stopping != 0) 1161 continue; 1162 if (sc->sc_md->mdo_class != md->mdo_class) 1163 continue; 1164 break; 1165 } 1166 1167 /* Found matching node. */ 1168 if (geom != NULL) { 1169 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 1170 result = G_RAID_MD_TASTE_EXISTING; 1171 1172 } else { /* Not found matching node -- create one. */ 1173 result = G_RAID_MD_TASTE_NEW; 1174 snprintf(name, sizeof(name), "Promise"); 1175 sc = g_raid_create_node(mp, name, md); 1176 md->mdo_softc = sc; 1177 geom = sc->sc_geom; 1178 } 1179 1180 /* There is no return after this point, so we close passed consumer. */ 1181 g_access(cp, -1, 0, 0); 1182 1183 rcp = g_new_consumer(geom); 1184 rcp->flags |= G_CF_DIRECT_RECEIVE; 1185 g_attach(rcp, pp); 1186 if (g_access(rcp, 1, 1, 1) != 0) 1187 ; //goto fail1; 1188 1189 g_topology_unlock(); 1190 sx_xlock(&sc->sc_lock); 1191 1192 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1193 pd->pd_subdisks = subdisks; 1194 for (i = 0; i < subdisks; i++) 1195 pd->pd_meta[i] = metaarr[i]; 1196 disk = g_raid_create_disk(sc); 1197 disk->d_md_data = (void *)pd; 1198 disk->d_consumer = rcp; 1199 rcp->private = disk; 1200 1201 g_raid_get_disk_info(disk); 1202 1203 g_raid_md_promise_new_disk(disk); 1204 1205 sx_xunlock(&sc->sc_lock); 1206 g_topology_lock(); 1207 *gp = geom; 1208 return (result); 1209 } 1210 1211 static int 1212 g_raid_md_event_promise(struct g_raid_md_object *md, 1213 struct g_raid_disk *disk, u_int event) 1214 { 1215 struct g_raid_softc *sc; 1216 1217 sc = md->mdo_softc; 1218 if (disk == NULL) 1219 return (-1); 1220 switch (event) { 1221 case G_RAID_DISK_E_DISCONNECTED: 1222 /* Delete disk. */ 1223 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 1224 g_raid_destroy_disk(disk); 1225 g_raid_md_promise_purge_volumes(sc); 1226 1227 /* Write updated metadata to all disks. */ 1228 g_raid_md_write_promise(md, NULL, NULL, NULL); 1229 1230 /* Check if anything left. */ 1231 if (g_raid_ndisks(sc, -1) == 0) 1232 g_raid_destroy_node(sc, 0); 1233 else 1234 g_raid_md_promise_refill(sc); 1235 return (0); 1236 } 1237 return (-2); 1238 } 1239 1240 static int 1241 g_raid_md_volume_event_promise(struct g_raid_md_object *md, 1242 struct g_raid_volume *vol, u_int event) 1243 { 1244 struct g_raid_md_promise_pervolume *pv; 1245 1246 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1247 switch (event) { 1248 case G_RAID_VOLUME_E_STARTMD: 1249 if (!pv->pv_started) 1250 g_raid_md_promise_start(vol); 1251 return (0); 1252 } 1253 return (-2); 1254 } 1255 1256 static int 1257 g_raid_md_ctl_promise(struct g_raid_md_object *md, 1258 struct gctl_req *req) 1259 { 1260 struct g_raid_softc *sc; 1261 struct g_raid_volume *vol, *vol1; 1262 struct g_raid_subdisk *sd; 1263 struct g_raid_disk *disk, *disks[PROMISE_MAX_DISKS]; 1264 struct g_raid_md_promise_perdisk *pd; 1265 struct g_raid_md_promise_pervolume *pv; 1266 struct g_consumer *cp; 1267 struct g_provider *pp; 1268 char arg[16]; 1269 const char *nodename, *verb, *volname, *levelname, *diskname; 1270 char *tmp; 1271 int *nargs, *force; 1272 off_t esize, offs[PROMISE_MAX_DISKS], size, sectorsize, strip; 1273 intmax_t *sizearg, *striparg; 1274 int numdisks, i, len, level, qual; 1275 int error; 1276 1277 sc = md->mdo_softc; 1278 verb = gctl_get_param(req, "verb", NULL); 1279 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1280 error = 0; 1281 if (strcmp(verb, "label") == 0) { 1282 if (*nargs < 4) { 1283 gctl_error(req, "Invalid number of arguments."); 1284 return (-1); 1285 } 1286 volname = gctl_get_asciiparam(req, "arg1"); 1287 if (volname == NULL) { 1288 gctl_error(req, "No volume name."); 1289 return (-2); 1290 } 1291 levelname = gctl_get_asciiparam(req, "arg2"); 1292 if (levelname == NULL) { 1293 gctl_error(req, "No RAID level."); 1294 return (-3); 1295 } 1296 if (strcasecmp(levelname, "RAID5") == 0) 1297 levelname = "RAID5-LA"; 1298 if (g_raid_volume_str2level(levelname, &level, &qual)) { 1299 gctl_error(req, "Unknown RAID level '%s'.", levelname); 1300 return (-4); 1301 } 1302 numdisks = *nargs - 3; 1303 force = gctl_get_paraml(req, "force", sizeof(*force)); 1304 if (!g_raid_md_promise_supported(level, qual, numdisks, 1305 force ? *force : 0)) { 1306 gctl_error(req, "Unsupported RAID level " 1307 "(0x%02x/0x%02x), or number of disks (%d).", 1308 level, qual, numdisks); 1309 return (-5); 1310 } 1311 1312 /* Search for disks, connect them and probe. */ 1313 size = INT64_MAX; 1314 sectorsize = 0; 1315 bzero(disks, sizeof(disks)); 1316 bzero(offs, sizeof(offs)); 1317 for (i = 0; i < numdisks; i++) { 1318 snprintf(arg, sizeof(arg), "arg%d", i + 3); 1319 diskname = gctl_get_asciiparam(req, arg); 1320 if (diskname == NULL) { 1321 gctl_error(req, "No disk name (%s).", arg); 1322 error = -6; 1323 break; 1324 } 1325 if (strcmp(diskname, "NONE") == 0) 1326 continue; 1327 1328 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1329 if (disk->d_consumer != NULL && 1330 disk->d_consumer->provider != NULL && 1331 strcmp(disk->d_consumer->provider->name, 1332 diskname) == 0) 1333 break; 1334 } 1335 if (disk != NULL) { 1336 if (disk->d_state != G_RAID_DISK_S_ACTIVE) { 1337 gctl_error(req, "Disk '%s' is in a " 1338 "wrong state (%s).", diskname, 1339 g_raid_disk_state2str(disk->d_state)); 1340 error = -7; 1341 break; 1342 } 1343 pd = disk->d_md_data; 1344 if (pd->pd_subdisks >= PROMISE_MAX_SUBDISKS) { 1345 gctl_error(req, "Disk '%s' already " 1346 "used by %d volumes.", 1347 diskname, pd->pd_subdisks); 1348 error = -7; 1349 break; 1350 } 1351 pp = disk->d_consumer->provider; 1352 disks[i] = disk; 1353 promise_meta_unused_range(pd->pd_meta, 1354 pd->pd_subdisks, 1355 pp->mediasize / pp->sectorsize, 1356 &offs[i], &esize); 1357 size = MIN(size, (off_t)esize * pp->sectorsize); 1358 sectorsize = MAX(sectorsize, pp->sectorsize); 1359 continue; 1360 } 1361 1362 g_topology_lock(); 1363 cp = g_raid_open_consumer(sc, diskname); 1364 if (cp == NULL) { 1365 gctl_error(req, "Can't open disk '%s'.", 1366 diskname); 1367 g_topology_unlock(); 1368 error = -8; 1369 break; 1370 } 1371 pp = cp->provider; 1372 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1373 disk = g_raid_create_disk(sc); 1374 disk->d_md_data = (void *)pd; 1375 disk->d_consumer = cp; 1376 disks[i] = disk; 1377 cp->private = disk; 1378 g_topology_unlock(); 1379 1380 g_raid_get_disk_info(disk); 1381 1382 /* Reserve some space for metadata. */ 1383 size = MIN(size, pp->mediasize - 131072llu * pp->sectorsize); 1384 sectorsize = MAX(sectorsize, pp->sectorsize); 1385 } 1386 if (error != 0) { 1387 for (i = 0; i < numdisks; i++) { 1388 if (disks[i] != NULL && 1389 disks[i]->d_state == G_RAID_DISK_S_NONE) 1390 g_raid_destroy_disk(disks[i]); 1391 } 1392 return (error); 1393 } 1394 1395 if (sectorsize <= 0) { 1396 gctl_error(req, "Can't get sector size."); 1397 return (-8); 1398 } 1399 1400 /* Handle size argument. */ 1401 len = sizeof(*sizearg); 1402 sizearg = gctl_get_param(req, "size", &len); 1403 if (sizearg != NULL && len == sizeof(*sizearg) && 1404 *sizearg > 0) { 1405 if (*sizearg > size) { 1406 gctl_error(req, "Size too big %lld > %lld.", 1407 (long long)*sizearg, (long long)size); 1408 return (-9); 1409 } 1410 size = *sizearg; 1411 } 1412 1413 /* Handle strip argument. */ 1414 strip = 131072; 1415 len = sizeof(*striparg); 1416 striparg = gctl_get_param(req, "strip", &len); 1417 if (striparg != NULL && len == sizeof(*striparg) && 1418 *striparg > 0) { 1419 if (*striparg < sectorsize) { 1420 gctl_error(req, "Strip size too small."); 1421 return (-10); 1422 } 1423 if (*striparg % sectorsize != 0) { 1424 gctl_error(req, "Incorrect strip size."); 1425 return (-11); 1426 } 1427 strip = *striparg; 1428 } 1429 1430 /* Round size down to strip or sector. */ 1431 if (level == G_RAID_VOLUME_RL_RAID1 || 1432 level == G_RAID_VOLUME_RL_SINGLE || 1433 level == G_RAID_VOLUME_RL_CONCAT) 1434 size -= (size % sectorsize); 1435 else if (level == G_RAID_VOLUME_RL_RAID1E && 1436 (numdisks & 1) != 0) 1437 size -= (size % (2 * strip)); 1438 else 1439 size -= (size % strip); 1440 if (size <= 0) { 1441 gctl_error(req, "Size too small."); 1442 return (-13); 1443 } 1444 1445 /* We have all we need, create things: volume, ... */ 1446 pv = malloc(sizeof(*pv), M_MD_PROMISE, M_WAITOK | M_ZERO); 1447 arc4rand(&pv->pv_id, sizeof(pv->pv_id), 0); 1448 pv->pv_generation = 0; 1449 pv->pv_started = 1; 1450 vol = g_raid_create_volume(sc, volname, -1); 1451 vol->v_md_data = pv; 1452 vol->v_raid_level = level; 1453 vol->v_raid_level_qualifier = qual; 1454 vol->v_strip_size = strip; 1455 vol->v_disks_count = numdisks; 1456 if (level == G_RAID_VOLUME_RL_RAID0 || 1457 level == G_RAID_VOLUME_RL_CONCAT || 1458 level == G_RAID_VOLUME_RL_SINGLE) 1459 vol->v_mediasize = size * numdisks; 1460 else if (level == G_RAID_VOLUME_RL_RAID1) 1461 vol->v_mediasize = size; 1462 else if (level == G_RAID_VOLUME_RL_RAID3 || 1463 level == G_RAID_VOLUME_RL_RAID5) 1464 vol->v_mediasize = size * (numdisks - 1); 1465 else { /* RAID1E */ 1466 vol->v_mediasize = ((size * numdisks) / strip / 2) * 1467 strip; 1468 } 1469 vol->v_sectorsize = sectorsize; 1470 g_raid_start_volume(vol); 1471 1472 /* , and subdisks. */ 1473 for (i = 0; i < numdisks; i++) { 1474 disk = disks[i]; 1475 sd = &vol->v_subdisks[i]; 1476 sd->sd_disk = disk; 1477 sd->sd_offset = (off_t)offs[i] * 512; 1478 sd->sd_size = size; 1479 if (disk == NULL) 1480 continue; 1481 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1482 g_raid_change_disk_state(disk, 1483 G_RAID_DISK_S_ACTIVE); 1484 g_raid_change_subdisk_state(sd, 1485 G_RAID_SUBDISK_S_ACTIVE); 1486 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1487 G_RAID_EVENT_SUBDISK); 1488 } 1489 1490 /* Write metadata based on created entities. */ 1491 G_RAID_DEBUG1(0, sc, "Array started."); 1492 g_raid_md_write_promise(md, vol, NULL, NULL); 1493 1494 /* Pickup any STALE/SPARE disks to refill array if needed. */ 1495 g_raid_md_promise_refill(sc); 1496 1497 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1498 G_RAID_EVENT_VOLUME); 1499 return (0); 1500 } 1501 if (strcmp(verb, "add") == 0) { 1502 gctl_error(req, "`add` command is not applicable, " 1503 "use `label` instead."); 1504 return (-99); 1505 } 1506 if (strcmp(verb, "delete") == 0) { 1507 nodename = gctl_get_asciiparam(req, "arg0"); 1508 if (nodename != NULL && strcasecmp(sc->sc_name, nodename) != 0) 1509 nodename = NULL; 1510 1511 /* Full node destruction. */ 1512 if (*nargs == 1 && nodename != NULL) { 1513 /* Check if some volume is still open. */ 1514 force = gctl_get_paraml(req, "force", sizeof(*force)); 1515 if (force != NULL && *force == 0 && 1516 g_raid_nopens(sc) != 0) { 1517 gctl_error(req, "Some volume is still open."); 1518 return (-4); 1519 } 1520 1521 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1522 if (disk->d_consumer) 1523 promise_meta_erase(disk->d_consumer); 1524 } 1525 g_raid_destroy_node(sc, 0); 1526 return (0); 1527 } 1528 1529 /* Destroy specified volume. If it was last - all node. */ 1530 if (*nargs > 2) { 1531 gctl_error(req, "Invalid number of arguments."); 1532 return (-1); 1533 } 1534 volname = gctl_get_asciiparam(req, 1535 nodename != NULL ? "arg1" : "arg0"); 1536 if (volname == NULL) { 1537 gctl_error(req, "No volume name."); 1538 return (-2); 1539 } 1540 1541 /* Search for volume. */ 1542 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1543 if (strcmp(vol->v_name, volname) == 0) 1544 break; 1545 pp = vol->v_provider; 1546 if (pp == NULL) 1547 continue; 1548 if (strcmp(pp->name, volname) == 0) 1549 break; 1550 if (strncmp(pp->name, "raid/", 5) == 0 && 1551 strcmp(pp->name + 5, volname) == 0) 1552 break; 1553 } 1554 if (vol == NULL) { 1555 i = strtol(volname, &tmp, 10); 1556 if (verb != volname && tmp[0] == 0) { 1557 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1558 if (vol->v_global_id == i) 1559 break; 1560 } 1561 } 1562 } 1563 if (vol == NULL) { 1564 gctl_error(req, "Volume '%s' not found.", volname); 1565 return (-3); 1566 } 1567 1568 /* Check if volume is still open. */ 1569 force = gctl_get_paraml(req, "force", sizeof(*force)); 1570 if (force != NULL && *force == 0 && 1571 vol->v_provider_open != 0) { 1572 gctl_error(req, "Volume is still open."); 1573 return (-4); 1574 } 1575 1576 /* Destroy volume and potentially node. */ 1577 i = 0; 1578 TAILQ_FOREACH(vol1, &sc->sc_volumes, v_next) 1579 i++; 1580 if (i >= 2) { 1581 g_raid_destroy_volume(vol); 1582 g_raid_md_promise_purge_disks(sc); 1583 g_raid_md_write_promise(md, NULL, NULL, NULL); 1584 } else { 1585 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1586 if (disk->d_consumer) 1587 promise_meta_erase(disk->d_consumer); 1588 } 1589 g_raid_destroy_node(sc, 0); 1590 } 1591 return (0); 1592 } 1593 if (strcmp(verb, "remove") == 0 || 1594 strcmp(verb, "fail") == 0) { 1595 if (*nargs < 2) { 1596 gctl_error(req, "Invalid number of arguments."); 1597 return (-1); 1598 } 1599 for (i = 1; i < *nargs; i++) { 1600 snprintf(arg, sizeof(arg), "arg%d", i); 1601 diskname = gctl_get_asciiparam(req, arg); 1602 if (diskname == NULL) { 1603 gctl_error(req, "No disk name (%s).", arg); 1604 error = -2; 1605 break; 1606 } 1607 if (strncmp(diskname, _PATH_DEV, 5) == 0) 1608 diskname += 5; 1609 1610 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1611 if (disk->d_consumer != NULL && 1612 disk->d_consumer->provider != NULL && 1613 strcmp(disk->d_consumer->provider->name, 1614 diskname) == 0) 1615 break; 1616 } 1617 if (disk == NULL) { 1618 gctl_error(req, "Disk '%s' not found.", 1619 diskname); 1620 error = -3; 1621 break; 1622 } 1623 1624 if (strcmp(verb, "fail") == 0) { 1625 g_raid_md_fail_disk_promise(md, NULL, disk); 1626 continue; 1627 } 1628 1629 /* Erase metadata on deleting disk and destroy it. */ 1630 promise_meta_erase(disk->d_consumer); 1631 g_raid_destroy_disk(disk); 1632 } 1633 g_raid_md_promise_purge_volumes(sc); 1634 1635 /* Write updated metadata to remaining disks. */ 1636 g_raid_md_write_promise(md, NULL, NULL, NULL); 1637 1638 /* Check if anything left. */ 1639 if (g_raid_ndisks(sc, -1) == 0) 1640 g_raid_destroy_node(sc, 0); 1641 else 1642 g_raid_md_promise_refill(sc); 1643 return (error); 1644 } 1645 if (strcmp(verb, "insert") == 0) { 1646 if (*nargs < 2) { 1647 gctl_error(req, "Invalid number of arguments."); 1648 return (-1); 1649 } 1650 for (i = 1; i < *nargs; i++) { 1651 /* Get disk name. */ 1652 snprintf(arg, sizeof(arg), "arg%d", i); 1653 diskname = gctl_get_asciiparam(req, arg); 1654 if (diskname == NULL) { 1655 gctl_error(req, "No disk name (%s).", arg); 1656 error = -3; 1657 break; 1658 } 1659 1660 /* Try to find provider with specified name. */ 1661 g_topology_lock(); 1662 cp = g_raid_open_consumer(sc, diskname); 1663 if (cp == NULL) { 1664 gctl_error(req, "Can't open disk '%s'.", 1665 diskname); 1666 g_topology_unlock(); 1667 error = -4; 1668 break; 1669 } 1670 pp = cp->provider; 1671 g_topology_unlock(); 1672 1673 pd = malloc(sizeof(*pd), M_MD_PROMISE, M_WAITOK | M_ZERO); 1674 1675 disk = g_raid_create_disk(sc); 1676 disk->d_consumer = cp; 1677 disk->d_md_data = (void *)pd; 1678 cp->private = disk; 1679 1680 g_raid_get_disk_info(disk); 1681 1682 /* Welcome the "new" disk. */ 1683 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 1684 promise_meta_write_spare(cp); 1685 g_raid_md_promise_refill(sc); 1686 } 1687 return (error); 1688 } 1689 return (-100); 1690 } 1691 1692 static int 1693 g_raid_md_write_promise(struct g_raid_md_object *md, struct g_raid_volume *tvol, 1694 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1695 { 1696 struct g_raid_softc *sc; 1697 struct g_raid_volume *vol; 1698 struct g_raid_subdisk *sd; 1699 struct g_raid_disk *disk; 1700 struct g_raid_md_promise_perdisk *pd; 1701 struct g_raid_md_promise_pervolume *pv; 1702 struct promise_raid_conf *meta; 1703 off_t rebuild_lba64; 1704 int i, j, pos, rebuild; 1705 1706 sc = md->mdo_softc; 1707 1708 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 1709 return (0); 1710 1711 /* Generate new per-volume metadata for affected volumes. */ 1712 TAILQ_FOREACH(vol, &sc->sc_volumes, v_next) { 1713 if (vol->v_stopping) 1714 continue; 1715 1716 /* Skip volumes not related to specified targets. */ 1717 if (tvol != NULL && vol != tvol) 1718 continue; 1719 if (tsd != NULL && vol != tsd->sd_volume) 1720 continue; 1721 if (tdisk != NULL) { 1722 for (i = 0; i < vol->v_disks_count; i++) { 1723 if (vol->v_subdisks[i].sd_disk == tdisk) 1724 break; 1725 } 1726 if (i >= vol->v_disks_count) 1727 continue; 1728 } 1729 1730 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1731 pv->pv_generation++; 1732 1733 meta = malloc(sizeof(*meta), M_MD_PROMISE, M_WAITOK | M_ZERO); 1734 if (pv->pv_meta != NULL) 1735 memcpy(meta, pv->pv_meta, sizeof(*meta)); 1736 memcpy(meta->promise_id, PROMISE_MAGIC, 1737 sizeof(PROMISE_MAGIC) - 1); 1738 meta->dummy_0 = 0x00020000; 1739 meta->integrity = PROMISE_I_VALID; 1740 1741 meta->generation = pv->pv_generation; 1742 meta->status = PROMISE_S_VALID | PROMISE_S_ONLINE | 1743 PROMISE_S_INITED | PROMISE_S_READY; 1744 if (vol->v_state <= G_RAID_VOLUME_S_DEGRADED) 1745 meta->status |= PROMISE_S_DEGRADED; 1746 if (vol->v_dirty) 1747 meta->status |= PROMISE_S_MARKED; /* XXX: INVENTED! */ 1748 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0 || 1749 vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) 1750 meta->type = PROMISE_T_RAID0; 1751 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1752 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1753 meta->type = PROMISE_T_RAID1; 1754 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3) 1755 meta->type = PROMISE_T_RAID3; 1756 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) 1757 meta->type = PROMISE_T_RAID5; 1758 else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT) 1759 meta->type = PROMISE_T_SPAN; 1760 else 1761 meta->type = PROMISE_T_JBOD; 1762 meta->total_disks = vol->v_disks_count; 1763 meta->stripe_shift = ffs(vol->v_strip_size / 1024); 1764 meta->array_width = vol->v_disks_count; 1765 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 1766 meta->array_width = 1; 1767 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1768 meta->array_width /= 2; 1769 meta->array_number = vol->v_global_id; 1770 meta->total_sectors = vol->v_mediasize / 512; 1771 meta->total_sectors_high = (vol->v_mediasize / 512) >> 32; 1772 meta->sector_size = vol->v_sectorsize / 512; 1773 meta->cylinders = meta->total_sectors / (255 * 63) - 1; 1774 meta->heads = 254; 1775 meta->sectors = 63; 1776 meta->volume_id = pv->pv_id; 1777 rebuild_lba64 = UINT64_MAX; 1778 rebuild = 0; 1779 for (i = 0; i < vol->v_disks_count; i++) { 1780 sd = &vol->v_subdisks[i]; 1781 /* For RAID0+1 we need to translate order. */ 1782 pos = promise_meta_translate_disk(vol, i); 1783 meta->disks[pos].flags = PROMISE_F_VALID | 1784 PROMISE_F_ASSIGNED; 1785 if (sd->sd_state == G_RAID_SUBDISK_S_NONE) { 1786 meta->disks[pos].flags |= 0; 1787 } else if (sd->sd_state == G_RAID_SUBDISK_S_FAILED) { 1788 meta->disks[pos].flags |= 1789 PROMISE_F_DOWN | PROMISE_F_REDIR; 1790 } else if (sd->sd_state <= G_RAID_SUBDISK_S_REBUILD) { 1791 meta->disks[pos].flags |= 1792 PROMISE_F_ONLINE | PROMISE_F_REDIR; 1793 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { 1794 rebuild_lba64 = MIN(rebuild_lba64, 1795 sd->sd_rebuild_pos / 512); 1796 } else 1797 rebuild_lba64 = 0; 1798 rebuild = 1; 1799 } else { 1800 meta->disks[pos].flags |= PROMISE_F_ONLINE; 1801 if (sd->sd_state < G_RAID_SUBDISK_S_ACTIVE) { 1802 meta->status |= PROMISE_S_MARKED; 1803 if (sd->sd_state == G_RAID_SUBDISK_S_RESYNC) { 1804 rebuild_lba64 = MIN(rebuild_lba64, 1805 sd->sd_rebuild_pos / 512); 1806 } else 1807 rebuild_lba64 = 0; 1808 } 1809 } 1810 if (pv->pv_meta != NULL) { 1811 meta->disks[pos].id = pv->pv_meta->disks[pos].id; 1812 } else { 1813 meta->disks[pos].number = i * 2; 1814 arc4rand(&meta->disks[pos].id, 1815 sizeof(meta->disks[pos].id), 0); 1816 } 1817 } 1818 promise_meta_put_name(meta, vol->v_name); 1819 1820 /* Try to mimic AMD BIOS rebuild/resync behavior. */ 1821 if (rebuild_lba64 != UINT64_MAX) { 1822 if (rebuild) 1823 meta->magic_3 = 0x03040010UL; /* Rebuild? */ 1824 else 1825 meta->magic_3 = 0x03040008UL; /* Resync? */ 1826 /* Translate from per-disk to per-volume LBA. */ 1827 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1 || 1828 vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) { 1829 rebuild_lba64 *= meta->array_width; 1830 } else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID3 || 1831 vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) { 1832 rebuild_lba64 *= meta->array_width - 1; 1833 } else 1834 rebuild_lba64 = 0; 1835 } else 1836 meta->magic_3 = 0x03000000UL; 1837 meta->rebuild_lba64 = rebuild_lba64; 1838 meta->magic_4 = 0x04010101UL; 1839 1840 /* Replace per-volume metadata with new. */ 1841 if (pv->pv_meta != NULL) 1842 free(pv->pv_meta, M_MD_PROMISE); 1843 pv->pv_meta = meta; 1844 1845 /* Copy new metadata to the disks, adding or replacing old. */ 1846 for (i = 0; i < vol->v_disks_count; i++) { 1847 sd = &vol->v_subdisks[i]; 1848 disk = sd->sd_disk; 1849 if (disk == NULL) 1850 continue; 1851 /* For RAID0+1 we need to translate order. */ 1852 pos = promise_meta_translate_disk(vol, i); 1853 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1854 for (j = 0; j < pd->pd_subdisks; j++) { 1855 if (pd->pd_meta[j]->volume_id == meta->volume_id) 1856 break; 1857 } 1858 if (j == pd->pd_subdisks) 1859 pd->pd_subdisks++; 1860 if (pd->pd_meta[j] != NULL) 1861 free(pd->pd_meta[j], M_MD_PROMISE); 1862 pd->pd_meta[j] = promise_meta_copy(meta); 1863 pd->pd_meta[j]->disk = meta->disks[pos]; 1864 pd->pd_meta[j]->disk.number = pos; 1865 pd->pd_meta[j]->disk_offset_high = 1866 (sd->sd_offset / 512) >> 32; 1867 pd->pd_meta[j]->disk_offset = sd->sd_offset / 512; 1868 pd->pd_meta[j]->disk_sectors_high = 1869 (sd->sd_size / 512) >> 32; 1870 pd->pd_meta[j]->disk_sectors = sd->sd_size / 512; 1871 if (sd->sd_state == G_RAID_SUBDISK_S_REBUILD) { 1872 pd->pd_meta[j]->disk_rebuild_high = 1873 (sd->sd_rebuild_pos / 512) >> 32; 1874 pd->pd_meta[j]->disk_rebuild = 1875 sd->sd_rebuild_pos / 512; 1876 } else if (sd->sd_state < G_RAID_SUBDISK_S_REBUILD) { 1877 pd->pd_meta[j]->disk_rebuild_high = 0; 1878 pd->pd_meta[j]->disk_rebuild = 0; 1879 } else { 1880 pd->pd_meta[j]->disk_rebuild_high = UINT32_MAX; 1881 pd->pd_meta[j]->disk_rebuild = UINT32_MAX; 1882 } 1883 pd->pd_updated = 1; 1884 } 1885 } 1886 1887 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1888 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1889 if (disk->d_state != G_RAID_DISK_S_ACTIVE) 1890 continue; 1891 if (!pd->pd_updated) 1892 continue; 1893 G_RAID_DEBUG(1, "Writing Promise metadata to %s", 1894 g_raid_get_diskname(disk)); 1895 for (i = 0; i < pd->pd_subdisks; i++) 1896 g_raid_md_promise_print(pd->pd_meta[i]); 1897 promise_meta_write(disk->d_consumer, 1898 pd->pd_meta, pd->pd_subdisks); 1899 pd->pd_updated = 0; 1900 } 1901 1902 return (0); 1903 } 1904 1905 static int 1906 g_raid_md_fail_disk_promise(struct g_raid_md_object *md, 1907 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1908 { 1909 struct g_raid_softc *sc; 1910 struct g_raid_md_promise_perdisk *pd; 1911 struct g_raid_subdisk *sd; 1912 int i, pos; 1913 1914 sc = md->mdo_softc; 1915 pd = (struct g_raid_md_promise_perdisk *)tdisk->d_md_data; 1916 1917 /* We can't fail disk that is not a part of array now. */ 1918 if (tdisk->d_state != G_RAID_DISK_S_ACTIVE) 1919 return (-1); 1920 1921 /* 1922 * Mark disk as failed in metadata and try to write that metadata 1923 * to the disk itself to prevent it's later resurrection as STALE. 1924 */ 1925 if (pd->pd_subdisks > 0 && tdisk->d_consumer != NULL) 1926 G_RAID_DEBUG(1, "Writing Promise metadata to %s", 1927 g_raid_get_diskname(tdisk)); 1928 for (i = 0; i < pd->pd_subdisks; i++) { 1929 pd->pd_meta[i]->disk.flags |= 1930 PROMISE_F_DOWN | PROMISE_F_REDIR; 1931 pos = pd->pd_meta[i]->disk.number; 1932 if (pos >= 0 && pos < PROMISE_MAX_DISKS) { 1933 pd->pd_meta[i]->disks[pos].flags |= 1934 PROMISE_F_DOWN | PROMISE_F_REDIR; 1935 } 1936 g_raid_md_promise_print(pd->pd_meta[i]); 1937 } 1938 if (tdisk->d_consumer != NULL) 1939 promise_meta_write(tdisk->d_consumer, 1940 pd->pd_meta, pd->pd_subdisks); 1941 1942 /* Change states. */ 1943 g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); 1944 TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { 1945 g_raid_change_subdisk_state(sd, 1946 G_RAID_SUBDISK_S_FAILED); 1947 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, 1948 G_RAID_EVENT_SUBDISK); 1949 } 1950 1951 /* Write updated metadata to remaining disks. */ 1952 g_raid_md_write_promise(md, NULL, NULL, tdisk); 1953 1954 g_raid_md_promise_refill(sc); 1955 return (0); 1956 } 1957 1958 static int 1959 g_raid_md_free_disk_promise(struct g_raid_md_object *md, 1960 struct g_raid_disk *disk) 1961 { 1962 struct g_raid_md_promise_perdisk *pd; 1963 int i; 1964 1965 pd = (struct g_raid_md_promise_perdisk *)disk->d_md_data; 1966 for (i = 0; i < pd->pd_subdisks; i++) { 1967 if (pd->pd_meta[i] != NULL) { 1968 free(pd->pd_meta[i], M_MD_PROMISE); 1969 pd->pd_meta[i] = NULL; 1970 } 1971 } 1972 free(pd, M_MD_PROMISE); 1973 disk->d_md_data = NULL; 1974 return (0); 1975 } 1976 1977 static int 1978 g_raid_md_free_volume_promise(struct g_raid_md_object *md, 1979 struct g_raid_volume *vol) 1980 { 1981 struct g_raid_md_promise_pervolume *pv; 1982 1983 pv = (struct g_raid_md_promise_pervolume *)vol->v_md_data; 1984 if (pv && pv->pv_meta != NULL) { 1985 free(pv->pv_meta, M_MD_PROMISE); 1986 pv->pv_meta = NULL; 1987 } 1988 if (pv && !pv->pv_started) { 1989 pv->pv_started = 1; 1990 callout_stop(&pv->pv_start_co); 1991 } 1992 free(pv, M_MD_PROMISE); 1993 vol->v_md_data = NULL; 1994 return (0); 1995 } 1996 1997 static int 1998 g_raid_md_free_promise(struct g_raid_md_object *md) 1999 { 2000 2001 return (0); 2002 } 2003 2004 G_RAID_MD_DECLARE(promise, "Promise"); 2005