1 /*- 2 * Copyright (c) 2011 Alexander Motin <mav@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/endian.h> 33 #include <sys/kernel.h> 34 #include <sys/kobj.h> 35 #include <sys/limits.h> 36 #include <sys/lock.h> 37 #include <sys/malloc.h> 38 #include <sys/mutex.h> 39 #include <sys/systm.h> 40 #include <sys/taskqueue.h> 41 #include <geom/geom.h> 42 #include "geom/raid/g_raid.h" 43 #include "g_raid_md_if.h" 44 45 static MALLOC_DEFINE(M_MD_NVIDIA, "md_nvidia_data", "GEOM_RAID NVIDIA metadata"); 46 47 struct nvidia_raid_conf { 48 uint8_t nvidia_id[8]; 49 #define NVIDIA_MAGIC "NVIDIA " 50 51 uint32_t config_size; 52 uint32_t checksum; 53 uint16_t version; 54 uint8_t disk_number; 55 uint8_t dummy_0; 56 uint32_t total_sectors; 57 uint32_t sector_size; 58 uint8_t name[16]; 59 uint8_t revision[4]; 60 uint32_t disk_status; 61 62 uint32_t magic_0; 63 #define NVIDIA_MAGIC0 0x00640044 64 65 uint64_t volume_id[2]; 66 uint8_t state; 67 #define NVIDIA_S_IDLE 0 68 #define NVIDIA_S_INIT 2 69 #define NVIDIA_S_REBUILD 3 70 #define NVIDIA_S_UPGRADE 4 71 #define NVIDIA_S_SYNC 5 72 uint8_t array_width; 73 uint8_t total_disks; 74 uint8_t orig_array_width; 75 uint16_t type; 76 #define NVIDIA_T_RAID0 0x0080 77 #define NVIDIA_T_RAID1 0x0081 78 #define NVIDIA_T_RAID3 0x0083 79 #define NVIDIA_T_RAID5 0x0085 /* RLQ = 00/02? */ 80 #define NVIDIA_T_RAID5_SYM 0x0095 /* RLQ = 03 */ 81 #define NVIDIA_T_RAID10 0x008a 82 #define NVIDIA_T_RAID01 0x8180 83 #define NVIDIA_T_CONCAT 0x00ff 84 85 uint16_t dummy_3; 86 uint32_t strip_sectors; 87 uint32_t strip_bytes; 88 uint32_t strip_shift; 89 uint32_t strip_mask; 90 uint32_t stripe_sectors; 91 uint32_t stripe_bytes; 92 uint32_t rebuild_lba; 93 uint32_t orig_type; 94 uint32_t orig_total_sectors; 95 uint32_t status; 96 #define NVIDIA_S_BOOTABLE 0x00000001 97 #define NVIDIA_S_DEGRADED 0x00000002 98 99 uint32_t filler[98]; 100 } __packed; 101 102 struct g_raid_md_nvidia_perdisk { 103 struct nvidia_raid_conf *pd_meta; 104 int pd_disk_pos; 105 off_t pd_disk_size; 106 }; 107 108 struct g_raid_md_nvidia_object { 109 struct g_raid_md_object mdio_base; 110 uint64_t mdio_volume_id[2]; 111 struct nvidia_raid_conf *mdio_meta; 112 struct callout mdio_start_co; /* STARTING state timer. */ 113 int mdio_total_disks; 114 int mdio_disks_present; 115 int mdio_started; 116 int mdio_incomplete; 117 struct root_hold_token *mdio_rootmount; /* Root mount delay token. */ 118 }; 119 120 static g_raid_md_create_t g_raid_md_create_nvidia; 121 static g_raid_md_taste_t g_raid_md_taste_nvidia; 122 static g_raid_md_event_t g_raid_md_event_nvidia; 123 static g_raid_md_ctl_t g_raid_md_ctl_nvidia; 124 static g_raid_md_write_t g_raid_md_write_nvidia; 125 static g_raid_md_fail_disk_t g_raid_md_fail_disk_nvidia; 126 static g_raid_md_free_disk_t g_raid_md_free_disk_nvidia; 127 static g_raid_md_free_t g_raid_md_free_nvidia; 128 129 static kobj_method_t g_raid_md_nvidia_methods[] = { 130 KOBJMETHOD(g_raid_md_create, g_raid_md_create_nvidia), 131 KOBJMETHOD(g_raid_md_taste, g_raid_md_taste_nvidia), 132 KOBJMETHOD(g_raid_md_event, g_raid_md_event_nvidia), 133 KOBJMETHOD(g_raid_md_ctl, g_raid_md_ctl_nvidia), 134 KOBJMETHOD(g_raid_md_write, g_raid_md_write_nvidia), 135 KOBJMETHOD(g_raid_md_fail_disk, g_raid_md_fail_disk_nvidia), 136 KOBJMETHOD(g_raid_md_free_disk, g_raid_md_free_disk_nvidia), 137 KOBJMETHOD(g_raid_md_free, g_raid_md_free_nvidia), 138 { 0, 0 } 139 }; 140 141 static struct g_raid_md_class g_raid_md_nvidia_class = { 142 "NVIDIA", 143 g_raid_md_nvidia_methods, 144 sizeof(struct g_raid_md_nvidia_object), 145 .mdc_priority = 100 146 }; 147 148 static int NVIDIANodeID = 1; 149 150 static void 151 g_raid_md_nvidia_print(struct nvidia_raid_conf *meta) 152 { 153 154 if (g_raid_debug < 1) 155 return; 156 157 printf("********* ATA NVIDIA RAID Metadata *********\n"); 158 printf("nvidia_id <%.8s>\n", meta->nvidia_id); 159 printf("config_size %u\n", meta->config_size); 160 printf("checksum 0x%08x\n", meta->checksum); 161 printf("version 0x%04x\n", meta->version); 162 printf("disk_number %d\n", meta->disk_number); 163 printf("dummy_0 0x%02x\n", meta->dummy_0); 164 printf("total_sectors %u\n", meta->total_sectors); 165 printf("sector_size %u\n", meta->sector_size); 166 printf("name <%.16s>\n", meta->name); 167 printf("revision 0x%02x%02x%02x%02x\n", 168 meta->revision[0], meta->revision[1], 169 meta->revision[2], meta->revision[3]); 170 printf("disk_status 0x%08x\n", meta->disk_status); 171 printf("magic_0 0x%08x\n", meta->magic_0); 172 printf("volume_id 0x%016jx%016jx\n", 173 meta->volume_id[1], meta->volume_id[0]); 174 printf("state 0x%02x\n", meta->state); 175 printf("array_width %u\n", meta->array_width); 176 printf("total_disks %u\n", meta->total_disks); 177 printf("orig_array_width %u\n", meta->orig_array_width); 178 printf("type 0x%04x\n", meta->type); 179 printf("dummy_3 0x%04x\n", meta->dummy_3); 180 printf("strip_sectors %u\n", meta->strip_sectors); 181 printf("strip_bytes %u\n", meta->strip_bytes); 182 printf("strip_shift %u\n", meta->strip_shift); 183 printf("strip_mask 0x%08x\n", meta->strip_mask); 184 printf("stripe_sectors %u\n", meta->stripe_sectors); 185 printf("stripe_bytes %u\n", meta->stripe_bytes); 186 printf("rebuild_lba %u\n", meta->rebuild_lba); 187 printf("orig_type 0x%04x\n", meta->orig_type); 188 printf("orig_total_sectors %u\n", meta->orig_total_sectors); 189 printf("status 0x%08x\n", meta->status); 190 printf("=================================================\n"); 191 } 192 193 static struct nvidia_raid_conf * 194 nvidia_meta_copy(struct nvidia_raid_conf *meta) 195 { 196 struct nvidia_raid_conf *nmeta; 197 198 nmeta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK); 199 memcpy(nmeta, meta, sizeof(*meta)); 200 return (nmeta); 201 } 202 203 static int 204 nvidia_meta_translate_disk(struct nvidia_raid_conf *meta, int md_disk_pos) 205 { 206 int disk_pos; 207 208 if (md_disk_pos >= 0 && meta->type == NVIDIA_T_RAID01) { 209 disk_pos = (md_disk_pos / meta->array_width) + 210 (md_disk_pos % meta->array_width) * meta->array_width; 211 } else 212 disk_pos = md_disk_pos; 213 return (disk_pos); 214 } 215 216 static void 217 nvidia_meta_get_name(struct nvidia_raid_conf *meta, char *buf) 218 { 219 int i; 220 221 strncpy(buf, meta->name, 16); 222 buf[16] = 0; 223 for (i = 15; i >= 0; i--) { 224 if (buf[i] > 0x20) 225 break; 226 buf[i] = 0; 227 } 228 } 229 230 static void 231 nvidia_meta_put_name(struct nvidia_raid_conf *meta, char *buf) 232 { 233 234 memset(meta->name, 0x20, 16); 235 memcpy(meta->name, buf, MIN(strlen(buf), 16)); 236 } 237 238 static struct nvidia_raid_conf * 239 nvidia_meta_read(struct g_consumer *cp) 240 { 241 struct g_provider *pp; 242 struct nvidia_raid_conf *meta; 243 char *buf; 244 int error, i; 245 uint32_t checksum, *ptr; 246 247 pp = cp->provider; 248 249 /* Read the anchor sector. */ 250 buf = g_read_data(cp, 251 pp->mediasize - 2 * pp->sectorsize, pp->sectorsize, &error); 252 if (buf == NULL) { 253 G_RAID_DEBUG(1, "Cannot read metadata from %s (error=%d).", 254 pp->name, error); 255 return (NULL); 256 } 257 meta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK); 258 memcpy(meta, buf, min(sizeof(*meta), pp->sectorsize)); 259 g_free(buf); 260 261 /* Check if this is an NVIDIA RAID struct */ 262 if (strncmp(meta->nvidia_id, NVIDIA_MAGIC, strlen(NVIDIA_MAGIC))) { 263 G_RAID_DEBUG(1, "NVIDIA signature check failed on %s", pp->name); 264 free(meta, M_MD_NVIDIA); 265 return (NULL); 266 } 267 if (meta->config_size > 128 || 268 meta->config_size < 30) { 269 G_RAID_DEBUG(1, "NVIDIA metadata size looks wrong: %d", 270 meta->config_size); 271 free(meta, M_MD_NVIDIA); 272 return (NULL); 273 } 274 275 /* Check metadata checksum. */ 276 for (checksum = 0, ptr = (uint32_t *)meta, 277 i = 0; i < meta->config_size; i++) 278 checksum += *ptr++; 279 if (checksum != 0) { 280 G_RAID_DEBUG(1, "NVIDIA checksum check failed on %s", pp->name); 281 free(meta, M_MD_NVIDIA); 282 return (NULL); 283 } 284 285 /* Check volume state. */ 286 if (meta->state != NVIDIA_S_IDLE && meta->state != NVIDIA_S_INIT && 287 meta->state != NVIDIA_S_REBUILD && meta->state != NVIDIA_S_SYNC) { 288 G_RAID_DEBUG(1, "NVIDIA unknown state on %s (0x%02x)", 289 pp->name, meta->state); 290 free(meta, M_MD_NVIDIA); 291 return (NULL); 292 } 293 294 /* Check raid type. */ 295 if (meta->type != NVIDIA_T_RAID0 && meta->type != NVIDIA_T_RAID1 && 296 meta->type != NVIDIA_T_RAID3 && meta->type != NVIDIA_T_RAID5 && 297 meta->type != NVIDIA_T_RAID5_SYM && 298 meta->type != NVIDIA_T_RAID01 && meta->type != NVIDIA_T_CONCAT) { 299 G_RAID_DEBUG(1, "NVIDIA unknown RAID level on %s (0x%02x)", 300 pp->name, meta->type); 301 free(meta, M_MD_NVIDIA); 302 return (NULL); 303 } 304 305 return (meta); 306 } 307 308 static int 309 nvidia_meta_write(struct g_consumer *cp, struct nvidia_raid_conf *meta) 310 { 311 struct g_provider *pp; 312 char *buf; 313 int error, i; 314 uint32_t checksum, *ptr; 315 316 pp = cp->provider; 317 318 /* Recalculate checksum for case if metadata were changed. */ 319 meta->checksum = 0; 320 for (checksum = 0, ptr = (uint32_t *)meta, 321 i = 0; i < meta->config_size; i++) 322 checksum += *ptr++; 323 meta->checksum -= checksum; 324 325 /* Create and fill buffer. */ 326 buf = malloc(pp->sectorsize, M_MD_NVIDIA, M_WAITOK | M_ZERO); 327 memcpy(buf, meta, sizeof(*meta)); 328 329 /* Write metadata. */ 330 error = g_write_data(cp, 331 pp->mediasize - 2 * pp->sectorsize, buf, pp->sectorsize); 332 if (error != 0) { 333 G_RAID_DEBUG(1, "Cannot write metadata to %s (error=%d).", 334 pp->name, error); 335 } 336 337 free(buf, M_MD_NVIDIA); 338 return (error); 339 } 340 341 static int 342 nvidia_meta_erase(struct g_consumer *cp) 343 { 344 struct g_provider *pp; 345 char *buf; 346 int error; 347 348 pp = cp->provider; 349 buf = malloc(pp->sectorsize, M_MD_NVIDIA, M_WAITOK | M_ZERO); 350 error = g_write_data(cp, 351 pp->mediasize - 2 * pp->sectorsize, buf, pp->sectorsize); 352 if (error != 0) { 353 G_RAID_DEBUG(1, "Cannot erase metadata on %s (error=%d).", 354 pp->name, error); 355 } 356 free(buf, M_MD_NVIDIA); 357 return (error); 358 } 359 360 static struct g_raid_disk * 361 g_raid_md_nvidia_get_disk(struct g_raid_softc *sc, int id) 362 { 363 struct g_raid_disk *disk; 364 struct g_raid_md_nvidia_perdisk *pd; 365 366 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 367 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 368 if (pd->pd_disk_pos == id) 369 break; 370 } 371 return (disk); 372 } 373 374 static int 375 g_raid_md_nvidia_supported(int level, int qual, int disks, int force) 376 { 377 378 switch (level) { 379 case G_RAID_VOLUME_RL_RAID0: 380 if (disks < 1) 381 return (0); 382 if (!force && (disks < 2 || disks > 6)) 383 return (0); 384 break; 385 case G_RAID_VOLUME_RL_RAID1: 386 if (disks < 1) 387 return (0); 388 if (!force && (disks != 2)) 389 return (0); 390 break; 391 case G_RAID_VOLUME_RL_RAID1E: 392 if (disks < 2) 393 return (0); 394 if (disks % 2 != 0) 395 return (0); 396 if (!force && (disks < 4)) 397 return (0); 398 break; 399 case G_RAID_VOLUME_RL_SINGLE: 400 if (disks != 1) 401 return (0); 402 break; 403 case G_RAID_VOLUME_RL_CONCAT: 404 if (disks < 2) 405 return (0); 406 break; 407 case G_RAID_VOLUME_RL_RAID5: 408 if (disks < 3) 409 return (0); 410 break; 411 default: 412 return (0); 413 } 414 if (qual != G_RAID_VOLUME_RLQ_NONE) 415 return (0); 416 return (1); 417 } 418 419 static int 420 g_raid_md_nvidia_start_disk(struct g_raid_disk *disk) 421 { 422 struct g_raid_softc *sc; 423 struct g_raid_subdisk *sd, *tmpsd; 424 struct g_raid_disk *olddisk, *tmpdisk; 425 struct g_raid_md_object *md; 426 struct g_raid_md_nvidia_object *mdi; 427 struct g_raid_md_nvidia_perdisk *pd, *oldpd; 428 struct nvidia_raid_conf *meta; 429 int disk_pos, resurrection = 0; 430 431 sc = disk->d_softc; 432 md = sc->sc_md; 433 mdi = (struct g_raid_md_nvidia_object *)md; 434 meta = mdi->mdio_meta; 435 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 436 olddisk = NULL; 437 438 /* Find disk position in metadata by it's serial. */ 439 if (pd->pd_meta != NULL) { 440 disk_pos = pd->pd_meta->disk_number; 441 if (disk_pos >= meta->total_disks || mdi->mdio_started) 442 disk_pos = -3; 443 } else 444 disk_pos = -3; 445 /* For RAID0+1 we need to translate order. */ 446 disk_pos = nvidia_meta_translate_disk(meta, disk_pos); 447 if (disk_pos < 0) { 448 G_RAID_DEBUG1(1, sc, "Unknown, probably new or stale disk"); 449 /* If we are in the start process, that's all for now. */ 450 if (!mdi->mdio_started) 451 goto nofit; 452 /* 453 * If we have already started - try to get use of the disk. 454 * Try to replace OFFLINE disks first, then FAILED. 455 */ 456 TAILQ_FOREACH(tmpdisk, &sc->sc_disks, d_next) { 457 if (tmpdisk->d_state != G_RAID_DISK_S_OFFLINE && 458 tmpdisk->d_state != G_RAID_DISK_S_FAILED) 459 continue; 460 /* Make sure this disk is big enough. */ 461 TAILQ_FOREACH(sd, &tmpdisk->d_subdisks, sd_next) { 462 if (sd->sd_offset + sd->sd_size + 2 * 512 > 463 pd->pd_disk_size) { 464 G_RAID_DEBUG1(1, sc, 465 "Disk too small (%ju < %ju)", 466 pd->pd_disk_size, 467 sd->sd_offset + sd->sd_size + 512); 468 break; 469 } 470 } 471 if (sd != NULL) 472 continue; 473 if (tmpdisk->d_state == G_RAID_DISK_S_OFFLINE) { 474 olddisk = tmpdisk; 475 break; 476 } else if (olddisk == NULL) 477 olddisk = tmpdisk; 478 } 479 if (olddisk == NULL) { 480 nofit: 481 g_raid_change_disk_state(disk, G_RAID_DISK_S_SPARE); 482 return (1); 483 } 484 oldpd = (struct g_raid_md_nvidia_perdisk *)olddisk->d_md_data; 485 disk_pos = oldpd->pd_disk_pos; 486 resurrection = 1; 487 } 488 489 if (olddisk == NULL) { 490 /* Find placeholder by position. */ 491 olddisk = g_raid_md_nvidia_get_disk(sc, disk_pos); 492 if (olddisk == NULL) 493 panic("No disk at position %d!", disk_pos); 494 if (olddisk->d_state != G_RAID_DISK_S_OFFLINE) { 495 G_RAID_DEBUG1(1, sc, "More then one disk for pos %d", 496 disk_pos); 497 g_raid_change_disk_state(disk, G_RAID_DISK_S_STALE); 498 return (0); 499 } 500 oldpd = (struct g_raid_md_nvidia_perdisk *)olddisk->d_md_data; 501 } 502 503 /* Replace failed disk or placeholder with new disk. */ 504 TAILQ_FOREACH_SAFE(sd, &olddisk->d_subdisks, sd_next, tmpsd) { 505 TAILQ_REMOVE(&olddisk->d_subdisks, sd, sd_next); 506 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 507 sd->sd_disk = disk; 508 } 509 oldpd->pd_disk_pos = -2; 510 pd->pd_disk_pos = disk_pos; 511 512 /* If it was placeholder -- destroy it. */ 513 if (olddisk->d_state == G_RAID_DISK_S_OFFLINE) { 514 g_raid_destroy_disk(olddisk); 515 } else { 516 /* Otherwise, make it STALE_FAILED. */ 517 g_raid_change_disk_state(olddisk, G_RAID_DISK_S_STALE_FAILED); 518 } 519 520 /* Welcome the new disk. */ 521 if (resurrection) 522 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 523 else// if (pd->pd_meta->disk_status == NVIDIA_S_CURRENT || 524 //pd->pd_meta->disk_status == NVIDIA_S_REBUILD) 525 g_raid_change_disk_state(disk, G_RAID_DISK_S_ACTIVE); 526 // else 527 // g_raid_change_disk_state(disk, G_RAID_DISK_S_FAILED); 528 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 529 530 /* 531 * Different disks may have different sizes, 532 * in concat mode. Update from real disk size. 533 */ 534 if (meta->type == NVIDIA_T_CONCAT) 535 sd->sd_size = pd->pd_disk_size - 0x800 * 512; 536 537 if (resurrection) { 538 /* New or ex-spare disk. */ 539 g_raid_change_subdisk_state(sd, 540 G_RAID_SUBDISK_S_NEW); 541 } else if (meta->state == NVIDIA_S_REBUILD && 542 (pd->pd_meta->disk_status & 0x100)) { 543 /* Rebuilding disk. */ 544 g_raid_change_subdisk_state(sd, 545 G_RAID_SUBDISK_S_REBUILD); 546 sd->sd_rebuild_pos = (off_t)pd->pd_meta->rebuild_lba / 547 meta->array_width * pd->pd_meta->sector_size; 548 } else if (meta->state == NVIDIA_S_SYNC) { 549 /* Resyncing/dirty disk. */ 550 g_raid_change_subdisk_state(sd, 551 G_RAID_SUBDISK_S_RESYNC); 552 sd->sd_rebuild_pos = (off_t)pd->pd_meta->rebuild_lba / 553 meta->array_width * pd->pd_meta->sector_size; 554 } else { 555 /* Up to date disk. */ 556 g_raid_change_subdisk_state(sd, 557 G_RAID_SUBDISK_S_ACTIVE); 558 } 559 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 560 G_RAID_EVENT_SUBDISK); 561 } 562 563 /* Update status of our need for spare. */ 564 if (mdi->mdio_started) { 565 mdi->mdio_incomplete = 566 (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < 567 mdi->mdio_total_disks); 568 } 569 570 return (resurrection); 571 } 572 573 static void 574 g_disk_md_nvidia_retaste(void *arg, int pending) 575 { 576 577 G_RAID_DEBUG(1, "Array is not complete, trying to retaste."); 578 g_retaste(&g_raid_class); 579 free(arg, M_MD_NVIDIA); 580 } 581 582 static void 583 g_raid_md_nvidia_refill(struct g_raid_softc *sc) 584 { 585 struct g_raid_md_object *md; 586 struct g_raid_md_nvidia_object *mdi; 587 struct g_raid_disk *disk; 588 struct task *task; 589 int update, na; 590 591 md = sc->sc_md; 592 mdi = (struct g_raid_md_nvidia_object *)md; 593 update = 0; 594 do { 595 /* Make sure we miss anything. */ 596 na = g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE); 597 if (na == mdi->mdio_total_disks) 598 break; 599 600 G_RAID_DEBUG1(1, md->mdo_softc, 601 "Array is not complete (%d of %d), " 602 "trying to refill.", na, mdi->mdio_total_disks); 603 604 /* Try to get use some of STALE disks. */ 605 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 606 if (disk->d_state == G_RAID_DISK_S_STALE) { 607 update += g_raid_md_nvidia_start_disk(disk); 608 if (disk->d_state == G_RAID_DISK_S_ACTIVE) 609 break; 610 } 611 } 612 if (disk != NULL) 613 continue; 614 615 /* Try to get use some of SPARE disks. */ 616 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 617 if (disk->d_state == G_RAID_DISK_S_SPARE) { 618 update += g_raid_md_nvidia_start_disk(disk); 619 if (disk->d_state == G_RAID_DISK_S_ACTIVE) 620 break; 621 } 622 } 623 } while (disk != NULL); 624 625 /* Write new metadata if we changed something. */ 626 if (update) 627 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 628 629 /* Update status of our need for spare. */ 630 mdi->mdio_incomplete = (g_raid_ndisks(sc, G_RAID_DISK_S_ACTIVE) < 631 mdi->mdio_total_disks); 632 633 /* Request retaste hoping to find spare. */ 634 if (mdi->mdio_incomplete) { 635 task = malloc(sizeof(struct task), 636 M_MD_NVIDIA, M_WAITOK | M_ZERO); 637 TASK_INIT(task, 0, g_disk_md_nvidia_retaste, task); 638 taskqueue_enqueue(taskqueue_swi, task); 639 } 640 } 641 642 static void 643 g_raid_md_nvidia_start(struct g_raid_softc *sc) 644 { 645 struct g_raid_md_object *md; 646 struct g_raid_md_nvidia_object *mdi; 647 struct g_raid_md_nvidia_perdisk *pd; 648 struct nvidia_raid_conf *meta; 649 struct g_raid_volume *vol; 650 struct g_raid_subdisk *sd; 651 struct g_raid_disk *disk; 652 off_t size; 653 int j, disk_pos; 654 char buf[17]; 655 656 md = sc->sc_md; 657 mdi = (struct g_raid_md_nvidia_object *)md; 658 meta = mdi->mdio_meta; 659 660 /* Create volumes and subdisks. */ 661 nvidia_meta_get_name(meta, buf); 662 vol = g_raid_create_volume(sc, buf, -1); 663 vol->v_mediasize = (off_t)meta->total_sectors * 512; 664 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 665 if (meta->type == NVIDIA_T_RAID0) { 666 vol->v_raid_level = G_RAID_VOLUME_RL_RAID0; 667 size = vol->v_mediasize / mdi->mdio_total_disks; 668 } else if (meta->type == NVIDIA_T_RAID1) { 669 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1; 670 size = vol->v_mediasize; 671 } else if (meta->type == NVIDIA_T_RAID01) { 672 vol->v_raid_level = G_RAID_VOLUME_RL_RAID1E; 673 size = vol->v_mediasize / (mdi->mdio_total_disks / 2); 674 } else if (meta->type == NVIDIA_T_CONCAT) { 675 if (mdi->mdio_total_disks == 1) 676 vol->v_raid_level = G_RAID_VOLUME_RL_SINGLE; 677 else 678 vol->v_raid_level = G_RAID_VOLUME_RL_CONCAT; 679 size = 0; 680 } else if (meta->type == NVIDIA_T_RAID5) { 681 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 682 size = vol->v_mediasize / (mdi->mdio_total_disks - 1); 683 } else if (meta->type == NVIDIA_T_RAID5_SYM) { 684 vol->v_raid_level = G_RAID_VOLUME_RL_RAID5; 685 // vol->v_raid_level_qualifier = 0x03; 686 size = vol->v_mediasize / (mdi->mdio_total_disks - 1); 687 } else { 688 vol->v_raid_level = G_RAID_VOLUME_RL_UNKNOWN; 689 size = 0; 690 } 691 vol->v_strip_size = meta->strip_sectors * 512; //ZZZ 692 vol->v_disks_count = mdi->mdio_total_disks; 693 vol->v_sectorsize = 512; //ZZZ 694 for (j = 0; j < vol->v_disks_count; j++) { 695 sd = &vol->v_subdisks[j]; 696 sd->sd_offset = 0; 697 sd->sd_size = size; 698 } 699 g_raid_start_volume(vol); 700 701 /* Create disk placeholders to store data for later writing. */ 702 for (disk_pos = 0; disk_pos < mdi->mdio_total_disks; disk_pos++) { 703 pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); 704 pd->pd_disk_pos = disk_pos; 705 disk = g_raid_create_disk(sc); 706 disk->d_md_data = (void *)pd; 707 disk->d_state = G_RAID_DISK_S_OFFLINE; 708 sd = &vol->v_subdisks[disk_pos]; 709 sd->sd_disk = disk; 710 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 711 } 712 713 /* Make all disks found till the moment take their places. */ 714 do { 715 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 716 if (disk->d_state == G_RAID_DISK_S_NONE) { 717 g_raid_md_nvidia_start_disk(disk); 718 break; 719 } 720 } 721 } while (disk != NULL); 722 723 mdi->mdio_started = 1; 724 G_RAID_DEBUG1(0, sc, "Array started."); 725 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 726 727 /* Pickup any STALE/SPARE disks to refill array if needed. */ 728 g_raid_md_nvidia_refill(sc); 729 730 g_raid_event_send(vol, G_RAID_VOLUME_E_START, G_RAID_EVENT_VOLUME); 731 732 callout_stop(&mdi->mdio_start_co); 733 G_RAID_DEBUG1(1, sc, "root_mount_rel %p", mdi->mdio_rootmount); 734 root_mount_rel(mdi->mdio_rootmount); 735 mdi->mdio_rootmount = NULL; 736 } 737 738 static void 739 g_raid_md_nvidia_new_disk(struct g_raid_disk *disk) 740 { 741 struct g_raid_softc *sc; 742 struct g_raid_md_object *md; 743 struct g_raid_md_nvidia_object *mdi; 744 struct nvidia_raid_conf *pdmeta; 745 struct g_raid_md_nvidia_perdisk *pd; 746 747 sc = disk->d_softc; 748 md = sc->sc_md; 749 mdi = (struct g_raid_md_nvidia_object *)md; 750 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 751 pdmeta = pd->pd_meta; 752 753 if (mdi->mdio_started) { 754 if (g_raid_md_nvidia_start_disk(disk)) 755 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 756 } else { 757 if (mdi->mdio_meta == NULL || 758 mdi->mdio_meta->disk_number >= mdi->mdio_meta->total_disks) { 759 G_RAID_DEBUG1(1, sc, "Newer disk"); 760 if (mdi->mdio_meta != NULL) 761 free(mdi->mdio_meta, M_MD_NVIDIA); 762 mdi->mdio_meta = nvidia_meta_copy(pdmeta); 763 mdi->mdio_total_disks = pdmeta->total_disks; 764 mdi->mdio_disks_present = 1; 765 } else if (pdmeta->disk_number < mdi->mdio_meta->total_disks) { 766 mdi->mdio_disks_present++; 767 G_RAID_DEBUG1(1, sc, "Matching disk (%d of %d up)", 768 mdi->mdio_disks_present, 769 mdi->mdio_total_disks); 770 } else 771 G_RAID_DEBUG1(1, sc, "Spare disk"); 772 773 /* If we collected all needed disks - start array. */ 774 if (mdi->mdio_disks_present == mdi->mdio_total_disks) 775 g_raid_md_nvidia_start(sc); 776 } 777 } 778 779 static void 780 g_raid_nvidia_go(void *arg) 781 { 782 struct g_raid_softc *sc; 783 struct g_raid_md_object *md; 784 struct g_raid_md_nvidia_object *mdi; 785 786 sc = arg; 787 md = sc->sc_md; 788 mdi = (struct g_raid_md_nvidia_object *)md; 789 if (!mdi->mdio_started) { 790 G_RAID_DEBUG1(0, sc, "Force array start due to timeout."); 791 g_raid_event_send(sc, G_RAID_NODE_E_START, 0); 792 } 793 } 794 795 static int 796 g_raid_md_create_nvidia(struct g_raid_md_object *md, struct g_class *mp, 797 struct g_geom **gp) 798 { 799 struct g_raid_softc *sc; 800 struct g_raid_md_nvidia_object *mdi; 801 char name[32]; 802 803 mdi = (struct g_raid_md_nvidia_object *)md; 804 arc4rand(&mdi->mdio_volume_id, 16, 0); 805 snprintf(name, sizeof(name), "NVIDIA-%d", 806 atomic_fetchadd_int(&NVIDIANodeID, 1)); 807 sc = g_raid_create_node(mp, name, md); 808 if (sc == NULL) 809 return (G_RAID_MD_TASTE_FAIL); 810 md->mdo_softc = sc; 811 *gp = sc->sc_geom; 812 return (G_RAID_MD_TASTE_NEW); 813 } 814 815 static int 816 g_raid_md_taste_nvidia(struct g_raid_md_object *md, struct g_class *mp, 817 struct g_consumer *cp, struct g_geom **gp) 818 { 819 struct g_consumer *rcp; 820 struct g_provider *pp; 821 struct g_raid_md_nvidia_object *mdi, *mdi1; 822 struct g_raid_softc *sc; 823 struct g_raid_disk *disk; 824 struct nvidia_raid_conf *meta; 825 struct g_raid_md_nvidia_perdisk *pd; 826 struct g_geom *geom; 827 int error, result, spare, len; 828 char name[32]; 829 uint16_t vendor; 830 831 G_RAID_DEBUG(1, "Tasting NVIDIA on %s", cp->provider->name); 832 mdi = (struct g_raid_md_nvidia_object *)md; 833 pp = cp->provider; 834 835 /* Read metadata from device. */ 836 meta = NULL; 837 vendor = 0xffff; 838 if (g_access(cp, 1, 0, 0) != 0) 839 return (G_RAID_MD_TASTE_FAIL); 840 g_topology_unlock(); 841 len = 2; 842 if (pp->geom->rank == 1) 843 g_io_getattr("GEOM::hba_vendor", cp, &len, &vendor); 844 meta = nvidia_meta_read(cp); 845 g_topology_lock(); 846 g_access(cp, -1, 0, 0); 847 if (meta == NULL) { 848 if (g_raid_aggressive_spare) { 849 if (vendor == 0x10de) { 850 G_RAID_DEBUG(1, 851 "No NVIDIA metadata, forcing spare."); 852 spare = 2; 853 goto search; 854 } else { 855 G_RAID_DEBUG(1, 856 "NVIDIA vendor mismatch 0x%04x != 0x10de", 857 vendor); 858 } 859 } 860 return (G_RAID_MD_TASTE_FAIL); 861 } 862 863 /* Metadata valid. Print it. */ 864 g_raid_md_nvidia_print(meta); 865 G_RAID_DEBUG(1, "NVIDIA disk position %d", meta->disk_number); 866 spare = 0;//(meta->type == NVIDIA_T_SPARE) ? 1 : 0; 867 868 search: 869 /* Search for matching node. */ 870 sc = NULL; 871 mdi1 = NULL; 872 LIST_FOREACH(geom, &mp->geom, geom) { 873 sc = geom->softc; 874 if (sc == NULL) 875 continue; 876 if (sc->sc_stopping != 0) 877 continue; 878 if (sc->sc_md->mdo_class != md->mdo_class) 879 continue; 880 mdi1 = (struct g_raid_md_nvidia_object *)sc->sc_md; 881 if (spare) { 882 if (mdi1->mdio_incomplete) 883 break; 884 } else { 885 if (memcmp(&mdi1->mdio_volume_id, 886 &meta->volume_id, 16) == 0) 887 break; 888 } 889 } 890 891 /* Found matching node. */ 892 if (geom != NULL) { 893 G_RAID_DEBUG(1, "Found matching array %s", sc->sc_name); 894 result = G_RAID_MD_TASTE_EXISTING; 895 896 } else if (spare) { /* Not found needy node -- left for later. */ 897 G_RAID_DEBUG(1, "Spare is not needed at this time"); 898 goto fail1; 899 900 } else { /* Not found matching node -- create one. */ 901 result = G_RAID_MD_TASTE_NEW; 902 memcpy(&mdi->mdio_volume_id, &meta->volume_id, 16); 903 snprintf(name, sizeof(name), "NVIDIA-%d", 904 atomic_fetchadd_int(&NVIDIANodeID, 1)); 905 sc = g_raid_create_node(mp, name, md); 906 md->mdo_softc = sc; 907 geom = sc->sc_geom; 908 callout_init(&mdi->mdio_start_co, 1); 909 callout_reset(&mdi->mdio_start_co, g_raid_start_timeout * hz, 910 g_raid_nvidia_go, sc); 911 mdi->mdio_rootmount = root_mount_hold("GRAID-NVIDIA"); 912 G_RAID_DEBUG1(1, sc, "root_mount_hold %p", mdi->mdio_rootmount); 913 } 914 915 rcp = g_new_consumer(geom); 916 g_attach(rcp, pp); 917 if (g_access(rcp, 1, 1, 1) != 0) 918 ; //goto fail1; 919 920 g_topology_unlock(); 921 sx_xlock(&sc->sc_lock); 922 923 pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); 924 pd->pd_meta = meta; 925 if (spare == 2) { 926 pd->pd_disk_pos = -3; 927 } else { 928 pd->pd_disk_pos = -1; 929 } 930 pd->pd_disk_size = pp->mediasize; 931 disk = g_raid_create_disk(sc); 932 disk->d_md_data = (void *)pd; 933 disk->d_consumer = rcp; 934 rcp->private = disk; 935 936 /* Read kernel dumping information. */ 937 disk->d_kd.offset = 0; 938 disk->d_kd.length = OFF_MAX; 939 len = sizeof(disk->d_kd); 940 error = g_io_getattr("GEOM::kerneldump", rcp, &len, &disk->d_kd); 941 if (disk->d_kd.di.dumper == NULL) 942 G_RAID_DEBUG1(2, sc, "Dumping not supported by %s: %d.", 943 rcp->provider->name, error); 944 945 g_raid_md_nvidia_new_disk(disk); 946 947 sx_xunlock(&sc->sc_lock); 948 g_topology_lock(); 949 *gp = geom; 950 return (result); 951 fail1: 952 free(meta, M_MD_NVIDIA); 953 return (G_RAID_MD_TASTE_FAIL); 954 } 955 956 static int 957 g_raid_md_event_nvidia(struct g_raid_md_object *md, 958 struct g_raid_disk *disk, u_int event) 959 { 960 struct g_raid_softc *sc; 961 struct g_raid_subdisk *sd; 962 struct g_raid_md_nvidia_object *mdi; 963 struct g_raid_md_nvidia_perdisk *pd; 964 965 sc = md->mdo_softc; 966 mdi = (struct g_raid_md_nvidia_object *)md; 967 if (disk == NULL) { 968 switch (event) { 969 case G_RAID_NODE_E_START: 970 if (!mdi->mdio_started) { 971 /* Bump volume ID to drop missing disks. */ 972 arc4rand(&mdi->mdio_volume_id, 16, 0); 973 g_raid_md_nvidia_start(sc); 974 } 975 return (0); 976 } 977 return (-1); 978 } 979 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 980 switch (event) { 981 case G_RAID_DISK_E_DISCONNECTED: 982 /* If disk was assigned, just update statuses. */ 983 if (pd->pd_disk_pos >= 0) { 984 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 985 if (disk->d_consumer) { 986 g_raid_kill_consumer(sc, disk->d_consumer); 987 disk->d_consumer = NULL; 988 } 989 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 990 g_raid_change_subdisk_state(sd, 991 G_RAID_SUBDISK_S_NONE); 992 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, 993 G_RAID_EVENT_SUBDISK); 994 } 995 } else { 996 /* Otherwise -- delete. */ 997 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 998 g_raid_destroy_disk(disk); 999 } 1000 1001 if (mdi->mdio_started) { 1002 /* Bump volume ID to prevent disk resurrection. */ 1003 if (pd->pd_disk_pos >= 0) 1004 arc4rand(&mdi->mdio_volume_id, 16, 0); 1005 1006 /* Write updated metadata to all disks. */ 1007 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 1008 } 1009 1010 /* Check if anything left except placeholders. */ 1011 if (g_raid_ndisks(sc, -1) == 1012 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 1013 g_raid_destroy_node(sc, 0); 1014 else 1015 g_raid_md_nvidia_refill(sc); 1016 return (0); 1017 } 1018 return (-2); 1019 } 1020 1021 static int 1022 g_raid_md_ctl_nvidia(struct g_raid_md_object *md, 1023 struct gctl_req *req) 1024 { 1025 struct g_raid_softc *sc; 1026 struct g_raid_volume *vol; 1027 struct g_raid_subdisk *sd; 1028 struct g_raid_disk *disk; 1029 struct g_raid_md_nvidia_object *mdi; 1030 struct g_raid_md_nvidia_perdisk *pd; 1031 struct g_consumer *cp; 1032 struct g_provider *pp; 1033 char arg[16]; 1034 const char *verb, *volname, *levelname, *diskname; 1035 int *nargs, *force; 1036 off_t size, sectorsize, strip, volsize; 1037 intmax_t *sizearg, *striparg; 1038 int numdisks, i, len, level, qual, update; 1039 int error; 1040 1041 sc = md->mdo_softc; 1042 mdi = (struct g_raid_md_nvidia_object *)md; 1043 verb = gctl_get_param(req, "verb", NULL); 1044 nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); 1045 error = 0; 1046 if (strcmp(verb, "label") == 0) { 1047 1048 if (*nargs < 4) { 1049 gctl_error(req, "Invalid number of arguments."); 1050 return (-1); 1051 } 1052 volname = gctl_get_asciiparam(req, "arg1"); 1053 if (volname == NULL) { 1054 gctl_error(req, "No volume name."); 1055 return (-2); 1056 } 1057 levelname = gctl_get_asciiparam(req, "arg2"); 1058 if (levelname == NULL) { 1059 gctl_error(req, "No RAID level."); 1060 return (-3); 1061 } 1062 if (g_raid_volume_str2level(levelname, &level, &qual)) { 1063 gctl_error(req, "Unknown RAID level '%s'.", levelname); 1064 return (-4); 1065 } 1066 numdisks = *nargs - 3; 1067 force = gctl_get_paraml(req, "force", sizeof(*force)); 1068 if (!g_raid_md_nvidia_supported(level, qual, numdisks, 1069 force ? *force : 0)) { 1070 gctl_error(req, "Unsupported RAID level " 1071 "(0x%02x/0x%02x), or number of disks (%d).", 1072 level, qual, numdisks); 1073 return (-5); 1074 } 1075 1076 /* Search for disks, connect them and probe. */ 1077 size = 0x7fffffffffffffffllu; 1078 sectorsize = 0; 1079 for (i = 0; i < numdisks; i++) { 1080 snprintf(arg, sizeof(arg), "arg%d", i + 3); 1081 diskname = gctl_get_asciiparam(req, arg); 1082 if (diskname == NULL) { 1083 gctl_error(req, "No disk name (%s).", arg); 1084 error = -6; 1085 break; 1086 } 1087 if (strcmp(diskname, "NONE") == 0) { 1088 cp = NULL; 1089 pp = NULL; 1090 } else { 1091 g_topology_lock(); 1092 cp = g_raid_open_consumer(sc, diskname); 1093 if (cp == NULL) { 1094 gctl_error(req, "Can't open '%s'.", 1095 diskname); 1096 g_topology_unlock(); 1097 error = -7; 1098 break; 1099 } 1100 pp = cp->provider; 1101 } 1102 pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); 1103 pd->pd_disk_pos = i; 1104 disk = g_raid_create_disk(sc); 1105 disk->d_md_data = (void *)pd; 1106 disk->d_consumer = cp; 1107 if (cp == NULL) 1108 continue; 1109 cp->private = disk; 1110 g_topology_unlock(); 1111 1112 /* Read kernel dumping information. */ 1113 disk->d_kd.offset = 0; 1114 disk->d_kd.length = OFF_MAX; 1115 len = sizeof(disk->d_kd); 1116 g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd); 1117 if (disk->d_kd.di.dumper == NULL) 1118 G_RAID_DEBUG1(2, sc, 1119 "Dumping not supported by %s.", 1120 cp->provider->name); 1121 1122 pd->pd_disk_size = pp->mediasize; 1123 if (size > pp->mediasize) 1124 size = pp->mediasize; 1125 if (sectorsize < pp->sectorsize) 1126 sectorsize = pp->sectorsize; 1127 } 1128 if (error != 0) 1129 return (error); 1130 1131 if (sectorsize <= 0) { 1132 gctl_error(req, "Can't get sector size."); 1133 return (-8); 1134 } 1135 1136 /* Reserve space for metadata. */ 1137 size -= 2 * sectorsize; 1138 1139 /* Handle size argument. */ 1140 len = sizeof(*sizearg); 1141 sizearg = gctl_get_param(req, "size", &len); 1142 if (sizearg != NULL && len == sizeof(*sizearg) && 1143 *sizearg > 0) { 1144 if (*sizearg > size) { 1145 gctl_error(req, "Size too big %lld > %lld.", 1146 (long long)*sizearg, (long long)size); 1147 return (-9); 1148 } 1149 size = *sizearg; 1150 } 1151 1152 /* Handle strip argument. */ 1153 strip = 131072; 1154 len = sizeof(*striparg); 1155 striparg = gctl_get_param(req, "strip", &len); 1156 if (striparg != NULL && len == sizeof(*striparg) && 1157 *striparg > 0) { 1158 if (*striparg < sectorsize) { 1159 gctl_error(req, "Strip size too small."); 1160 return (-10); 1161 } 1162 if (*striparg % sectorsize != 0) { 1163 gctl_error(req, "Incorrect strip size."); 1164 return (-11); 1165 } 1166 if (strip > 65535 * sectorsize) { 1167 gctl_error(req, "Strip size too big."); 1168 return (-12); 1169 } 1170 strip = *striparg; 1171 } 1172 1173 /* Round size down to strip or sector. */ 1174 if (level == G_RAID_VOLUME_RL_RAID1) 1175 size -= (size % sectorsize); 1176 else if (level == G_RAID_VOLUME_RL_RAID1E && 1177 (numdisks & 1) != 0) 1178 size -= (size % (2 * strip)); 1179 else 1180 size -= (size % strip); 1181 if (size <= 0) { 1182 gctl_error(req, "Size too small."); 1183 return (-13); 1184 } 1185 1186 if (level == G_RAID_VOLUME_RL_RAID0 || 1187 level == G_RAID_VOLUME_RL_CONCAT || 1188 level == G_RAID_VOLUME_RL_SINGLE) 1189 volsize = size * numdisks; 1190 else if (level == G_RAID_VOLUME_RL_RAID1) 1191 volsize = size; 1192 else if (level == G_RAID_VOLUME_RL_RAID5) 1193 volsize = size * (numdisks - 1); 1194 else { /* RAID1E */ 1195 volsize = ((size * numdisks) / strip / 2) * 1196 strip; 1197 } 1198 if (volsize > 0xffffffffllu * sectorsize) { 1199 gctl_error(req, "Size too big."); 1200 return (-14); 1201 } 1202 1203 /* We have all we need, create things: volume, ... */ 1204 mdi->mdio_total_disks = numdisks; 1205 mdi->mdio_started = 1; 1206 vol = g_raid_create_volume(sc, volname, -1); 1207 vol->v_md_data = (void *)(intptr_t)0; 1208 vol->v_raid_level = level; 1209 vol->v_raid_level_qualifier = G_RAID_VOLUME_RLQ_NONE; 1210 vol->v_strip_size = strip; 1211 vol->v_disks_count = numdisks; 1212 vol->v_mediasize = volsize; 1213 vol->v_sectorsize = sectorsize; 1214 g_raid_start_volume(vol); 1215 1216 /* , and subdisks. */ 1217 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1218 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 1219 sd = &vol->v_subdisks[pd->pd_disk_pos]; 1220 sd->sd_disk = disk; 1221 sd->sd_offset = 0; 1222 sd->sd_size = size; 1223 TAILQ_INSERT_TAIL(&disk->d_subdisks, sd, sd_next); 1224 if (sd->sd_disk->d_consumer != NULL) { 1225 g_raid_change_disk_state(disk, 1226 G_RAID_DISK_S_ACTIVE); 1227 g_raid_change_subdisk_state(sd, 1228 G_RAID_SUBDISK_S_ACTIVE); 1229 g_raid_event_send(sd, G_RAID_SUBDISK_E_NEW, 1230 G_RAID_EVENT_SUBDISK); 1231 } else { 1232 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 1233 } 1234 } 1235 1236 /* Write metadata based on created entities. */ 1237 G_RAID_DEBUG1(0, sc, "Array started."); 1238 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 1239 1240 /* Pickup any STALE/SPARE disks to refill array if needed. */ 1241 g_raid_md_nvidia_refill(sc); 1242 1243 g_raid_event_send(vol, G_RAID_VOLUME_E_START, 1244 G_RAID_EVENT_VOLUME); 1245 return (0); 1246 } 1247 if (strcmp(verb, "delete") == 0) { 1248 1249 /* Check if some volume is still open. */ 1250 force = gctl_get_paraml(req, "force", sizeof(*force)); 1251 if (force != NULL && *force == 0 && 1252 g_raid_nopens(sc) != 0) { 1253 gctl_error(req, "Some volume is still open."); 1254 return (-4); 1255 } 1256 1257 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1258 if (disk->d_consumer) 1259 nvidia_meta_erase(disk->d_consumer); 1260 } 1261 g_raid_destroy_node(sc, 0); 1262 return (0); 1263 } 1264 if (strcmp(verb, "remove") == 0 || 1265 strcmp(verb, "fail") == 0) { 1266 if (*nargs < 2) { 1267 gctl_error(req, "Invalid number of arguments."); 1268 return (-1); 1269 } 1270 for (i = 1; i < *nargs; i++) { 1271 snprintf(arg, sizeof(arg), "arg%d", i); 1272 diskname = gctl_get_asciiparam(req, arg); 1273 if (diskname == NULL) { 1274 gctl_error(req, "No disk name (%s).", arg); 1275 error = -2; 1276 break; 1277 } 1278 if (strncmp(diskname, "/dev/", 5) == 0) 1279 diskname += 5; 1280 1281 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1282 if (disk->d_consumer != NULL && 1283 disk->d_consumer->provider != NULL && 1284 strcmp(disk->d_consumer->provider->name, 1285 diskname) == 0) 1286 break; 1287 } 1288 if (disk == NULL) { 1289 gctl_error(req, "Disk '%s' not found.", 1290 diskname); 1291 error = -3; 1292 break; 1293 } 1294 1295 if (strcmp(verb, "fail") == 0) { 1296 g_raid_md_fail_disk_nvidia(md, NULL, disk); 1297 continue; 1298 } 1299 1300 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 1301 1302 /* Erase metadata on deleting disk. */ 1303 nvidia_meta_erase(disk->d_consumer); 1304 1305 /* If disk was assigned, just update statuses. */ 1306 if (pd->pd_disk_pos >= 0) { 1307 g_raid_change_disk_state(disk, G_RAID_DISK_S_OFFLINE); 1308 g_raid_kill_consumer(sc, disk->d_consumer); 1309 disk->d_consumer = NULL; 1310 TAILQ_FOREACH(sd, &disk->d_subdisks, sd_next) { 1311 g_raid_change_subdisk_state(sd, 1312 G_RAID_SUBDISK_S_NONE); 1313 g_raid_event_send(sd, G_RAID_SUBDISK_E_DISCONNECTED, 1314 G_RAID_EVENT_SUBDISK); 1315 } 1316 } else { 1317 /* Otherwise -- delete. */ 1318 g_raid_change_disk_state(disk, G_RAID_DISK_S_NONE); 1319 g_raid_destroy_disk(disk); 1320 } 1321 } 1322 1323 /* Write updated metadata to remaining disks. */ 1324 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 1325 1326 /* Check if anything left except placeholders. */ 1327 if (g_raid_ndisks(sc, -1) == 1328 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 1329 g_raid_destroy_node(sc, 0); 1330 else 1331 g_raid_md_nvidia_refill(sc); 1332 return (error); 1333 } 1334 if (strcmp(verb, "insert") == 0) { 1335 if (*nargs < 2) { 1336 gctl_error(req, "Invalid number of arguments."); 1337 return (-1); 1338 } 1339 update = 0; 1340 for (i = 1; i < *nargs; i++) { 1341 /* Get disk name. */ 1342 snprintf(arg, sizeof(arg), "arg%d", i); 1343 diskname = gctl_get_asciiparam(req, arg); 1344 if (diskname == NULL) { 1345 gctl_error(req, "No disk name (%s).", arg); 1346 error = -3; 1347 break; 1348 } 1349 1350 /* Try to find provider with specified name. */ 1351 g_topology_lock(); 1352 cp = g_raid_open_consumer(sc, diskname); 1353 if (cp == NULL) { 1354 gctl_error(req, "Can't open disk '%s'.", 1355 diskname); 1356 g_topology_unlock(); 1357 error = -4; 1358 break; 1359 } 1360 pp = cp->provider; 1361 1362 pd = malloc(sizeof(*pd), M_MD_NVIDIA, M_WAITOK | M_ZERO); 1363 pd->pd_disk_pos = -3; 1364 pd->pd_disk_size = pp->mediasize; 1365 1366 disk = g_raid_create_disk(sc); 1367 disk->d_consumer = cp; 1368 disk->d_md_data = (void *)pd; 1369 cp->private = disk; 1370 g_topology_unlock(); 1371 1372 /* Read kernel dumping information. */ 1373 disk->d_kd.offset = 0; 1374 disk->d_kd.length = OFF_MAX; 1375 len = sizeof(disk->d_kd); 1376 g_io_getattr("GEOM::kerneldump", cp, &len, &disk->d_kd); 1377 if (disk->d_kd.di.dumper == NULL) 1378 G_RAID_DEBUG1(2, sc, 1379 "Dumping not supported by %s.", 1380 cp->provider->name); 1381 1382 /* Welcome the "new" disk. */ 1383 update += g_raid_md_nvidia_start_disk(disk); 1384 if (disk->d_state != G_RAID_DISK_S_SPARE && 1385 disk->d_state != G_RAID_DISK_S_ACTIVE) { 1386 gctl_error(req, "Disk '%s' doesn't fit.", 1387 diskname); 1388 g_raid_destroy_disk(disk); 1389 error = -8; 1390 break; 1391 } 1392 } 1393 1394 /* Write new metadata if we changed something. */ 1395 if (update) 1396 g_raid_md_write_nvidia(md, NULL, NULL, NULL); 1397 return (error); 1398 } 1399 gctl_error(req, "Command '%s' is not supported.", verb); 1400 return (-100); 1401 } 1402 1403 static int 1404 g_raid_md_write_nvidia(struct g_raid_md_object *md, struct g_raid_volume *tvol, 1405 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1406 { 1407 struct g_raid_softc *sc; 1408 struct g_raid_volume *vol; 1409 struct g_raid_subdisk *sd; 1410 struct g_raid_disk *disk; 1411 struct g_raid_md_nvidia_object *mdi; 1412 struct g_raid_md_nvidia_perdisk *pd; 1413 struct nvidia_raid_conf *meta; 1414 int i, spares; 1415 1416 sc = md->mdo_softc; 1417 mdi = (struct g_raid_md_nvidia_object *)md; 1418 1419 if (sc->sc_stopping == G_RAID_DESTROY_HARD) 1420 return (0); 1421 1422 /* There is only one volume. */ 1423 vol = TAILQ_FIRST(&sc->sc_volumes); 1424 1425 /* Fill global fields. */ 1426 meta = malloc(sizeof(*meta), M_MD_NVIDIA, M_WAITOK | M_ZERO); 1427 if (mdi->mdio_meta) 1428 memcpy(meta, mdi->mdio_meta, sizeof(*meta)); 1429 memcpy(meta->nvidia_id, NVIDIA_MAGIC, sizeof(NVIDIA_MAGIC) - 1); 1430 meta->config_size = 30; 1431 meta->version = 0x0064; 1432 meta->total_sectors = vol->v_mediasize / vol->v_sectorsize; 1433 meta->sector_size = vol->v_sectorsize; 1434 nvidia_meta_put_name(meta, vol->v_name); 1435 meta->magic_0 = NVIDIA_MAGIC0; 1436 memcpy(&meta->volume_id, &mdi->mdio_volume_id, 16); 1437 meta->state = NVIDIA_S_IDLE; 1438 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 1439 meta->array_width = 1; 1440 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1441 meta->array_width = vol->v_disks_count / 2; 1442 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID5) 1443 meta->array_width = vol->v_disks_count - 1; 1444 else 1445 meta->array_width = vol->v_disks_count; 1446 meta->total_disks = vol->v_disks_count; 1447 meta->orig_array_width = meta->array_width; 1448 if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID0) 1449 meta->type = NVIDIA_T_RAID0; 1450 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1) 1451 meta->type = NVIDIA_T_RAID1; 1452 else if (vol->v_raid_level == G_RAID_VOLUME_RL_RAID1E) 1453 meta->type = NVIDIA_T_RAID01; 1454 else if (vol->v_raid_level == G_RAID_VOLUME_RL_CONCAT || 1455 vol->v_raid_level == G_RAID_VOLUME_RL_SINGLE) 1456 meta->type = NVIDIA_T_CONCAT; 1457 // else if (vol->v_raid_level_qualifier == 0) 1458 // meta->type = NVIDIA_T_RAID5; 1459 else 1460 meta->type = NVIDIA_T_RAID5_SYM; 1461 meta->strip_sectors = vol->v_strip_size / vol->v_sectorsize; 1462 meta->strip_bytes = vol->v_strip_size; 1463 meta->strip_shift = ffs(meta->strip_sectors) - 1; 1464 meta->strip_mask = meta->strip_sectors - 1; 1465 meta->stripe_sectors = meta->strip_sectors * meta->orig_array_width; 1466 meta->stripe_bytes = meta->stripe_sectors * vol->v_sectorsize; 1467 meta->rebuild_lba = 0; 1468 meta->orig_type = meta->type; 1469 meta->orig_total_sectors = meta->total_sectors; 1470 meta->status = 0; 1471 1472 for (i = 0; i < vol->v_disks_count; i++) { 1473 sd = &vol->v_subdisks[i]; 1474 if ((sd->sd_state == G_RAID_SUBDISK_S_STALE || 1475 sd->sd_state == G_RAID_SUBDISK_S_RESYNC || 1476 vol->v_dirty) && 1477 meta->state != NVIDIA_S_REBUILD) 1478 meta->state = NVIDIA_S_SYNC; 1479 else if (sd->sd_state == G_RAID_SUBDISK_S_NEW || 1480 sd->sd_state == G_RAID_SUBDISK_S_REBUILD) 1481 meta->state = NVIDIA_S_REBUILD; 1482 } 1483 1484 /* We are done. Print meta data and store them to disks. */ 1485 if (mdi->mdio_meta != NULL) 1486 free(mdi->mdio_meta, M_MD_NVIDIA); 1487 mdi->mdio_meta = meta; 1488 spares = 0; 1489 TAILQ_FOREACH(disk, &sc->sc_disks, d_next) { 1490 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 1491 if (disk->d_state != G_RAID_DISK_S_ACTIVE && 1492 disk->d_state != G_RAID_DISK_S_SPARE) 1493 continue; 1494 if (pd->pd_meta != NULL) { 1495 free(pd->pd_meta, M_MD_NVIDIA); 1496 pd->pd_meta = NULL; 1497 } 1498 pd->pd_meta = nvidia_meta_copy(meta); 1499 if ((sd = TAILQ_FIRST(&disk->d_subdisks)) != NULL) { 1500 /* For RAID0+1 we need to translate order. */ 1501 pd->pd_meta->disk_number = 1502 nvidia_meta_translate_disk(meta, sd->sd_pos); 1503 if (sd->sd_state != G_RAID_SUBDISK_S_ACTIVE) { 1504 pd->pd_meta->disk_status = 0x100; 1505 pd->pd_meta->rebuild_lba = 1506 sd->sd_rebuild_pos / vol->v_sectorsize * 1507 meta->array_width; 1508 } 1509 } else 1510 pd->pd_meta->disk_number = meta->total_disks + spares++; 1511 G_RAID_DEBUG(1, "Writing NVIDIA metadata to %s", 1512 g_raid_get_diskname(disk)); 1513 g_raid_md_nvidia_print(pd->pd_meta); 1514 nvidia_meta_write(disk->d_consumer, pd->pd_meta); 1515 } 1516 return (0); 1517 } 1518 1519 static int 1520 g_raid_md_fail_disk_nvidia(struct g_raid_md_object *md, 1521 struct g_raid_subdisk *tsd, struct g_raid_disk *tdisk) 1522 { 1523 struct g_raid_softc *sc; 1524 struct g_raid_md_nvidia_perdisk *pd; 1525 struct g_raid_subdisk *sd; 1526 1527 sc = md->mdo_softc; 1528 pd = (struct g_raid_md_nvidia_perdisk *)tdisk->d_md_data; 1529 1530 /* We can't fail disk that is not a part of array now. */ 1531 if (pd->pd_disk_pos < 0) 1532 return (-1); 1533 1534 /* Erase metadata to prevent disks's later resurrection. */ 1535 if (tdisk->d_consumer) 1536 nvidia_meta_erase(tdisk->d_consumer); 1537 1538 /* Change states. */ 1539 g_raid_change_disk_state(tdisk, G_RAID_DISK_S_FAILED); 1540 TAILQ_FOREACH(sd, &tdisk->d_subdisks, sd_next) { 1541 g_raid_change_subdisk_state(sd, 1542 G_RAID_SUBDISK_S_FAILED); 1543 g_raid_event_send(sd, G_RAID_SUBDISK_E_FAILED, 1544 G_RAID_EVENT_SUBDISK); 1545 } 1546 1547 /* Write updated metadata to remaining disks. */ 1548 g_raid_md_write_nvidia(md, NULL, NULL, tdisk); 1549 1550 /* Check if anything left except placeholders. */ 1551 if (g_raid_ndisks(sc, -1) == 1552 g_raid_ndisks(sc, G_RAID_DISK_S_OFFLINE)) 1553 g_raid_destroy_node(sc, 0); 1554 else 1555 g_raid_md_nvidia_refill(sc); 1556 return (0); 1557 } 1558 1559 static int 1560 g_raid_md_free_disk_nvidia(struct g_raid_md_object *md, 1561 struct g_raid_disk *disk) 1562 { 1563 struct g_raid_md_nvidia_perdisk *pd; 1564 1565 pd = (struct g_raid_md_nvidia_perdisk *)disk->d_md_data; 1566 if (pd->pd_meta != NULL) { 1567 free(pd->pd_meta, M_MD_NVIDIA); 1568 pd->pd_meta = NULL; 1569 } 1570 free(pd, M_MD_NVIDIA); 1571 disk->d_md_data = NULL; 1572 return (0); 1573 } 1574 1575 static int 1576 g_raid_md_free_nvidia(struct g_raid_md_object *md) 1577 { 1578 struct g_raid_md_nvidia_object *mdi; 1579 1580 mdi = (struct g_raid_md_nvidia_object *)md; 1581 if (!mdi->mdio_started) { 1582 mdi->mdio_started = 0; 1583 callout_stop(&mdi->mdio_start_co); 1584 G_RAID_DEBUG1(1, md->mdo_softc, 1585 "root_mount_rel %p", mdi->mdio_rootmount); 1586 root_mount_rel(mdi->mdio_rootmount); 1587 mdi->mdio_rootmount = NULL; 1588 } 1589 if (mdi->mdio_meta != NULL) { 1590 free(mdi->mdio_meta, M_MD_NVIDIA); 1591 mdi->mdio_meta = NULL; 1592 } 1593 return (0); 1594 } 1595 1596 G_RAID_MD_DECLARE(g_raid_md_nvidia); 1597