1 /*- 2 * Copyright (c) 2004, 2005 Lukas Ertl 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/param.h> 31 #include <sys/bio.h> 32 #include <sys/errno.h> 33 #include <sys/endian.h> 34 #include <sys/conf.h> 35 #include <sys/kernel.h> 36 #include <sys/kthread.h> 37 #include <sys/libkern.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/module.h> 41 #include <sys/mutex.h> 42 #include <sys/sbuf.h> 43 #include <sys/systm.h> 44 #include <sys/time.h> 45 #include <sys/vimage.h> 46 47 #include <geom/geom.h> 48 #include <geom/vinum/geom_vinum_var.h> 49 #include <geom/vinum/geom_vinum.h> 50 #include <geom/vinum/geom_vinum_share.h> 51 52 #define GV_LEGACY_I386 0 53 #define GV_LEGACY_AMD64 1 54 #define GV_LEGACY_SPARC64 2 55 #define GV_LEGACY_POWERPC 3 56 57 static void gv_drive_dead(void *, int); 58 static void gv_drive_worker(void *); 59 static int gv_legacy_header_type(uint8_t *, int); 60 61 /* 62 * Here are the "offset (size)" for the various struct gv_hdr fields, 63 * for the legacy i386 (or 32-bit powerpc), legacy amd64 (or sparc64), and 64 * current (cpu & endian agnostic) versions of the on-disk format of the vinum 65 * header structure: 66 * 67 * i386 amd64 current field 68 * -------- -------- -------- ----- 69 * 0 ( 8) 0 ( 8) 0 ( 8) magic 70 * 8 ( 4) 8 ( 8) 8 ( 8) config_length 71 * 12 (32) 16 (32) 16 (32) label.sysname 72 * 44 (32) 48 (32) 48 (32) label.name 73 * 76 ( 4) 80 ( 8) 80 ( 8) label.date_of_birth.tv_sec 74 * 80 ( 4) 88 ( 8) 88 ( 8) label.date_of_birth.tv_usec 75 * 84 ( 4) 96 ( 8) 96 ( 8) label.last_update.tv_sec 76 * 88 ( 4) 104 ( 8) 104 ( 8) label.last_update.tv_usec 77 * 92 ( 8) 112 ( 8) 112 ( 8) label.drive_size 78 * ======== ======== ======== 79 * 100 120 120 total size 80 * 81 * NOTE: i386 and amd64 formats are stored as little-endian; the current 82 * format uses big-endian (network order). 83 */ 84 85 86 /* Checks for legacy format depending on platform. */ 87 static int 88 gv_legacy_header_type(uint8_t *hdr, int bigendian) 89 { 90 uint32_t *i32; 91 int arch_32, arch_64, i; 92 93 /* Set arch according to endianess. */ 94 if (bigendian) { 95 arch_32 = GV_LEGACY_POWERPC; 96 arch_64 = GV_LEGACY_SPARC64; 97 } else { 98 arch_32 = GV_LEGACY_I386; 99 arch_64 = GV_LEGACY_AMD64; 100 } 101 102 /* if non-empty hostname overlaps 64-bit config_length */ 103 i32 = (uint32_t *)(hdr + 12); 104 if (*i32 != 0) 105 return (arch_32); 106 /* check for non-empty hostname */ 107 if (hdr[16] != 0) 108 return (arch_64); 109 /* check bytes past 32-bit structure */ 110 for (i = 100; i < 120; i++) 111 if (hdr[i] != 0) 112 return (arch_32); 113 /* check for overlapping timestamp */ 114 i32 = (uint32_t *)(hdr + 84); 115 116 if (*i32 == 0) 117 return (arch_64); 118 return (arch_32); 119 } 120 121 /* 122 * Read the header while taking magic number into account, and write it to 123 * destination pointer. 124 */ 125 int 126 gv_read_header(struct g_consumer *cp, struct gv_hdr *m_hdr) 127 { 128 struct g_provider *pp; 129 uint64_t magic_machdep; 130 uint8_t *d_hdr; 131 int be, off; 132 133 #define GV_GET32(endian) \ 134 endian##32toh(*((uint32_t *)&d_hdr[off])); \ 135 off += 4 136 #define GV_GET64(endian) \ 137 endian##64toh(*((uint64_t *)&d_hdr[off])); \ 138 off += 8 139 140 KASSERT(m_hdr != NULL, ("gv_read_header: null m_hdr")); 141 KASSERT(cp != NULL, ("gv_read_header: null cp")); 142 pp = cp->provider; 143 KASSERT(pp != NULL, ("gv_read_header: null pp")); 144 145 d_hdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); 146 if (d_hdr == NULL) 147 return (-1); 148 off = 0; 149 m_hdr->magic = GV_GET64(be); 150 magic_machdep = *((uint64_t *)&d_hdr[0]); 151 /* 152 * The big endian machines will have a reverse of GV_OLD_MAGIC, so we 153 * need to decide if we are running on a big endian machine as well as 154 * checking the magic against the reverse of GV_OLD_MAGIC. 155 */ 156 be = (m_hdr->magic == magic_machdep); 157 if (m_hdr->magic == GV_MAGIC) { 158 m_hdr->config_length = GV_GET64(be); 159 off = 16; 160 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 161 off += GV_HOSTNAME_LEN; 162 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 163 off += GV_MAXDRIVENAME; 164 m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); 165 m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); 166 m_hdr->label.last_update.tv_sec = GV_GET64(be); 167 m_hdr->label.last_update.tv_usec = GV_GET64(be); 168 m_hdr->label.drive_size = GV_GET64(be); 169 } else if (m_hdr->magic != GV_OLD_MAGIC && 170 m_hdr->magic != le64toh(GV_OLD_MAGIC)) { 171 /* Not a gvinum drive. */ 172 g_free(d_hdr); 173 return (-1); 174 } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_SPARC64) { 175 G_VINUM_DEBUG(1, "detected legacy sparc64 header"); 176 m_hdr->magic = GV_MAGIC; 177 /* Legacy sparc64 on-disk header */ 178 m_hdr->config_length = GV_GET64(be); 179 bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); 180 off += GV_HOSTNAME_LEN; 181 bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); 182 off += GV_MAXDRIVENAME; 183 m_hdr->label.date_of_birth.tv_sec = GV_GET64(be); 184 m_hdr->label.date_of_birth.tv_usec = GV_GET64(be); 185 m_hdr->label.last_update.tv_sec = GV_GET64(be); 186 m_hdr->label.last_update.tv_usec = GV_GET64(be); 187 m_hdr->label.drive_size = GV_GET64(be); 188 } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_POWERPC) { 189 G_VINUM_DEBUG(1, "detected legacy PowerPC header"); 190 m_hdr->magic = GV_MAGIC; 191 /* legacy 32-bit big endian on-disk header */ 192 m_hdr->config_length = GV_GET32(be); 193 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 194 off += GV_HOSTNAME_LEN; 195 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 196 off += GV_MAXDRIVENAME; 197 m_hdr->label.date_of_birth.tv_sec = GV_GET32(be); 198 m_hdr->label.date_of_birth.tv_usec = GV_GET32(be); 199 m_hdr->label.last_update.tv_sec = GV_GET32(be); 200 m_hdr->label.last_update.tv_usec = GV_GET32(be); 201 m_hdr->label.drive_size = GV_GET64(be); 202 } else if (gv_legacy_header_type(d_hdr, be) == GV_LEGACY_I386) { 203 G_VINUM_DEBUG(1, "detected legacy i386 header"); 204 m_hdr->magic = GV_MAGIC; 205 /* legacy i386 on-disk header */ 206 m_hdr->config_length = GV_GET32(le); 207 bcopy(d_hdr + off, m_hdr->label.sysname, GV_HOSTNAME_LEN); 208 off += GV_HOSTNAME_LEN; 209 bcopy(d_hdr + off, m_hdr->label.name, GV_MAXDRIVENAME); 210 off += GV_MAXDRIVENAME; 211 m_hdr->label.date_of_birth.tv_sec = GV_GET32(le); 212 m_hdr->label.date_of_birth.tv_usec = GV_GET32(le); 213 m_hdr->label.last_update.tv_sec = GV_GET32(le); 214 m_hdr->label.last_update.tv_usec = GV_GET32(le); 215 m_hdr->label.drive_size = GV_GET64(le); 216 } else { 217 G_VINUM_DEBUG(1, "detected legacy amd64 header"); 218 m_hdr->magic = GV_MAGIC; 219 /* legacy amd64 on-disk header */ 220 m_hdr->config_length = GV_GET64(le); 221 bcopy(d_hdr + 16, m_hdr->label.sysname, GV_HOSTNAME_LEN); 222 off += GV_HOSTNAME_LEN; 223 bcopy(d_hdr + 48, m_hdr->label.name, GV_MAXDRIVENAME); 224 off += GV_MAXDRIVENAME; 225 m_hdr->label.date_of_birth.tv_sec = GV_GET64(le); 226 m_hdr->label.date_of_birth.tv_usec = GV_GET64(le); 227 m_hdr->label.last_update.tv_sec = GV_GET64(le); 228 m_hdr->label.last_update.tv_usec = GV_GET64(le); 229 m_hdr->label.drive_size = GV_GET64(le); 230 } 231 232 g_free(d_hdr); 233 return (0); 234 } 235 236 /* Write out the gvinum header. */ 237 int 238 gv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr) 239 { 240 uint8_t d_hdr[GV_HDR_LEN]; 241 int off, ret; 242 243 #define GV_SET64BE(field) \ 244 do { \ 245 *((uint64_t *)&d_hdr[off]) = htobe64(field); \ 246 off += 8; \ 247 } while (0) 248 249 KASSERT(m_hdr != NULL, ("gv_write_header: null m_hdr")); 250 251 off = 0; 252 memset(d_hdr, 0, GV_HDR_LEN); 253 GV_SET64BE(m_hdr->magic); 254 GV_SET64BE(m_hdr->config_length); 255 off = 16; 256 bcopy(m_hdr->label.sysname, d_hdr + off, GV_HOSTNAME_LEN); 257 off += GV_HOSTNAME_LEN; 258 bcopy(m_hdr->label.name, d_hdr + off, GV_MAXDRIVENAME); 259 off += GV_MAXDRIVENAME; 260 GV_SET64BE(m_hdr->label.date_of_birth.tv_sec); 261 GV_SET64BE(m_hdr->label.date_of_birth.tv_usec); 262 GV_SET64BE(m_hdr->label.last_update.tv_sec); 263 GV_SET64BE(m_hdr->label.last_update.tv_usec); 264 GV_SET64BE(m_hdr->label.drive_size); 265 266 ret = g_write_data(cp, GV_HDR_OFFSET, d_hdr, GV_HDR_LEN); 267 return (ret); 268 } 269 270 void 271 gv_config_new_drive(struct gv_drive *d) 272 { 273 struct gv_hdr *vhdr; 274 struct gv_freelist *fl; 275 276 KASSERT(d != NULL, ("config_new_drive: NULL d")); 277 278 vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); 279 vhdr->magic = GV_MAGIC; 280 vhdr->config_length = GV_CFG_LEN; 281 282 mtx_lock(&hostname_mtx); 283 bcopy(G_hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); 284 mtx_unlock(&hostname_mtx); 285 strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); 286 microtime(&vhdr->label.date_of_birth); 287 288 d->hdr = vhdr; 289 290 LIST_INIT(&d->subdisks); 291 LIST_INIT(&d->freelist); 292 293 fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); 294 fl->offset = GV_DATA_START; 295 fl->size = d->avail; 296 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 297 d->freelist_entries = 1; 298 299 d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); 300 bioq_init(d->bqueue); 301 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 302 kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); 303 d->flags |= GV_DRIVE_THREAD_ACTIVE; 304 } 305 306 void 307 gv_save_config_all(struct gv_softc *sc) 308 { 309 struct gv_drive *d; 310 311 g_topology_assert(); 312 313 LIST_FOREACH(d, &sc->drives, drive) { 314 if (d->geom == NULL) 315 continue; 316 gv_save_config(NULL, d, sc); 317 } 318 } 319 320 /* Save the vinum configuration back to disk. */ 321 void 322 gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) 323 { 324 struct g_geom *gp; 325 struct g_consumer *cp2; 326 struct gv_hdr *vhdr, *hdr; 327 struct sbuf *sb; 328 int error; 329 330 g_topology_assert(); 331 332 KASSERT(d != NULL, ("gv_save_config: null d")); 333 KASSERT(sc != NULL, ("gv_save_config: null sc")); 334 335 /* 336 * We can't save the config on a drive that isn't up, but drives that 337 * were just created aren't officially up yet, so we check a special 338 * flag. 339 */ 340 if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) 341 return; 342 343 if (cp == NULL) { 344 gp = d->geom; 345 KASSERT(gp != NULL, ("gv_save_config: null gp")); 346 cp2 = LIST_FIRST(&gp->consumer); 347 KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); 348 } else 349 cp2 = cp; 350 351 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 352 vhdr->magic = GV_MAGIC; 353 vhdr->config_length = GV_CFG_LEN; 354 355 hdr = d->hdr; 356 if (hdr == NULL) { 357 G_VINUM_DEBUG(0, "drive %s has NULL hdr", d->name); 358 g_free(vhdr); 359 return; 360 } 361 microtime(&hdr->label.last_update); 362 bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); 363 364 sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); 365 gv_format_config(sc, sb, 1, NULL); 366 sbuf_finish(sb); 367 368 error = g_access(cp2, 0, 1, 0); 369 if (error) { 370 G_VINUM_DEBUG(0, "g_access failed on drive %s, errno %d", 371 d->name, error); 372 sbuf_delete(sb); 373 g_free(vhdr); 374 return; 375 } 376 g_topology_unlock(); 377 378 do { 379 error = gv_write_header(cp2, vhdr); 380 if (error) { 381 G_VINUM_DEBUG(0, "writing vhdr failed on drive %s, " 382 "errno %d", d->name, error); 383 break; 384 } 385 386 error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), 387 GV_CFG_LEN); 388 if (error) { 389 G_VINUM_DEBUG(0, "writing first config copy failed " 390 "on drive %s, errno %d", d->name, error); 391 break; 392 } 393 394 error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN, 395 sbuf_data(sb), GV_CFG_LEN); 396 if (error) 397 G_VINUM_DEBUG(0, "writing second config copy failed " 398 "on drive %s, errno %d", d->name, error); 399 } while (0); 400 401 g_topology_lock(); 402 g_access(cp2, 0, -1, 0); 403 sbuf_delete(sb); 404 g_free(vhdr); 405 406 if (d->geom != NULL) 407 gv_drive_modify(d); 408 } 409 410 /* This resembles g_slice_access(). */ 411 static int 412 gv_drive_access(struct g_provider *pp, int dr, int dw, int de) 413 { 414 struct g_geom *gp; 415 struct g_consumer *cp; 416 struct g_provider *pp2; 417 struct gv_drive *d; 418 struct gv_sd *s, *s2; 419 int error; 420 421 gp = pp->geom; 422 cp = LIST_FIRST(&gp->consumer); 423 if (cp == NULL) 424 return (0); 425 426 d = gp->softc; 427 if (d == NULL) 428 return (0); 429 430 s = pp->private; 431 KASSERT(s != NULL, ("gv_drive_access: NULL s")); 432 433 LIST_FOREACH(s2, &d->subdisks, from_drive) { 434 if (s == s2) 435 continue; 436 if (s->drive_offset + s->size <= s2->drive_offset) 437 continue; 438 if (s2->drive_offset + s2->size <= s->drive_offset) 439 continue; 440 441 /* Overlap. */ 442 pp2 = s2->provider; 443 KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); 444 if ((pp->acw + dw) > 0 && pp2->ace > 0) 445 return (EPERM); 446 if ((pp->ace + de) > 0 && pp2->acw > 0) 447 return (EPERM); 448 } 449 450 error = g_access(cp, dr, dw, de); 451 return (error); 452 } 453 454 static void 455 gv_drive_done(struct bio *bp) 456 { 457 struct gv_drive *d; 458 459 /* Put the BIO on the worker queue again. */ 460 d = bp->bio_from->geom->softc; 461 bp->bio_cflags |= GV_BIO_DONE; 462 mtx_lock(&d->bqueue_mtx); 463 bioq_insert_tail(d->bqueue, bp); 464 wakeup(d); 465 mtx_unlock(&d->bqueue_mtx); 466 } 467 468 469 static void 470 gv_drive_start(struct bio *bp) 471 { 472 struct gv_drive *d; 473 struct gv_sd *s; 474 475 switch (bp->bio_cmd) { 476 case BIO_READ: 477 case BIO_WRITE: 478 case BIO_DELETE: 479 break; 480 case BIO_GETATTR: 481 default: 482 g_io_deliver(bp, EOPNOTSUPP); 483 return; 484 } 485 486 s = bp->bio_to->private; 487 if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { 488 g_io_deliver(bp, ENXIO); 489 return; 490 } 491 492 d = bp->bio_to->geom->softc; 493 494 /* 495 * Put the BIO on the worker queue, where the worker thread will pick 496 * it up. 497 */ 498 mtx_lock(&d->bqueue_mtx); 499 bioq_disksort(d->bqueue, bp); 500 wakeup(d); 501 mtx_unlock(&d->bqueue_mtx); 502 503 } 504 505 static void 506 gv_drive_worker(void *arg) 507 { 508 struct bio *bp, *cbp; 509 struct g_geom *gp; 510 struct g_provider *pp; 511 struct gv_drive *d; 512 struct gv_sd *s; 513 int error; 514 515 d = arg; 516 517 mtx_lock(&d->bqueue_mtx); 518 for (;;) { 519 /* We were signaled to exit. */ 520 if (d->flags & GV_DRIVE_THREAD_DIE) 521 break; 522 523 /* Take the first BIO from out queue. */ 524 bp = bioq_takefirst(d->bqueue); 525 if (bp == NULL) { 526 msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); 527 continue; 528 } 529 mtx_unlock(&d->bqueue_mtx); 530 531 pp = bp->bio_to; 532 gp = pp->geom; 533 534 /* Completed request. */ 535 if (bp->bio_cflags & GV_BIO_DONE) { 536 error = bp->bio_error; 537 538 /* Deliver the original request. */ 539 g_std_done(bp); 540 541 /* The request had an error, we need to clean up. */ 542 if (error != 0) { 543 g_topology_lock(); 544 gv_set_drive_state(d, GV_DRIVE_DOWN, 545 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 546 g_topology_unlock(); 547 g_post_event(gv_drive_dead, d, M_WAITOK, d, 548 NULL); 549 } 550 551 /* New request, needs to be sent downwards. */ 552 } else { 553 s = pp->private; 554 555 if ((s->state == GV_SD_DOWN) || 556 (s->state == GV_SD_STALE)) { 557 g_io_deliver(bp, ENXIO); 558 mtx_lock(&d->bqueue_mtx); 559 continue; 560 } 561 if (bp->bio_offset > s->size) { 562 g_io_deliver(bp, EINVAL); 563 mtx_lock(&d->bqueue_mtx); 564 continue; 565 } 566 567 cbp = g_clone_bio(bp); 568 if (cbp == NULL) { 569 g_io_deliver(bp, ENOMEM); 570 mtx_lock(&d->bqueue_mtx); 571 continue; 572 } 573 if (cbp->bio_offset + cbp->bio_length > s->size) 574 cbp->bio_length = s->size - 575 cbp->bio_offset; 576 cbp->bio_done = gv_drive_done; 577 cbp->bio_offset += s->drive_offset; 578 g_io_request(cbp, LIST_FIRST(&gp->consumer)); 579 } 580 581 mtx_lock(&d->bqueue_mtx); 582 } 583 584 while ((bp = bioq_takefirst(d->bqueue)) != NULL) { 585 mtx_unlock(&d->bqueue_mtx); 586 if (bp->bio_cflags & GV_BIO_DONE) 587 g_std_done(bp); 588 else 589 g_io_deliver(bp, ENXIO); 590 mtx_lock(&d->bqueue_mtx); 591 } 592 mtx_unlock(&d->bqueue_mtx); 593 d->flags |= GV_DRIVE_THREAD_DEAD; 594 595 kproc_exit(ENXIO); 596 } 597 598 599 static void 600 gv_drive_orphan(struct g_consumer *cp) 601 { 602 struct g_geom *gp; 603 struct gv_drive *d; 604 605 g_topology_assert(); 606 gp = cp->geom; 607 g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); 608 d = gp->softc; 609 if (d != NULL) { 610 gv_set_drive_state(d, GV_DRIVE_DOWN, 611 GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); 612 g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); 613 } else 614 g_wither_geom(gp, ENXIO); 615 } 616 617 static struct g_geom * 618 gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) 619 { 620 struct g_geom *gp, *gp2; 621 struct g_consumer *cp; 622 struct gv_drive *d; 623 struct gv_sd *s; 624 struct gv_softc *sc; 625 struct gv_freelist *fl; 626 struct gv_hdr *vhdr; 627 int error; 628 char *buf, errstr[ERRBUFSIZ]; 629 630 vhdr = NULL; 631 d = NULL; 632 633 g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); 634 g_topology_assert(); 635 636 /* Find the VINUM class and its associated geom. */ 637 gp2 = find_vinum_geom(); 638 if (gp2 == NULL) 639 return (NULL); 640 sc = gp2->softc; 641 642 gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); 643 gp->start = gv_drive_start; 644 gp->orphan = gv_drive_orphan; 645 gp->access = gv_drive_access; 646 gp->start = gv_drive_start; 647 648 cp = g_new_consumer(gp); 649 g_attach(cp, pp); 650 error = g_access(cp, 1, 0, 0); 651 if (error) { 652 g_detach(cp); 653 g_destroy_consumer(cp); 654 g_destroy_geom(gp); 655 return (NULL); 656 } 657 658 g_topology_unlock(); 659 660 /* Now check if the provided slice is a valid vinum drive. */ 661 do { 662 vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); 663 error = gv_read_header(cp, vhdr); 664 if (error) { 665 g_free(vhdr); 666 break; 667 } 668 669 /* A valid vinum drive, let's parse the on-disk information. */ 670 buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); 671 if (buf == NULL) { 672 g_free(vhdr); 673 break; 674 } 675 g_topology_lock(); 676 gv_parse_config(sc, buf, 1); 677 g_free(buf); 678 679 /* 680 * Let's see if this drive is already known in the 681 * configuration. 682 */ 683 d = gv_find_drive(sc, vhdr->label.name); 684 685 /* We already know about this drive. */ 686 if (d != NULL) { 687 /* Check if this drive already has a geom. */ 688 if (d->geom != NULL) { 689 g_topology_unlock(); 690 g_free(vhdr); 691 break; 692 } 693 bcopy(vhdr, d->hdr, sizeof(*vhdr)); 694 g_free(vhdr); 695 696 /* This is a new drive. */ 697 } else { 698 d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); 699 700 /* Initialize all needed variables. */ 701 d->size = pp->mediasize - GV_DATA_START; 702 d->avail = d->size; 703 d->hdr = vhdr; 704 strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); 705 LIST_INIT(&d->subdisks); 706 LIST_INIT(&d->freelist); 707 708 /* We also need a freelist entry. */ 709 fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); 710 fl->offset = GV_DATA_START; 711 fl->size = d->avail; 712 LIST_INSERT_HEAD(&d->freelist, fl, freelist); 713 d->freelist_entries = 1; 714 715 /* Save it into the main configuration. */ 716 LIST_INSERT_HEAD(&sc->drives, d, drive); 717 } 718 719 /* 720 * Create bio queue, queue mutex and a worker thread, if 721 * necessary. 722 */ 723 if (d->bqueue == NULL) { 724 d->bqueue = g_malloc(sizeof(struct bio_queue_head), 725 M_WAITOK | M_ZERO); 726 bioq_init(d->bqueue); 727 } 728 if (mtx_initialized(&d->bqueue_mtx) == 0) 729 mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); 730 731 if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { 732 kproc_create(gv_drive_worker, d, NULL, 0, 0, 733 "gv_d %s", d->name); 734 d->flags |= GV_DRIVE_THREAD_ACTIVE; 735 } 736 737 g_access(cp, -1, 0, 0); 738 739 gp->softc = d; 740 d->geom = gp; 741 d->vinumconf = sc; 742 strncpy(d->device, pp->name, GV_MAXDRIVENAME); 743 744 /* 745 * Find out which subdisks belong to this drive and crosslink 746 * them. 747 */ 748 LIST_FOREACH(s, &sc->subdisks, sd) { 749 if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) 750 /* XXX: errors ignored */ 751 gv_sd_to_drive(sc, d, s, errstr, 752 sizeof(errstr)); 753 } 754 755 /* This drive is now up for sure. */ 756 gv_set_drive_state(d, GV_DRIVE_UP, 0); 757 758 /* 759 * If there are subdisks on this drive, we need to create 760 * providers for them. 761 */ 762 if (d->sdcount) 763 gv_drive_modify(d); 764 765 return (gp); 766 767 } while (0); 768 769 g_topology_lock(); 770 g_access(cp, -1, 0, 0); 771 772 g_detach(cp); 773 g_destroy_consumer(cp); 774 g_destroy_geom(gp); 775 return (NULL); 776 } 777 778 /* 779 * Modify the providers for the given drive 'd'. It is assumed that the 780 * subdisk list of 'd' is already correctly set up. 781 */ 782 void 783 gv_drive_modify(struct gv_drive *d) 784 { 785 struct g_geom *gp; 786 struct g_consumer *cp; 787 struct g_provider *pp, *pp2; 788 struct gv_sd *s; 789 790 KASSERT(d != NULL, ("gv_drive_modify: null d")); 791 gp = d->geom; 792 KASSERT(gp != NULL, ("gv_drive_modify: null gp")); 793 cp = LIST_FIRST(&gp->consumer); 794 KASSERT(cp != NULL, ("gv_drive_modify: null cp")); 795 pp = cp->provider; 796 KASSERT(pp != NULL, ("gv_drive_modify: null pp")); 797 798 g_topology_assert(); 799 800 LIST_FOREACH(s, &d->subdisks, from_drive) { 801 /* This subdisk already has a provider. */ 802 if (s->provider != NULL) 803 continue; 804 pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); 805 pp2->mediasize = s->size; 806 pp2->sectorsize = pp->sectorsize; 807 g_error_provider(pp2, 0); 808 s->provider = pp2; 809 pp2->private = s; 810 } 811 } 812 813 static void 814 gv_drive_dead(void *arg, int flag) 815 { 816 struct g_geom *gp; 817 struct g_consumer *cp; 818 struct gv_drive *d; 819 struct gv_sd *s; 820 821 g_topology_assert(); 822 KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); 823 824 if (flag == EV_CANCEL) 825 return; 826 827 d = arg; 828 if (d->state != GV_DRIVE_DOWN) 829 return; 830 831 g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); 832 833 gp = d->geom; 834 if (gp == NULL) 835 return; 836 837 LIST_FOREACH(cp, &gp->consumer, consumer) { 838 if (cp->nstart != cp->nend) { 839 G_VINUM_DEBUG(0, "dead drive '%s' still has " 840 "active requests, cannot detach consumer", 841 d->name); 842 g_post_event(gv_drive_dead, d, M_WAITOK, d, 843 NULL); 844 return; 845 } 846 if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) 847 g_access(cp, -cp->acr, -cp->acw, -cp->ace); 848 } 849 850 G_VINUM_DEBUG(1, "lost drive '%s'", d->name); 851 d->geom = NULL; 852 LIST_FOREACH(s, &d->subdisks, from_drive) { 853 s->provider = NULL; 854 s->consumer = NULL; 855 } 856 gv_kill_drive_thread(d); 857 gp->softc = NULL; 858 g_wither_geom(gp, ENXIO); 859 } 860 861 static int 862 gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, 863 struct g_geom *gp) 864 { 865 struct gv_drive *d; 866 867 g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); 868 g_topology_assert(); 869 870 d = gp->softc; 871 gv_kill_drive_thread(d); 872 873 g_wither_geom(gp, ENXIO); 874 return (0); 875 } 876 877 #define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" 878 879 static struct g_class g_vinum_drive_class = { 880 .name = VINUMDRIVE_CLASS_NAME, 881 .version = G_VERSION, 882 .taste = gv_drive_taste, 883 .destroy_geom = gv_drive_destroy_geom 884 }; 885 886 DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); 887