/*-
 * SPDX-License-Identifier: (Beerware AND BSD-3-Clause)
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 *
 */

/*-
 * The following functions are based on the vn(4) driver: mdstart_swap(),
 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
 * and as such under the following copyright:
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California. All rights reserved.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
56 * 57 * from: Utah Hdr: vn.c 1.13 94/04/02 58 * 59 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 60 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 61 */ 62 63 #include "opt_rootdevname.h" 64 #include "opt_geom.h" 65 #include "opt_md.h" 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/bio.h> 70 #include <sys/buf.h> 71 #include <sys/conf.h> 72 #include <sys/devicestat.h> 73 #include <sys/fcntl.h> 74 #include <sys/kernel.h> 75 #include <sys/kthread.h> 76 #include <sys/limits.h> 77 #include <sys/linker.h> 78 #include <sys/lock.h> 79 #include <sys/malloc.h> 80 #include <sys/mdioctl.h> 81 #include <sys/mount.h> 82 #include <sys/mutex.h> 83 #include <sys/sx.h> 84 #include <sys/namei.h> 85 #include <sys/proc.h> 86 #include <sys/queue.h> 87 #include <sys/rwlock.h> 88 #include <sys/sbuf.h> 89 #include <sys/sched.h> 90 #include <sys/sf_buf.h> 91 #include <sys/sysctl.h> 92 #include <sys/uio.h> 93 #include <sys/vnode.h> 94 #include <sys/disk.h> 95 96 #include <geom/geom.h> 97 #include <geom/geom_int.h> 98 99 #include <vm/vm.h> 100 #include <vm/vm_param.h> 101 #include <vm/vm_object.h> 102 #include <vm/vm_page.h> 103 #include <vm/vm_pager.h> 104 #include <vm/swap_pager.h> 105 #include <vm/uma.h> 106 107 #include <machine/bus.h> 108 109 #define MD_MODVER 1 110 111 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ 112 #define MD_EXITING 0x20000 /* Worker thread is exiting. */ 113 #define MD_PROVIDERGONE 0x40000 /* Safe to free the softc */ 114 115 #ifndef MD_NSECT 116 #define MD_NSECT (10000 * 2) 117 #endif 118 119 struct md_req { 120 unsigned md_unit; /* unit number */ 121 enum md_types md_type; /* type of disk */ 122 off_t md_mediasize; /* size of disk in bytes */ 123 unsigned md_sectorsize; /* sectorsize */ 124 unsigned md_options; /* options */ 125 int md_fwheads; /* firmware heads */ 126 int md_fwsectors; /* firmware sectors */ 127 char *md_file; /* pathname of file to mount */ 128 enum uio_seg md_file_seg; /* location of md_file */ 129 char *md_label; /* label of the device (userspace) */ 130 int *md_units; /* pointer to units array (kernel) */ 131 size_t md_units_nitems; /* items in md_units array */ 132 }; 133 134 #ifdef COMPAT_FREEBSD32 135 struct md_ioctl32 { 136 unsigned md_version; 137 unsigned md_unit; 138 enum md_types md_type; 139 uint32_t md_file; 140 off_t md_mediasize; 141 unsigned md_sectorsize; 142 unsigned md_options; 143 uint64_t md_base; 144 int md_fwheads; 145 int md_fwsectors; 146 uint32_t md_label; 147 int md_pad[MDNPAD]; 148 } __attribute__((__packed__)); 149 CTASSERT((sizeof(struct md_ioctl32)) == 436); 150 151 #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32) 152 #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32) 153 #define MDIOCQUERY_32 _IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32) 154 #define MDIOCRESIZE_32 _IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32) 155 #endif /* COMPAT_FREEBSD32 */ 156 157 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); 158 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); 159 160 static int md_debug; 161 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, 162 "Enable md(4) debug messages"); 163 static int md_malloc_wait; 164 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, 165 "Allow malloc to wait for memory allocations"); 166 167 #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) 168 #define MD_ROOT_FSTYPE "ufs" 169 #endif 170 171 #if defined(MD_ROOT) 172 /* 173 * Preloaded image gets put here. 
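 * Depending on the kernel configuration this is either a fixed-size
 * array placed in the "oldmfs" section (MD_ROOT_SIZE), a pointer and
 * size describing a region that is already mapped (MD_ROOT_MEM), or a
 * pair of weak symbols, mfs_root and mfs_root_end, whose distance gives
 * the size of an image linked into the kernel.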
174 */ 175 #if defined(MD_ROOT_SIZE) 176 /* 177 * We put the mfs_root symbol into the oldmfs section of the kernel object file. 178 * Applications that patch the object with the image can determine 179 * the size looking at the oldmfs section size within the kernel. 180 */ 181 u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs"))); 182 const int mfs_root_size = sizeof(mfs_root); 183 #elif defined(MD_ROOT_MEM) 184 /* MD region already mapped in the memory */ 185 u_char *mfs_root; 186 int mfs_root_size; 187 #else 188 extern volatile u_char __weak_symbol mfs_root; 189 extern volatile u_char __weak_symbol mfs_root_end; 190 #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root)) 191 #endif 192 #endif 193 194 static g_init_t g_md_init; 195 static g_fini_t g_md_fini; 196 static g_start_t g_md_start; 197 static g_access_t g_md_access; 198 static void g_md_dumpconf(struct sbuf *sb, const char *indent, 199 struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); 200 static g_provgone_t g_md_providergone; 201 202 static struct cdev *status_dev = NULL; 203 static struct sx md_sx; 204 static struct unrhdr *md_uh; 205 206 static d_ioctl_t mdctlioctl; 207 208 static struct cdevsw mdctl_cdevsw = { 209 .d_version = D_VERSION, 210 .d_ioctl = mdctlioctl, 211 .d_name = MD_NAME, 212 }; 213 214 struct g_class g_md_class = { 215 .name = "MD", 216 .version = G_VERSION, 217 .init = g_md_init, 218 .fini = g_md_fini, 219 .start = g_md_start, 220 .access = g_md_access, 221 .dumpconf = g_md_dumpconf, 222 .providergone = g_md_providergone, 223 }; 224 225 DECLARE_GEOM_CLASS(g_md_class, g_md); 226 227 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); 228 229 #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) 230 #define NMASK (NINDIR-1) 231 static int nshift; 232 233 static uma_zone_t md_pbuf_zone; 234 235 struct indir { 236 uintptr_t *array; 237 u_int total; 238 u_int used; 239 u_int shift; 240 }; 241 242 struct md_s { 243 int unit; 244 LIST_ENTRY(md_s) list; 245 struct bio_queue_head bio_queue; 246 struct mtx queue_mtx; 247 struct cdev *dev; 248 enum md_types type; 249 off_t mediasize; 250 unsigned sectorsize; 251 unsigned opencount; 252 unsigned fwheads; 253 unsigned fwsectors; 254 char ident[32]; 255 unsigned flags; 256 char name[20]; 257 struct proc *procp; 258 struct g_geom *gp; 259 struct g_provider *pp; 260 int (*start)(struct md_s *sc, struct bio *bp); 261 struct devstat *devstat; 262 263 /* MD_MALLOC related fields */ 264 struct indir *indir; 265 uma_zone_t uma; 266 267 /* MD_PRELOAD related fields */ 268 u_char *pl_ptr; 269 size_t pl_len; 270 271 /* MD_VNODE related fields */ 272 struct vnode *vnode; 273 char file[PATH_MAX]; 274 char label[PATH_MAX]; 275 struct ucred *cred; 276 277 /* MD_SWAP related fields */ 278 vm_object_t object; 279 }; 280 281 static struct indir * 282 new_indir(u_int shift) 283 { 284 struct indir *ip; 285 286 ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT) 287 | M_ZERO); 288 if (ip == NULL) 289 return (NULL); 290 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 291 M_MDSECT, (md_malloc_wait ? 
M_WAITOK : M_NOWAIT) | M_ZERO); 292 if (ip->array == NULL) { 293 free(ip, M_MD); 294 return (NULL); 295 } 296 ip->total = NINDIR; 297 ip->shift = shift; 298 return (ip); 299 } 300 301 static void 302 del_indir(struct indir *ip) 303 { 304 305 free(ip->array, M_MDSECT); 306 free(ip, M_MD); 307 } 308 309 static void 310 destroy_indir(struct md_s *sc, struct indir *ip) 311 { 312 int i; 313 314 for (i = 0; i < NINDIR; i++) { 315 if (!ip->array[i]) 316 continue; 317 if (ip->shift) 318 destroy_indir(sc, (struct indir*)(ip->array[i])); 319 else if (ip->array[i] > 255) 320 uma_zfree(sc->uma, (void *)(ip->array[i])); 321 } 322 del_indir(ip); 323 } 324 325 /* 326 * This function does the math and allocates the top level "indir" structure 327 * for a device of "size" sectors. 328 */ 329 330 static struct indir * 331 dimension(off_t size) 332 { 333 off_t rcnt; 334 struct indir *ip; 335 int layer; 336 337 rcnt = size; 338 layer = 0; 339 while (rcnt > NINDIR) { 340 rcnt /= NINDIR; 341 layer++; 342 } 343 344 /* 345 * XXX: the top layer is probably not fully populated, so we allocate 346 * too much space for ip->array in here. 347 */ 348 ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); 349 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 350 M_MDSECT, M_WAITOK | M_ZERO); 351 ip->total = NINDIR; 352 ip->shift = layer * nshift; 353 return (ip); 354 } 355 356 /* 357 * Read a given sector 358 */ 359 360 static uintptr_t 361 s_read(struct indir *ip, off_t offset) 362 { 363 struct indir *cip; 364 int idx; 365 uintptr_t up; 366 367 if (md_debug > 1) 368 printf("s_read(%jd)\n", (intmax_t)offset); 369 up = 0; 370 for (cip = ip; cip != NULL;) { 371 if (cip->shift) { 372 idx = (offset >> cip->shift) & NMASK; 373 up = cip->array[idx]; 374 cip = (struct indir *)up; 375 continue; 376 } 377 idx = offset & NMASK; 378 return (cip->array[idx]); 379 } 380 return (0); 381 } 382 383 /* 384 * Write a given sector, prune the tree if the value is 0 385 */ 386 387 static int 388 s_write(struct indir *ip, off_t offset, uintptr_t ptr) 389 { 390 struct indir *cip, *lip[10]; 391 int idx, li; 392 uintptr_t up; 393 394 if (md_debug > 1) 395 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); 396 up = 0; 397 li = 0; 398 cip = ip; 399 for (;;) { 400 lip[li++] = cip; 401 if (cip->shift) { 402 idx = (offset >> cip->shift) & NMASK; 403 up = cip->array[idx]; 404 if (up != 0) { 405 cip = (struct indir *)up; 406 continue; 407 } 408 /* Allocate branch */ 409 cip->array[idx] = 410 (uintptr_t)new_indir(cip->shift - nshift); 411 if (cip->array[idx] == 0) 412 return (ENOSPC); 413 cip->used++; 414 up = cip->array[idx]; 415 cip = (struct indir *)up; 416 continue; 417 } 418 /* leafnode */ 419 idx = offset & NMASK; 420 up = cip->array[idx]; 421 if (up != 0) 422 cip->used--; 423 cip->array[idx] = ptr; 424 if (ptr != 0) 425 cip->used++; 426 break; 427 } 428 if (cip->used != 0 || li == 1) 429 return (0); 430 li--; 431 while (cip->used == 0 && cip != ip) { 432 li--; 433 idx = (offset >> lip[li]->shift) & NMASK; 434 up = lip[li]->array[idx]; 435 KASSERT(up == (uintptr_t)cip, ("md screwed up")); 436 del_indir(cip); 437 lip[li]->array[idx] = 0; 438 lip[li]->used--; 439 cip = lip[li]; 440 } 441 return (0); 442 } 443 444 static int 445 g_md_access(struct g_provider *pp, int r, int w, int e) 446 { 447 struct md_s *sc; 448 449 sc = pp->geom->softc; 450 if (sc == NULL) { 451 if (r <= 0 && w <= 0 && e <= 0) 452 return (0); 453 return (ENXIO); 454 } 455 r += pp->acr; 456 w += pp->acw; 457 e += pp->ace; 458 if ((sc->flags & MD_READONLY) != 0 && w > 0) 459 
return (EROFS); 460 if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { 461 sc->opencount = 1; 462 } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { 463 sc->opencount = 0; 464 } 465 return (0); 466 } 467 468 static void 469 g_md_start(struct bio *bp) 470 { 471 struct md_s *sc; 472 473 sc = bp->bio_to->geom->softc; 474 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { 475 devstat_start_transaction_bio(sc->devstat, bp); 476 } 477 mtx_lock(&sc->queue_mtx); 478 bioq_disksort(&sc->bio_queue, bp); 479 wakeup(sc); 480 mtx_unlock(&sc->queue_mtx); 481 } 482 483 #define MD_MALLOC_MOVE_ZERO 1 484 #define MD_MALLOC_MOVE_FILL 2 485 #define MD_MALLOC_MOVE_READ 3 486 #define MD_MALLOC_MOVE_WRITE 4 487 #define MD_MALLOC_MOVE_CMP 5 488 489 static int 490 md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize, 491 void *ptr, u_char fill, int op) 492 { 493 struct sf_buf *sf; 494 vm_page_t m, *mp1; 495 char *p, first; 496 off_t *uc; 497 unsigned n; 498 int error, i, ma_offs1, sz, first_read; 499 500 m = NULL; 501 error = 0; 502 sf = NULL; 503 /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ 504 first = 0; 505 first_read = 0; 506 uc = ptr; 507 mp1 = *mp; 508 ma_offs1 = *ma_offs; 509 /* } */ 510 sched_pin(); 511 for (n = sectorsize; n != 0; n -= sz) { 512 sz = imin(PAGE_SIZE - *ma_offs, n); 513 if (m != **mp) { 514 if (sf != NULL) 515 sf_buf_free(sf); 516 m = **mp; 517 sf = sf_buf_alloc(m, SFB_CPUPRIVATE | 518 (md_malloc_wait ? 0 : SFB_NOWAIT)); 519 if (sf == NULL) { 520 error = ENOMEM; 521 break; 522 } 523 } 524 p = (char *)sf_buf_kva(sf) + *ma_offs; 525 switch (op) { 526 case MD_MALLOC_MOVE_ZERO: 527 bzero(p, sz); 528 break; 529 case MD_MALLOC_MOVE_FILL: 530 memset(p, fill, sz); 531 break; 532 case MD_MALLOC_MOVE_READ: 533 bcopy(ptr, p, sz); 534 cpu_flush_dcache(p, sz); 535 break; 536 case MD_MALLOC_MOVE_WRITE: 537 bcopy(p, ptr, sz); 538 break; 539 case MD_MALLOC_MOVE_CMP: 540 for (i = 0; i < sz; i++, p++) { 541 if (!first_read) { 542 *uc = (u_char)*p; 543 first = *p; 544 first_read = 1; 545 } else if (*p != first) { 546 error = EDOOFUS; 547 break; 548 } 549 } 550 break; 551 default: 552 KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op)); 553 break; 554 } 555 if (error != 0) 556 break; 557 *ma_offs += sz; 558 *ma_offs %= PAGE_SIZE; 559 if (*ma_offs == 0) 560 (*mp)++; 561 ptr = (char *)ptr + sz; 562 } 563 564 if (sf != NULL) 565 sf_buf_free(sf); 566 sched_unpin(); 567 if (op == MD_MALLOC_MOVE_CMP && error != 0) { 568 *mp = mp1; 569 *ma_offs = ma_offs1; 570 } 571 return (error); 572 } 573 574 static int 575 md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs, 576 unsigned len, void *ptr, u_char fill, int op) 577 { 578 bus_dma_segment_t *vlist; 579 uint8_t *p, *end, first; 580 off_t *uc; 581 int ma_offs, seg_len; 582 583 vlist = *pvlist; 584 ma_offs = *pma_offs; 585 uc = ptr; 586 587 for (; len != 0; len -= seg_len) { 588 seg_len = imin(vlist->ds_len - ma_offs, len); 589 p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs; 590 switch (op) { 591 case MD_MALLOC_MOVE_ZERO: 592 bzero(p, seg_len); 593 break; 594 case MD_MALLOC_MOVE_FILL: 595 memset(p, fill, seg_len); 596 break; 597 case MD_MALLOC_MOVE_READ: 598 bcopy(ptr, p, seg_len); 599 cpu_flush_dcache(p, seg_len); 600 break; 601 case MD_MALLOC_MOVE_WRITE: 602 bcopy(p, ptr, seg_len); 603 break; 604 case MD_MALLOC_MOVE_CMP: 605 end = p + seg_len; 606 first = *uc = *p; 607 /* Confirm all following bytes match the first */ 608 while (++p < end) { 609 if (*p != first) 610 return (EDOOFUS); 611 } 612 break; 613 
default: 614 KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op)); 615 break; 616 } 617 618 ma_offs += seg_len; 619 if (ma_offs == vlist->ds_len) { 620 ma_offs = 0; 621 vlist++; 622 } 623 ptr = (uint8_t *)ptr + seg_len; 624 } 625 *pvlist = vlist; 626 *pma_offs = ma_offs; 627 628 return (0); 629 } 630 631 static int 632 mdstart_malloc(struct md_s *sc, struct bio *bp) 633 { 634 u_char *dst; 635 vm_page_t *m; 636 bus_dma_segment_t *vlist; 637 int i, error, error1, ma_offs, notmapped; 638 off_t secno, nsec, uc; 639 uintptr_t sp, osp; 640 641 switch (bp->bio_cmd) { 642 case BIO_READ: 643 case BIO_WRITE: 644 case BIO_DELETE: 645 break; 646 default: 647 return (EOPNOTSUPP); 648 } 649 650 notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; 651 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 652 (bus_dma_segment_t *)bp->bio_data : NULL; 653 if (notmapped) { 654 m = bp->bio_ma; 655 ma_offs = bp->bio_ma_offset; 656 dst = NULL; 657 KASSERT(vlist == NULL, ("vlists cannot be unmapped")); 658 } else if (vlist != NULL) { 659 ma_offs = bp->bio_ma_offset; 660 dst = NULL; 661 } else { 662 dst = bp->bio_data; 663 } 664 665 nsec = bp->bio_length / sc->sectorsize; 666 secno = bp->bio_offset / sc->sectorsize; 667 error = 0; 668 while (nsec--) { 669 osp = s_read(sc->indir, secno); 670 if (bp->bio_cmd == BIO_DELETE) { 671 if (osp != 0) 672 error = s_write(sc->indir, secno, 0); 673 } else if (bp->bio_cmd == BIO_READ) { 674 if (osp == 0) { 675 if (notmapped) { 676 error = md_malloc_move_ma(&m, &ma_offs, 677 sc->sectorsize, NULL, 0, 678 MD_MALLOC_MOVE_ZERO); 679 } else if (vlist != NULL) { 680 error = md_malloc_move_vlist(&vlist, 681 &ma_offs, sc->sectorsize, NULL, 0, 682 MD_MALLOC_MOVE_ZERO); 683 } else 684 bzero(dst, sc->sectorsize); 685 } else if (osp <= 255) { 686 if (notmapped) { 687 error = md_malloc_move_ma(&m, &ma_offs, 688 sc->sectorsize, NULL, osp, 689 MD_MALLOC_MOVE_FILL); 690 } else if (vlist != NULL) { 691 error = md_malloc_move_vlist(&vlist, 692 &ma_offs, sc->sectorsize, NULL, osp, 693 MD_MALLOC_MOVE_FILL); 694 } else 695 memset(dst, osp, sc->sectorsize); 696 } else { 697 if (notmapped) { 698 error = md_malloc_move_ma(&m, &ma_offs, 699 sc->sectorsize, (void *)osp, 0, 700 MD_MALLOC_MOVE_READ); 701 } else if (vlist != NULL) { 702 error = md_malloc_move_vlist(&vlist, 703 &ma_offs, sc->sectorsize, 704 (void *)osp, 0, 705 MD_MALLOC_MOVE_READ); 706 } else { 707 bcopy((void *)osp, dst, sc->sectorsize); 708 cpu_flush_dcache(dst, sc->sectorsize); 709 } 710 } 711 osp = 0; 712 } else if (bp->bio_cmd == BIO_WRITE) { 713 if (sc->flags & MD_COMPRESS) { 714 if (notmapped) { 715 error1 = md_malloc_move_ma(&m, &ma_offs, 716 sc->sectorsize, &uc, 0, 717 MD_MALLOC_MOVE_CMP); 718 i = error1 == 0 ? sc->sectorsize : 0; 719 } else if (vlist != NULL) { 720 error1 = md_malloc_move_vlist(&vlist, 721 &ma_offs, sc->sectorsize, &uc, 0, 722 MD_MALLOC_MOVE_CMP); 723 i = error1 == 0 ? sc->sectorsize : 0; 724 } else { 725 uc = dst[0]; 726 for (i = 1; i < sc->sectorsize; i++) { 727 if (dst[i] != uc) 728 break; 729 } 730 } 731 } else { 732 i = 0; 733 uc = 0; 734 } 735 if (i == sc->sectorsize) { 736 if (osp != uc) 737 error = s_write(sc->indir, secno, uc); 738 } else { 739 if (osp <= 255) { 740 sp = (uintptr_t)uma_zalloc(sc->uma, 741 md_malloc_wait ? 
M_WAITOK : 742 M_NOWAIT); 743 if (sp == 0) { 744 error = ENOSPC; 745 break; 746 } 747 if (notmapped) { 748 error = md_malloc_move_ma(&m, 749 &ma_offs, sc->sectorsize, 750 (void *)sp, 0, 751 MD_MALLOC_MOVE_WRITE); 752 } else if (vlist != NULL) { 753 error = md_malloc_move_vlist( 754 &vlist, &ma_offs, 755 sc->sectorsize, (void *)sp, 756 0, MD_MALLOC_MOVE_WRITE); 757 } else { 758 bcopy(dst, (void *)sp, 759 sc->sectorsize); 760 } 761 error = s_write(sc->indir, secno, sp); 762 } else { 763 if (notmapped) { 764 error = md_malloc_move_ma(&m, 765 &ma_offs, sc->sectorsize, 766 (void *)osp, 0, 767 MD_MALLOC_MOVE_WRITE); 768 } else if (vlist != NULL) { 769 error = md_malloc_move_vlist( 770 &vlist, &ma_offs, 771 sc->sectorsize, (void *)osp, 772 0, MD_MALLOC_MOVE_WRITE); 773 } else { 774 bcopy(dst, (void *)osp, 775 sc->sectorsize); 776 } 777 osp = 0; 778 } 779 } 780 } else { 781 error = EOPNOTSUPP; 782 } 783 if (osp > 255) 784 uma_zfree(sc->uma, (void*)osp); 785 if (error != 0) 786 break; 787 secno++; 788 if (!notmapped && vlist == NULL) 789 dst += sc->sectorsize; 790 } 791 bp->bio_resid = 0; 792 return (error); 793 } 794 795 static void 796 mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len) 797 { 798 off_t seg_len; 799 800 while (offset >= vlist->ds_len) { 801 offset -= vlist->ds_len; 802 vlist++; 803 } 804 805 while (len != 0) { 806 seg_len = omin(len, vlist->ds_len - offset); 807 bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset), 808 seg_len); 809 offset = 0; 810 src = (uint8_t *)src + seg_len; 811 len -= seg_len; 812 vlist++; 813 } 814 } 815 816 static void 817 mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len) 818 { 819 off_t seg_len; 820 821 while (offset >= vlist->ds_len) { 822 offset -= vlist->ds_len; 823 vlist++; 824 } 825 826 while (len != 0) { 827 seg_len = omin(len, vlist->ds_len - offset); 828 bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst, 829 seg_len); 830 offset = 0; 831 dst = (uint8_t *)dst + seg_len; 832 len -= seg_len; 833 vlist++; 834 } 835 } 836 837 static int 838 mdstart_preload(struct md_s *sc, struct bio *bp) 839 { 840 uint8_t *p; 841 842 p = sc->pl_ptr + bp->bio_offset; 843 switch (bp->bio_cmd) { 844 case BIO_READ: 845 if ((bp->bio_flags & BIO_VLIST) != 0) { 846 mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data, 847 bp->bio_ma_offset, bp->bio_length); 848 } else { 849 bcopy(p, bp->bio_data, bp->bio_length); 850 } 851 cpu_flush_dcache(bp->bio_data, bp->bio_length); 852 break; 853 case BIO_WRITE: 854 if ((bp->bio_flags & BIO_VLIST) != 0) { 855 mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data, 856 bp->bio_ma_offset, p, bp->bio_length); 857 } else { 858 bcopy(bp->bio_data, p, bp->bio_length); 859 } 860 break; 861 } 862 bp->bio_resid = 0; 863 return (0); 864 } 865 866 static int 867 mdstart_vnode(struct md_s *sc, struct bio *bp) 868 { 869 int error; 870 struct uio auio; 871 struct iovec aiov; 872 struct iovec *piov; 873 struct mount *mp; 874 struct vnode *vp; 875 struct buf *pb; 876 bus_dma_segment_t *vlist; 877 struct thread *td; 878 off_t iolen, iostart, len, zerosize; 879 int ma_offs, npages; 880 881 switch (bp->bio_cmd) { 882 case BIO_READ: 883 auio.uio_rw = UIO_READ; 884 break; 885 case BIO_WRITE: 886 case BIO_DELETE: 887 auio.uio_rw = UIO_WRITE; 888 break; 889 case BIO_FLUSH: 890 break; 891 default: 892 return (EOPNOTSUPP); 893 } 894 895 td = curthread; 896 vp = sc->vnode; 897 pb = NULL; 898 piov = NULL; 899 ma_offs = bp->bio_ma_offset; 900 len = bp->bio_length; 901 902 /* 903 * VNODE I/O 904 * 905 * If 
an error occurs, we set BIO_ERROR but we do not set 906 * B_INVAL because (for a write anyway), the buffer is 907 * still valid. 908 */ 909 910 if (bp->bio_cmd == BIO_FLUSH) { 911 (void) vn_start_write(vp, &mp, V_WAIT); 912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 913 error = VOP_FSYNC(vp, MNT_WAIT, td); 914 VOP_UNLOCK(vp); 915 vn_finished_write(mp); 916 return (error); 917 } 918 919 auio.uio_offset = (vm_ooffset_t)bp->bio_offset; 920 auio.uio_resid = bp->bio_length; 921 auio.uio_segflg = UIO_SYSSPACE; 922 auio.uio_td = td; 923 924 if (bp->bio_cmd == BIO_DELETE) { 925 /* 926 * Emulate BIO_DELETE by writing zeros. 927 */ 928 zerosize = ZERO_REGION_SIZE - 929 (ZERO_REGION_SIZE % sc->sectorsize); 930 auio.uio_iovcnt = howmany(bp->bio_length, zerosize); 931 piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); 932 auio.uio_iov = piov; 933 while (len > 0) { 934 piov->iov_base = __DECONST(void *, zero_region); 935 piov->iov_len = len; 936 if (len > zerosize) 937 piov->iov_len = zerosize; 938 len -= piov->iov_len; 939 piov++; 940 } 941 piov = auio.uio_iov; 942 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 943 piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); 944 auio.uio_iov = piov; 945 vlist = (bus_dma_segment_t *)bp->bio_data; 946 while (len > 0) { 947 piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + 948 ma_offs); 949 piov->iov_len = vlist->ds_len - ma_offs; 950 if (piov->iov_len > len) 951 piov->iov_len = len; 952 len -= piov->iov_len; 953 ma_offs = 0; 954 vlist++; 955 piov++; 956 } 957 auio.uio_iovcnt = piov - auio.uio_iov; 958 piov = auio.uio_iov; 959 } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 960 pb = uma_zalloc(md_pbuf_zone, M_WAITOK); 961 MPASS((pb->b_flags & B_MAXPHYS) != 0); 962 bp->bio_resid = len; 963 unmapped_step: 964 npages = atop(min(maxphys, round_page(len + (ma_offs & 965 PAGE_MASK)))); 966 iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); 967 KASSERT(iolen > 0, ("zero iolen")); 968 pmap_qenter((vm_offset_t)pb->b_data, 969 &bp->bio_ma[atop(ma_offs)], npages); 970 aiov.iov_base = (void *)((vm_offset_t)pb->b_data + 971 (ma_offs & PAGE_MASK)); 972 aiov.iov_len = iolen; 973 auio.uio_iov = &aiov; 974 auio.uio_iovcnt = 1; 975 auio.uio_resid = iolen; 976 } else { 977 aiov.iov_base = bp->bio_data; 978 aiov.iov_len = bp->bio_length; 979 auio.uio_iov = &aiov; 980 auio.uio_iovcnt = 1; 981 } 982 iostart = auio.uio_offset; 983 if (auio.uio_rw == UIO_READ) { 984 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 985 error = VOP_READ(vp, &auio, 0, sc->cred); 986 VOP_UNLOCK(vp); 987 } else { 988 (void) vn_start_write(vp, &mp, V_WAIT); 989 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 990 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC, 991 sc->cred); 992 VOP_UNLOCK(vp); 993 vn_finished_write(mp); 994 if (error == 0) 995 sc->flags &= ~MD_VERIFY; 996 } 997 998 /* When MD_CACHE is set, try to avoid double-caching the data. 
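 * (More precisely, the advisory below is issued only while MD_CACHE is
 * clear; with the cache option set the pages are left in the backing
 * vnode's page cache.)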
*/ 999 if (error == 0 && (sc->flags & MD_CACHE) == 0) 1000 VOP_ADVISE(vp, iostart, auio.uio_offset - 1, 1001 POSIX_FADV_DONTNEED); 1002 1003 if (pb != NULL) { 1004 pmap_qremove((vm_offset_t)pb->b_data, npages); 1005 if (error == 0) { 1006 len -= iolen; 1007 bp->bio_resid -= iolen; 1008 ma_offs += iolen; 1009 if (len > 0) 1010 goto unmapped_step; 1011 } 1012 uma_zfree(md_pbuf_zone, pb); 1013 } else { 1014 bp->bio_resid = auio.uio_resid; 1015 } 1016 1017 free(piov, M_MD); 1018 return (error); 1019 } 1020 1021 static int 1022 mdstart_swap(struct md_s *sc, struct bio *bp) 1023 { 1024 vm_page_t m; 1025 u_char *p; 1026 vm_pindex_t i, lastp; 1027 bus_dma_segment_t *vlist; 1028 int rv, ma_offs, offs, len, lastend; 1029 1030 switch (bp->bio_cmd) { 1031 case BIO_READ: 1032 case BIO_WRITE: 1033 case BIO_DELETE: 1034 break; 1035 default: 1036 return (EOPNOTSUPP); 1037 } 1038 1039 p = bp->bio_data; 1040 ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ? 1041 bp->bio_ma_offset : 0; 1042 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 1043 (bus_dma_segment_t *)bp->bio_data : NULL; 1044 1045 /* 1046 * offs is the offset at which to start operating on the 1047 * next (ie, first) page. lastp is the last page on 1048 * which we're going to operate. lastend is the ending 1049 * position within that last page (ie, PAGE_SIZE if 1050 * we're operating on complete aligned pages). 1051 */ 1052 offs = bp->bio_offset % PAGE_SIZE; 1053 lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; 1054 lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; 1055 1056 rv = VM_PAGER_OK; 1057 vm_object_pip_add(sc->object, 1); 1058 for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { 1059 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; 1060 m = vm_page_grab_unlocked(sc->object, i, VM_ALLOC_SYSTEM); 1061 if (bp->bio_cmd == BIO_READ) { 1062 if (vm_page_all_valid(m)) 1063 rv = VM_PAGER_OK; 1064 else 1065 rv = vm_pager_get_pages(sc->object, &m, 1, 1066 NULL, NULL); 1067 if (rv == VM_PAGER_ERROR) { 1068 VM_OBJECT_WLOCK(sc->object); 1069 vm_page_free(m); 1070 VM_OBJECT_WUNLOCK(sc->object); 1071 break; 1072 } else if (rv == VM_PAGER_FAIL) { 1073 /* 1074 * Pager does not have the page. Zero 1075 * the allocated page, and mark it as 1076 * valid. Do not set dirty, the page 1077 * can be recreated if thrown out. 
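 * A later read of a page reclaimed in that state simply takes this
 * path again and is zero-filled anew.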
1078 */ 1079 pmap_zero_page(m); 1080 vm_page_valid(m); 1081 } 1082 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1083 pmap_copy_pages(&m, offs, bp->bio_ma, 1084 ma_offs, len); 1085 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1086 physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs, 1087 vlist, ma_offs, len); 1088 cpu_flush_dcache(p, len); 1089 } else { 1090 physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); 1091 cpu_flush_dcache(p, len); 1092 } 1093 } else if (bp->bio_cmd == BIO_WRITE) { 1094 if (len == PAGE_SIZE || vm_page_all_valid(m)) 1095 rv = VM_PAGER_OK; 1096 else 1097 rv = vm_pager_get_pages(sc->object, &m, 1, 1098 NULL, NULL); 1099 if (rv == VM_PAGER_ERROR) { 1100 VM_OBJECT_WLOCK(sc->object); 1101 vm_page_free(m); 1102 VM_OBJECT_WUNLOCK(sc->object); 1103 break; 1104 } else if (rv == VM_PAGER_FAIL) 1105 pmap_zero_page(m); 1106 1107 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1108 pmap_copy_pages(bp->bio_ma, ma_offs, &m, 1109 offs, len); 1110 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1111 physcopyin_vlist(vlist, ma_offs, 1112 VM_PAGE_TO_PHYS(m) + offs, len); 1113 } else { 1114 physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); 1115 } 1116 1117 vm_page_valid(m); 1118 vm_page_set_dirty(m); 1119 } else if (bp->bio_cmd == BIO_DELETE) { 1120 if (len == PAGE_SIZE || vm_page_all_valid(m)) 1121 rv = VM_PAGER_OK; 1122 else 1123 rv = vm_pager_get_pages(sc->object, &m, 1, 1124 NULL, NULL); 1125 VM_OBJECT_WLOCK(sc->object); 1126 if (rv == VM_PAGER_ERROR) { 1127 vm_page_free(m); 1128 VM_OBJECT_WUNLOCK(sc->object); 1129 break; 1130 } else if (rv == VM_PAGER_FAIL) { 1131 vm_page_free(m); 1132 m = NULL; 1133 } else { 1134 /* Page is valid. */ 1135 if (len != PAGE_SIZE) { 1136 pmap_zero_page_area(m, offs, len); 1137 vm_page_set_dirty(m); 1138 } else { 1139 vm_pager_page_unswapped(m); 1140 vm_page_free(m); 1141 m = NULL; 1142 } 1143 } 1144 VM_OBJECT_WUNLOCK(sc->object); 1145 } 1146 if (m != NULL) { 1147 /* 1148 * The page may be deactivated prior to setting 1149 * PGA_REFERENCED, but in this case it will be 1150 * reactivated by the page daemon. 1151 */ 1152 if (vm_page_active(m)) 1153 vm_page_reference(m); 1154 else 1155 vm_page_activate(m); 1156 vm_page_xunbusy(m); 1157 } 1158 1159 /* Actions on further pages start at offset 0 */ 1160 p += PAGE_SIZE - offs; 1161 offs = 0; 1162 ma_offs += len; 1163 } 1164 vm_object_pip_wakeup(sc->object); 1165 return (rv != VM_PAGER_ERROR ? 
0 : ENOSPC); 1166 } 1167 1168 static int 1169 mdstart_null(struct md_s *sc, struct bio *bp) 1170 { 1171 1172 switch (bp->bio_cmd) { 1173 case BIO_READ: 1174 bzero(bp->bio_data, bp->bio_length); 1175 cpu_flush_dcache(bp->bio_data, bp->bio_length); 1176 break; 1177 case BIO_WRITE: 1178 break; 1179 } 1180 bp->bio_resid = 0; 1181 return (0); 1182 } 1183 1184 static void 1185 md_handleattr(struct md_s *sc, struct bio *bp) 1186 { 1187 if (sc->fwsectors && sc->fwheads && 1188 (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) != 0 || 1189 g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads) != 0)) 1190 return; 1191 if (g_handleattr_int(bp, "GEOM::candelete", 1) != 0) 1192 return; 1193 if (sc->ident[0] != '\0' && 1194 g_handleattr_str(bp, "GEOM::ident", sc->ident) != 0) 1195 return; 1196 if (g_handleattr_int(bp, "MNT::verified", (sc->flags & MD_VERIFY) != 0)) 1197 return; 1198 g_io_deliver(bp, EOPNOTSUPP); 1199 } 1200 1201 static void 1202 md_kthread(void *arg) 1203 { 1204 struct md_s *sc; 1205 struct bio *bp; 1206 int error; 1207 1208 sc = arg; 1209 thread_lock(curthread); 1210 sched_prio(curthread, PRIBIO); 1211 thread_unlock(curthread); 1212 if (sc->type == MD_VNODE) 1213 curthread->td_pflags |= TDP_NORUNNINGBUF; 1214 1215 for (;;) { 1216 mtx_lock(&sc->queue_mtx); 1217 if (sc->flags & MD_SHUTDOWN) { 1218 sc->flags |= MD_EXITING; 1219 mtx_unlock(&sc->queue_mtx); 1220 kproc_exit(0); 1221 } 1222 bp = bioq_takefirst(&sc->bio_queue); 1223 if (!bp) { 1224 msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); 1225 continue; 1226 } 1227 mtx_unlock(&sc->queue_mtx); 1228 if (bp->bio_cmd == BIO_GETATTR) { 1229 md_handleattr(sc, bp); 1230 } else { 1231 error = sc->start(sc, bp); 1232 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { 1233 /* 1234 * Devstat uses (bio_bcount, bio_resid) for 1235 * determining the length of the completed part 1236 * of the i/o. g_io_deliver() will translate 1237 * from bio_completed to that, but it also 1238 * destroys the bio so we must do our own 1239 * translation. 
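 * Hence bio_bcount is loaded from bio_length and
 * devstat_end_transaction_bio() is called here, before g_io_deliver()
 * consumes the bio.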
1240 */ 1241 bp->bio_bcount = bp->bio_length; 1242 devstat_end_transaction_bio(sc->devstat, bp); 1243 } 1244 bp->bio_completed = bp->bio_length - bp->bio_resid; 1245 g_io_deliver(bp, error); 1246 } 1247 } 1248 } 1249 1250 static struct md_s * 1251 mdfind(int unit) 1252 { 1253 struct md_s *sc; 1254 1255 LIST_FOREACH(sc, &md_softc_list, list) { 1256 if (sc->unit == unit) 1257 break; 1258 } 1259 return (sc); 1260 } 1261 1262 static struct md_s * 1263 mdnew(int unit, int *errp, enum md_types type) 1264 { 1265 struct md_s *sc; 1266 int error; 1267 1268 *errp = 0; 1269 if (unit == -1) 1270 unit = alloc_unr(md_uh); 1271 else 1272 unit = alloc_unr_specific(md_uh, unit); 1273 1274 if (unit == -1) { 1275 *errp = EBUSY; 1276 return (NULL); 1277 } 1278 1279 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); 1280 sc->type = type; 1281 bioq_init(&sc->bio_queue); 1282 mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); 1283 sc->unit = unit; 1284 sprintf(sc->name, "md%d", unit); 1285 LIST_INSERT_HEAD(&md_softc_list, sc, list); 1286 error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); 1287 if (error == 0) 1288 return (sc); 1289 LIST_REMOVE(sc, list); 1290 mtx_destroy(&sc->queue_mtx); 1291 free_unr(md_uh, sc->unit); 1292 free(sc, M_MD); 1293 *errp = error; 1294 return (NULL); 1295 } 1296 1297 static void 1298 mdinit(struct md_s *sc) 1299 { 1300 struct g_geom *gp; 1301 struct g_provider *pp; 1302 1303 g_topology_lock(); 1304 gp = g_new_geomf(&g_md_class, "md%d", sc->unit); 1305 gp->softc = sc; 1306 pp = g_new_providerf(gp, "md%d", sc->unit); 1307 devstat_remove_entry(pp->stat); 1308 pp->stat = NULL; 1309 pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 1310 pp->mediasize = sc->mediasize; 1311 pp->sectorsize = sc->sectorsize; 1312 switch (sc->type) { 1313 case MD_MALLOC: 1314 case MD_VNODE: 1315 case MD_SWAP: 1316 pp->flags |= G_PF_ACCEPT_UNMAPPED; 1317 break; 1318 case MD_PRELOAD: 1319 case MD_NULL: 1320 break; 1321 } 1322 sc->gp = gp; 1323 sc->pp = pp; 1324 sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, 1325 DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); 1326 sc->devstat->id = pp; 1327 g_error_provider(pp, 0); 1328 g_topology_unlock(); 1329 } 1330 1331 static int 1332 mdcreate_malloc(struct md_s *sc, struct md_req *mdr) 1333 { 1334 uintptr_t sp; 1335 int error; 1336 off_t u; 1337 1338 error = 0; 1339 if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) 1340 return (EINVAL); 1341 if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize)) 1342 return (EINVAL); 1343 /* Compression doesn't make sense if we have reserved space */ 1344 if (mdr->md_options & MD_RESERVE) 1345 mdr->md_options &= ~MD_COMPRESS; 1346 if (mdr->md_fwsectors != 0) 1347 sc->fwsectors = mdr->md_fwsectors; 1348 if (mdr->md_fwheads != 0) 1349 sc->fwheads = mdr->md_fwheads; 1350 sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE); 1351 sc->indir = dimension(sc->mediasize / sc->sectorsize); 1352 sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 1353 0x1ff, 0); 1354 if (mdr->md_options & MD_RESERVE) { 1355 off_t nsectors; 1356 1357 nsectors = sc->mediasize / sc->sectorsize; 1358 for (u = 0; u < nsectors; u++) { 1359 sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ? 
1360 M_WAITOK : M_NOWAIT) | M_ZERO); 1361 if (sp != 0) 1362 error = s_write(sc->indir, u, sp); 1363 else 1364 error = ENOMEM; 1365 if (error != 0) 1366 break; 1367 } 1368 } 1369 return (error); 1370 } 1371 1372 static int 1373 mdsetcred(struct md_s *sc, struct ucred *cred) 1374 { 1375 char *tmpbuf; 1376 int error = 0; 1377 1378 /* 1379 * Set credits in our softc 1380 */ 1381 1382 if (sc->cred) 1383 crfree(sc->cred); 1384 sc->cred = crhold(cred); 1385 1386 /* 1387 * Horrible kludge to establish credentials for NFS XXX. 1388 */ 1389 1390 if (sc->vnode) { 1391 struct uio auio; 1392 struct iovec aiov; 1393 1394 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); 1395 bzero(&auio, sizeof(auio)); 1396 1397 aiov.iov_base = tmpbuf; 1398 aiov.iov_len = sc->sectorsize; 1399 auio.uio_iov = &aiov; 1400 auio.uio_iovcnt = 1; 1401 auio.uio_offset = 0; 1402 auio.uio_rw = UIO_READ; 1403 auio.uio_segflg = UIO_SYSSPACE; 1404 auio.uio_resid = aiov.iov_len; 1405 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1406 error = VOP_READ(sc->vnode, &auio, 0, sc->cred); 1407 VOP_UNLOCK(sc->vnode); 1408 free(tmpbuf, M_TEMP); 1409 } 1410 return (error); 1411 } 1412 1413 static int 1414 mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td) 1415 { 1416 struct vattr vattr; 1417 struct nameidata nd; 1418 char *fname; 1419 int error, flags; 1420 1421 fname = mdr->md_file; 1422 if (mdr->md_file_seg == UIO_USERSPACE) { 1423 error = copyinstr(fname, sc->file, sizeof(sc->file), NULL); 1424 if (error != 0) 1425 return (error); 1426 } else if (mdr->md_file_seg == UIO_SYSSPACE) 1427 strlcpy(sc->file, fname, sizeof(sc->file)); 1428 else 1429 return (EDOOFUS); 1430 1431 /* 1432 * If the user specified that this is a read only device, don't 1433 * set the FWRITE mask before trying to open the backing store. 1434 */ 1435 flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \ 1436 | ((mdr->md_options & MD_VERIFY) ? O_VERIFY : 0); 1437 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td); 1438 error = vn_open(&nd, &flags, 0, NULL); 1439 if (error != 0) 1440 return (error); 1441 NDFREE(&nd, NDF_ONLY_PNBUF); 1442 if (nd.ni_vp->v_type != VREG) { 1443 error = EINVAL; 1444 goto bad; 1445 } 1446 error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); 1447 if (error != 0) 1448 goto bad; 1449 if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { 1450 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); 1451 if (VN_IS_DOOMED(nd.ni_vp)) { 1452 /* Forced unmount. 
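 * The vnode was doomed while the lock was being upgraded, so it can
 * no longer back the device; fail the attach.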
*/ 1453 error = EBADF; 1454 goto bad; 1455 } 1456 } 1457 nd.ni_vp->v_vflag |= VV_MD; 1458 VOP_UNLOCK(nd.ni_vp); 1459 1460 if (mdr->md_fwsectors != 0) 1461 sc->fwsectors = mdr->md_fwsectors; 1462 if (mdr->md_fwheads != 0) 1463 sc->fwheads = mdr->md_fwheads; 1464 snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju", 1465 (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid); 1466 sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE | 1467 MD_VERIFY); 1468 if (!(flags & FWRITE)) 1469 sc->flags |= MD_READONLY; 1470 sc->vnode = nd.ni_vp; 1471 1472 error = mdsetcred(sc, td->td_ucred); 1473 if (error != 0) { 1474 sc->vnode = NULL; 1475 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); 1476 nd.ni_vp->v_vflag &= ~VV_MD; 1477 goto bad; 1478 } 1479 return (0); 1480 bad: 1481 VOP_UNLOCK(nd.ni_vp); 1482 (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); 1483 return (error); 1484 } 1485 1486 static void 1487 g_md_providergone(struct g_provider *pp) 1488 { 1489 struct md_s *sc = pp->geom->softc; 1490 1491 mtx_lock(&sc->queue_mtx); 1492 sc->flags |= MD_PROVIDERGONE; 1493 wakeup(&sc->flags); 1494 mtx_unlock(&sc->queue_mtx); 1495 } 1496 1497 static int 1498 mddestroy(struct md_s *sc, struct thread *td) 1499 { 1500 1501 if (sc->gp) { 1502 g_topology_lock(); 1503 g_wither_geom(sc->gp, ENXIO); 1504 g_topology_unlock(); 1505 1506 mtx_lock(&sc->queue_mtx); 1507 while (!(sc->flags & MD_PROVIDERGONE)) 1508 msleep(&sc->flags, &sc->queue_mtx, PRIBIO, "mddestroy", 0); 1509 mtx_unlock(&sc->queue_mtx); 1510 } 1511 if (sc->devstat) { 1512 devstat_remove_entry(sc->devstat); 1513 sc->devstat = NULL; 1514 } 1515 mtx_lock(&sc->queue_mtx); 1516 sc->flags |= MD_SHUTDOWN; 1517 wakeup(sc); 1518 while (!(sc->flags & MD_EXITING)) 1519 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); 1520 mtx_unlock(&sc->queue_mtx); 1521 mtx_destroy(&sc->queue_mtx); 1522 if (sc->vnode != NULL) { 1523 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1524 sc->vnode->v_vflag &= ~VV_MD; 1525 VOP_UNLOCK(sc->vnode); 1526 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? 
1527 FREAD : (FREAD|FWRITE), sc->cred, td); 1528 } 1529 if (sc->cred != NULL) 1530 crfree(sc->cred); 1531 if (sc->object != NULL) 1532 vm_object_deallocate(sc->object); 1533 if (sc->indir) 1534 destroy_indir(sc, sc->indir); 1535 if (sc->uma) 1536 uma_zdestroy(sc->uma); 1537 1538 LIST_REMOVE(sc, list); 1539 free_unr(md_uh, sc->unit); 1540 free(sc, M_MD); 1541 return (0); 1542 } 1543 1544 static int 1545 mdresize(struct md_s *sc, struct md_req *mdr) 1546 { 1547 int error, res; 1548 vm_pindex_t oldpages, newpages; 1549 1550 switch (sc->type) { 1551 case MD_VNODE: 1552 case MD_NULL: 1553 break; 1554 case MD_SWAP: 1555 if (mdr->md_mediasize <= 0 || 1556 (mdr->md_mediasize % PAGE_SIZE) != 0) 1557 return (EDOM); 1558 oldpages = OFF_TO_IDX(sc->mediasize); 1559 newpages = OFF_TO_IDX(mdr->md_mediasize); 1560 if (newpages < oldpages) { 1561 VM_OBJECT_WLOCK(sc->object); 1562 vm_object_page_remove(sc->object, newpages, 0, 0); 1563 swap_release_by_cred(IDX_TO_OFF(oldpages - 1564 newpages), sc->cred); 1565 sc->object->charge = IDX_TO_OFF(newpages); 1566 sc->object->size = newpages; 1567 VM_OBJECT_WUNLOCK(sc->object); 1568 } else if (newpages > oldpages) { 1569 res = swap_reserve_by_cred(IDX_TO_OFF(newpages - 1570 oldpages), sc->cred); 1571 if (!res) 1572 return (ENOMEM); 1573 if ((mdr->md_options & MD_RESERVE) || 1574 (sc->flags & MD_RESERVE)) { 1575 error = swap_pager_reserve(sc->object, 1576 oldpages, newpages - oldpages); 1577 if (error < 0) { 1578 swap_release_by_cred( 1579 IDX_TO_OFF(newpages - oldpages), 1580 sc->cred); 1581 return (EDOM); 1582 } 1583 } 1584 VM_OBJECT_WLOCK(sc->object); 1585 sc->object->charge = IDX_TO_OFF(newpages); 1586 sc->object->size = newpages; 1587 VM_OBJECT_WUNLOCK(sc->object); 1588 } 1589 break; 1590 default: 1591 return (EOPNOTSUPP); 1592 } 1593 1594 sc->mediasize = mdr->md_mediasize; 1595 g_topology_lock(); 1596 g_resize_provider(sc->pp, sc->mediasize); 1597 g_topology_unlock(); 1598 return (0); 1599 } 1600 1601 static int 1602 mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td) 1603 { 1604 vm_ooffset_t npage; 1605 int error; 1606 1607 /* 1608 * Range check. Disallow negative sizes and sizes not being 1609 * multiple of page size. 1610 */ 1611 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1612 return (EDOM); 1613 1614 /* 1615 * Allocate an OBJT_SWAP object. 1616 * 1617 * Note the truncation. 1618 */ 1619 1620 if ((mdr->md_options & MD_VERIFY) != 0) 1621 return (EINVAL); 1622 npage = mdr->md_mediasize / PAGE_SIZE; 1623 if (mdr->md_fwsectors != 0) 1624 sc->fwsectors = mdr->md_fwsectors; 1625 if (mdr->md_fwheads != 0) 1626 sc->fwheads = mdr->md_fwheads; 1627 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, 1628 VM_PROT_DEFAULT, 0, td->td_ucred); 1629 if (sc->object == NULL) 1630 return (ENOMEM); 1631 sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE); 1632 if (mdr->md_options & MD_RESERVE) { 1633 if (swap_pager_reserve(sc->object, 0, npage) < 0) { 1634 error = EDOM; 1635 goto finish; 1636 } 1637 } 1638 error = mdsetcred(sc, td->td_ucred); 1639 finish: 1640 if (error != 0) { 1641 vm_object_deallocate(sc->object); 1642 sc->object = NULL; 1643 } 1644 return (error); 1645 } 1646 1647 static int 1648 mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td) 1649 { 1650 1651 /* 1652 * Range check. Disallow negative sizes and sizes not being 1653 * multiple of page size. 
1654 */ 1655 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1656 return (EDOM); 1657 1658 return (0); 1659 } 1660 1661 static int 1662 kern_mdattach_locked(struct thread *td, struct md_req *mdr) 1663 { 1664 struct md_s *sc; 1665 unsigned sectsize; 1666 int error, i; 1667 1668 sx_assert(&md_sx, SA_XLOCKED); 1669 1670 switch (mdr->md_type) { 1671 case MD_MALLOC: 1672 case MD_PRELOAD: 1673 case MD_VNODE: 1674 case MD_SWAP: 1675 case MD_NULL: 1676 break; 1677 default: 1678 return (EINVAL); 1679 } 1680 if (mdr->md_sectorsize == 0) 1681 sectsize = DEV_BSIZE; 1682 else 1683 sectsize = mdr->md_sectorsize; 1684 if (sectsize > maxphys || mdr->md_mediasize < sectsize) 1685 return (EINVAL); 1686 if (mdr->md_options & MD_AUTOUNIT) 1687 sc = mdnew(-1, &error, mdr->md_type); 1688 else { 1689 if (mdr->md_unit > INT_MAX) 1690 return (EINVAL); 1691 sc = mdnew(mdr->md_unit, &error, mdr->md_type); 1692 } 1693 if (sc == NULL) 1694 return (error); 1695 if (mdr->md_label != NULL) 1696 error = copyinstr(mdr->md_label, sc->label, 1697 sizeof(sc->label), NULL); 1698 if (error != 0) 1699 goto err_after_new; 1700 if (mdr->md_options & MD_AUTOUNIT) 1701 mdr->md_unit = sc->unit; 1702 sc->mediasize = mdr->md_mediasize; 1703 sc->sectorsize = sectsize; 1704 error = EDOOFUS; 1705 switch (sc->type) { 1706 case MD_MALLOC: 1707 sc->start = mdstart_malloc; 1708 error = mdcreate_malloc(sc, mdr); 1709 break; 1710 case MD_PRELOAD: 1711 /* 1712 * We disallow attaching preloaded memory disks via 1713 * ioctl. Preloaded memory disks are automatically 1714 * attached in g_md_init(). 1715 */ 1716 error = EOPNOTSUPP; 1717 break; 1718 case MD_VNODE: 1719 sc->start = mdstart_vnode; 1720 error = mdcreate_vnode(sc, mdr, td); 1721 break; 1722 case MD_SWAP: 1723 sc->start = mdstart_swap; 1724 error = mdcreate_swap(sc, mdr, td); 1725 break; 1726 case MD_NULL: 1727 sc->start = mdstart_null; 1728 error = mdcreate_null(sc, mdr, td); 1729 break; 1730 } 1731 err_after_new: 1732 if (error != 0) { 1733 mddestroy(sc, td); 1734 return (error); 1735 } 1736 1737 /* Prune off any residual fractional sector */ 1738 i = sc->mediasize % sc->sectorsize; 1739 sc->mediasize -= i; 1740 1741 mdinit(sc); 1742 return (0); 1743 } 1744 1745 static int 1746 kern_mdattach(struct thread *td, struct md_req *mdr) 1747 { 1748 int error; 1749 1750 sx_xlock(&md_sx); 1751 error = kern_mdattach_locked(td, mdr); 1752 sx_xunlock(&md_sx); 1753 return (error); 1754 } 1755 1756 static int 1757 kern_mddetach_locked(struct thread *td, struct md_req *mdr) 1758 { 1759 struct md_s *sc; 1760 1761 sx_assert(&md_sx, SA_XLOCKED); 1762 1763 if (mdr->md_mediasize != 0 || 1764 (mdr->md_options & ~MD_FORCE) != 0) 1765 return (EINVAL); 1766 1767 sc = mdfind(mdr->md_unit); 1768 if (sc == NULL) 1769 return (ENOENT); 1770 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && 1771 !(mdr->md_options & MD_FORCE)) 1772 return (EBUSY); 1773 return (mddestroy(sc, td)); 1774 } 1775 1776 static int 1777 kern_mddetach(struct thread *td, struct md_req *mdr) 1778 { 1779 int error; 1780 1781 sx_xlock(&md_sx); 1782 error = kern_mddetach_locked(td, mdr); 1783 sx_xunlock(&md_sx); 1784 return (error); 1785 } 1786 1787 static int 1788 kern_mdresize_locked(struct md_req *mdr) 1789 { 1790 struct md_s *sc; 1791 1792 sx_assert(&md_sx, SA_XLOCKED); 1793 1794 if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0) 1795 return (EINVAL); 1796 1797 sc = mdfind(mdr->md_unit); 1798 if (sc == NULL) 1799 return (ENOENT); 1800 if (mdr->md_mediasize < sc->sectorsize) 1801 return (EINVAL); 1802 if (mdr->md_mediasize < 
sc->mediasize && 1803 !(sc->flags & MD_FORCE) && 1804 !(mdr->md_options & MD_FORCE)) 1805 return (EBUSY); 1806 return (mdresize(sc, mdr)); 1807 } 1808 1809 static int 1810 kern_mdresize(struct md_req *mdr) 1811 { 1812 int error; 1813 1814 sx_xlock(&md_sx); 1815 error = kern_mdresize_locked(mdr); 1816 sx_xunlock(&md_sx); 1817 return (error); 1818 } 1819 1820 static int 1821 kern_mdquery_locked(struct md_req *mdr) 1822 { 1823 struct md_s *sc; 1824 int error; 1825 1826 sx_assert(&md_sx, SA_XLOCKED); 1827 1828 sc = mdfind(mdr->md_unit); 1829 if (sc == NULL) 1830 return (ENOENT); 1831 mdr->md_type = sc->type; 1832 mdr->md_options = sc->flags; 1833 mdr->md_mediasize = sc->mediasize; 1834 mdr->md_sectorsize = sc->sectorsize; 1835 error = 0; 1836 if (mdr->md_label != NULL) { 1837 error = copyout(sc->label, mdr->md_label, 1838 strlen(sc->label) + 1); 1839 if (error != 0) 1840 return (error); 1841 } 1842 if (sc->type == MD_VNODE || 1843 (sc->type == MD_PRELOAD && mdr->md_file != NULL)) 1844 error = copyout(sc->file, mdr->md_file, 1845 strlen(sc->file) + 1); 1846 return (error); 1847 } 1848 1849 static int 1850 kern_mdquery(struct md_req *mdr) 1851 { 1852 int error; 1853 1854 sx_xlock(&md_sx); 1855 error = kern_mdquery_locked(mdr); 1856 sx_xunlock(&md_sx); 1857 return (error); 1858 } 1859 1860 /* Copy members that are not userspace pointers. */ 1861 #define MD_IOCTL2REQ(mdio, mdr) do { \ 1862 (mdr)->md_unit = (mdio)->md_unit; \ 1863 (mdr)->md_type = (mdio)->md_type; \ 1864 (mdr)->md_mediasize = (mdio)->md_mediasize; \ 1865 (mdr)->md_sectorsize = (mdio)->md_sectorsize; \ 1866 (mdr)->md_options = (mdio)->md_options; \ 1867 (mdr)->md_fwheads = (mdio)->md_fwheads; \ 1868 (mdr)->md_fwsectors = (mdio)->md_fwsectors; \ 1869 (mdr)->md_units = &(mdio)->md_pad[0]; \ 1870 (mdr)->md_units_nitems = nitems((mdio)->md_pad); \ 1871 } while(0) 1872 1873 /* Copy members that might have been updated */ 1874 #define MD_REQ2IOCTL(mdr, mdio) do { \ 1875 (mdio)->md_unit = (mdr)->md_unit; \ 1876 (mdio)->md_type = (mdr)->md_type; \ 1877 (mdio)->md_mediasize = (mdr)->md_mediasize; \ 1878 (mdio)->md_sectorsize = (mdr)->md_sectorsize; \ 1879 (mdio)->md_options = (mdr)->md_options; \ 1880 (mdio)->md_fwheads = (mdr)->md_fwheads; \ 1881 (mdio)->md_fwsectors = (mdr)->md_fwsectors; \ 1882 } while(0) 1883 1884 static int 1885 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1886 struct thread *td) 1887 { 1888 struct md_req mdr; 1889 int error; 1890 1891 if (md_debug) 1892 printf("mdctlioctl(%s %lx %p %x %p)\n", 1893 devtoname(dev), cmd, addr, flags, td); 1894 1895 bzero(&mdr, sizeof(mdr)); 1896 switch (cmd) { 1897 case MDIOCATTACH: 1898 case MDIOCDETACH: 1899 case MDIOCRESIZE: 1900 case MDIOCQUERY: { 1901 struct md_ioctl *mdio = (struct md_ioctl *)addr; 1902 if (mdio->md_version != MDIOVERSION) 1903 return (EINVAL); 1904 MD_IOCTL2REQ(mdio, &mdr); 1905 mdr.md_file = mdio->md_file; 1906 mdr.md_file_seg = UIO_USERSPACE; 1907 /* If the file is adjacent to the md_ioctl it's in kernel. 
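 * A kernel-resident caller stores the path immediately after the
 * structure, so mdcreate_vnode() copies it with strlcpy() instead of
 * copyinstr().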
*/ 1908 if ((void *)mdio->md_file == (void *)(mdio + 1)) 1909 mdr.md_file_seg = UIO_SYSSPACE; 1910 mdr.md_label = mdio->md_label; 1911 break; 1912 } 1913 #ifdef COMPAT_FREEBSD32 1914 case MDIOCATTACH_32: 1915 case MDIOCDETACH_32: 1916 case MDIOCRESIZE_32: 1917 case MDIOCQUERY_32: { 1918 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 1919 if (mdio->md_version != MDIOVERSION) 1920 return (EINVAL); 1921 MD_IOCTL2REQ(mdio, &mdr); 1922 mdr.md_file = (void *)(uintptr_t)mdio->md_file; 1923 mdr.md_file_seg = UIO_USERSPACE; 1924 mdr.md_label = (void *)(uintptr_t)mdio->md_label; 1925 break; 1926 } 1927 #endif 1928 default: 1929 /* Fall through to handler switch. */ 1930 break; 1931 } 1932 1933 error = 0; 1934 switch (cmd) { 1935 case MDIOCATTACH: 1936 #ifdef COMPAT_FREEBSD32 1937 case MDIOCATTACH_32: 1938 #endif 1939 error = kern_mdattach(td, &mdr); 1940 break; 1941 case MDIOCDETACH: 1942 #ifdef COMPAT_FREEBSD32 1943 case MDIOCDETACH_32: 1944 #endif 1945 error = kern_mddetach(td, &mdr); 1946 break; 1947 case MDIOCRESIZE: 1948 #ifdef COMPAT_FREEBSD32 1949 case MDIOCRESIZE_32: 1950 #endif 1951 error = kern_mdresize(&mdr); 1952 break; 1953 case MDIOCQUERY: 1954 #ifdef COMPAT_FREEBSD32 1955 case MDIOCQUERY_32: 1956 #endif 1957 error = kern_mdquery(&mdr); 1958 break; 1959 default: 1960 error = ENOIOCTL; 1961 } 1962 1963 switch (cmd) { 1964 case MDIOCATTACH: 1965 case MDIOCQUERY: { 1966 struct md_ioctl *mdio = (struct md_ioctl *)addr; 1967 MD_REQ2IOCTL(&mdr, mdio); 1968 break; 1969 } 1970 #ifdef COMPAT_FREEBSD32 1971 case MDIOCATTACH_32: 1972 case MDIOCQUERY_32: { 1973 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 1974 MD_REQ2IOCTL(&mdr, mdio); 1975 break; 1976 } 1977 #endif 1978 default: 1979 /* Other commands to not alter mdr. */ 1980 break; 1981 } 1982 1983 return (error); 1984 } 1985 1986 static void 1987 md_preloaded(u_char *image, size_t length, const char *name) 1988 { 1989 struct md_s *sc; 1990 int error; 1991 1992 sc = mdnew(-1, &error, MD_PRELOAD); 1993 if (sc == NULL) 1994 return; 1995 sc->mediasize = length; 1996 sc->sectorsize = DEV_BSIZE; 1997 sc->pl_ptr = image; 1998 sc->pl_len = length; 1999 sc->start = mdstart_preload; 2000 if (name != NULL) 2001 strlcpy(sc->file, name, sizeof(sc->file)); 2002 #ifdef MD_ROOT 2003 if (sc->unit == 0) { 2004 #ifndef ROOTDEVNAME 2005 rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; 2006 #endif 2007 #ifdef MD_ROOT_READONLY 2008 sc->flags |= MD_READONLY; 2009 #endif 2010 } 2011 #endif 2012 mdinit(sc); 2013 if (name != NULL) { 2014 printf("%s%d: Preloaded image <%s> %zd bytes at %p\n", 2015 MD_NAME, sc->unit, name, length, image); 2016 } else { 2017 printf("%s%d: Embedded image %zd bytes at %p\n", 2018 MD_NAME, sc->unit, length, image); 2019 } 2020 } 2021 2022 static void 2023 g_md_init(struct g_class *mp __unused) 2024 { 2025 caddr_t mod; 2026 u_char *ptr, *name, *type; 2027 unsigned len; 2028 int i; 2029 2030 /* figure out log2(NINDIR) */ 2031 for (i = NINDIR, nshift = -1; i; nshift++) 2032 i >>= 1; 2033 2034 mod = NULL; 2035 sx_init(&md_sx, "MD config lock"); 2036 g_topology_unlock(); 2037 md_uh = new_unrhdr(0, INT_MAX, NULL); 2038 #ifdef MD_ROOT 2039 if (mfs_root_size != 0) { 2040 sx_xlock(&md_sx); 2041 #ifdef MD_ROOT_MEM 2042 md_preloaded(mfs_root, mfs_root_size, NULL); 2043 #else 2044 md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size, 2045 NULL); 2046 #endif 2047 sx_xunlock(&md_sx); 2048 } 2049 #endif 2050 /* XXX: are preload_* static or do they need Giant ? 
*/ 2051 while ((mod = preload_search_next_name(mod)) != NULL) { 2052 name = (char *)preload_search_info(mod, MODINFO_NAME); 2053 if (name == NULL) 2054 continue; 2055 type = (char *)preload_search_info(mod, MODINFO_TYPE); 2056 if (type == NULL) 2057 continue; 2058 if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) 2059 continue; 2060 ptr = preload_fetch_addr(mod); 2061 len = preload_fetch_size(mod); 2062 if (ptr != NULL && len != 0) { 2063 sx_xlock(&md_sx); 2064 md_preloaded(ptr, len, name); 2065 sx_xunlock(&md_sx); 2066 } 2067 } 2068 md_pbuf_zone = pbuf_zsecond_create("mdpbuf", nswbuf / 10); 2069 status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 2070 0600, MDCTL_NAME); 2071 g_topology_lock(); 2072 } 2073 2074 static void 2075 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2076 struct g_consumer *cp __unused, struct g_provider *pp) 2077 { 2078 struct md_s *mp; 2079 char *type; 2080 2081 mp = gp->softc; 2082 if (mp == NULL) 2083 return; 2084 2085 switch (mp->type) { 2086 case MD_MALLOC: 2087 type = "malloc"; 2088 break; 2089 case MD_PRELOAD: 2090 type = "preload"; 2091 break; 2092 case MD_VNODE: 2093 type = "vnode"; 2094 break; 2095 case MD_SWAP: 2096 type = "swap"; 2097 break; 2098 case MD_NULL: 2099 type = "null"; 2100 break; 2101 default: 2102 type = "unknown"; 2103 break; 2104 } 2105 2106 if (pp != NULL) { 2107 if (indent == NULL) { 2108 sbuf_printf(sb, " u %d", mp->unit); 2109 sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); 2110 sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); 2111 sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); 2112 sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); 2113 sbuf_printf(sb, " t %s", type); 2114 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2115 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) 2116 sbuf_printf(sb, " file %s", mp->file); 2117 sbuf_printf(sb, " label %s", mp->label); 2118 } else { 2119 sbuf_printf(sb, "%s<unit>%d</unit>\n", indent, 2120 mp->unit); 2121 sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n", 2122 indent, (uintmax_t) mp->sectorsize); 2123 sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n", 2124 indent, (uintmax_t) mp->fwheads); 2125 sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n", 2126 indent, (uintmax_t) mp->fwsectors); 2127 if (mp->ident[0] != '\0') { 2128 sbuf_printf(sb, "%s<ident>", indent); 2129 g_conf_printf_escaped(sb, "%s", mp->ident); 2130 sbuf_printf(sb, "</ident>\n"); 2131 } 2132 sbuf_printf(sb, "%s<length>%ju</length>\n", 2133 indent, (uintmax_t) mp->mediasize); 2134 sbuf_printf(sb, "%s<compression>%s</compression>\n", indent, 2135 (mp->flags & MD_COMPRESS) == 0 ? "off": "on"); 2136 sbuf_printf(sb, "%s<access>%s</access>\n", indent, 2137 (mp->flags & MD_READONLY) == 0 ? "read-write": 2138 "read-only"); 2139 sbuf_printf(sb, "%s<type>%s</type>\n", indent, 2140 type); 2141 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2142 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) { 2143 sbuf_printf(sb, "%s<file>", indent); 2144 g_conf_printf_escaped(sb, "%s", mp->file); 2145 sbuf_printf(sb, "</file>\n"); 2146 } 2147 if (mp->type == MD_VNODE) 2148 sbuf_printf(sb, "%s<cache>%s</cache>\n", indent, 2149 (mp->flags & MD_CACHE) == 0 ? 
"off": "on"); 2150 sbuf_printf(sb, "%s<label>", indent); 2151 g_conf_printf_escaped(sb, "%s", mp->label); 2152 sbuf_printf(sb, "</label>\n"); 2153 } 2154 } 2155 } 2156 2157 static void 2158 g_md_fini(struct g_class *mp __unused) 2159 { 2160 2161 sx_destroy(&md_sx); 2162 if (status_dev != NULL) 2163 destroy_dev(status_dev); 2164 uma_zdestroy(md_pbuf_zone); 2165 delete_unrhdr(md_uh); 2166 } 2167