/*-
 * SPDX-License-Identifier: (Beerware AND BSD-3-Clause)
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 *
 */

/*-
 * The following functions are based on the vn(4) driver: mdstart_swap(),
 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
 * and as such under the following copyright:
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California. All rights reserved.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah Hdr: vn.c 1.13 94/04/02
 *
 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94
 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
 */

#include "opt_rootdevname.h"
#include "opt_geom.h"
#include "opt_md.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/devicestat.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mdioctl.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/disk.h>

#include <geom/geom.h>
#include <geom/geom_int.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/uma.h>

#include <machine/bus.h>

#define MD_MODVER 1

#define MD_SHUTDOWN 0x10000	/* Tell worker thread to terminate. */
#define MD_EXITING 0x20000	/* Worker thread is exiting. */
#define MD_PROVIDERGONE 0x40000	/* Safe to free the softc */

#ifndef MD_NSECT
#define MD_NSECT (10000 * 2)
#endif

struct md_req {
	unsigned md_unit;	/* unit number */
	enum md_types md_type;	/* type of disk */
	off_t md_mediasize;	/* size of disk in bytes */
	unsigned md_sectorsize;	/* sectorsize */
	unsigned md_options;	/* options */
	int md_fwheads;		/* firmware heads */
	int md_fwsectors;	/* firmware sectors */
	char *md_file;		/* pathname of file to mount */
	enum uio_seg md_file_seg;	/* location of md_file */
	char *md_label;		/* label of the device (userspace) */
	int *md_units;		/* pointer to units array (kernel) */
	size_t md_units_nitems;	/* items in md_units array */
};

#ifdef COMPAT_FREEBSD32
struct md_ioctl32 {
	unsigned md_version;
	unsigned md_unit;
	enum md_types md_type;
	uint32_t md_file;
	off_t md_mediasize;
	unsigned md_sectorsize;
	unsigned md_options;
	uint64_t md_base;
	int md_fwheads;
	int md_fwsectors;
	uint32_t md_label;
	int md_pad[MDNPAD];
}
#ifdef __amd64__
__attribute__((__packed__))
#endif
;
#ifndef __amd64__
CTASSERT((sizeof(struct md_ioctl32)) == 440);
#else
CTASSERT((sizeof(struct md_ioctl32)) == 436);
#endif

#define MDIOCATTACH_32	_IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32)
#define MDIOCDETACH_32	_IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32)
#define MDIOCQUERY_32	_IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32)
#define MDIOCRESIZE_32	_IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32)
#endif /* COMPAT_FREEBSD32 */

static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk");
static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors");

static int md_debug;
SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0,
    "Enable md(4) debug messages");
static int md_malloc_wait;
SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0,
    "Allow malloc to wait for memory allocations");
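
/*
 * Overview (descriptive comment, summarizing what follows in this file):
 * md(4) provides memory-backed disks with several backing stores, selected
 * by enum md_types: MD_MALLOC (anonymous memory tracked by the indir/uma
 * sector allocator), MD_PRELOAD (an image loaded along with the kernel),
 * MD_VNODE (a regular file accessed through its vnode), MD_SWAP (a
 * swap-backed VM object) and MD_NULL (reads return zeros, writes are
 * discarded). Each type installs its own mdstart_*() routine via sc->start.
 */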
#if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE)
#define MD_ROOT_FSTYPE "ufs"
#endif

#if defined(MD_ROOT)
/*
 * Preloaded image gets put here.
 */
#if defined(MD_ROOT_SIZE)
/*
 * We put the mfs_root symbol into the oldmfs section of the kernel object file.
 * Applications that patch the object with the image can determine
 * the size looking at the oldmfs section size within the kernel.
 */
u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs")));
const int mfs_root_size = sizeof(mfs_root);
#elif defined(MD_ROOT_MEM)
/* MD region already mapped in the memory */
u_char *mfs_root;
int mfs_root_size;
#else
extern volatile u_char __weak_symbol mfs_root;
extern volatile u_char __weak_symbol mfs_root_end;
#define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root))
#endif
#endif

static g_init_t g_md_init;
static g_fini_t g_md_fini;
static g_start_t g_md_start;
static g_access_t g_md_access;
static void g_md_dumpconf(struct sbuf *sb, const char *indent,
    struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp);
static g_provgone_t g_md_providergone;

static struct cdev *status_dev = NULL;
static struct sx md_sx;
static struct unrhdr *md_uh;

static d_ioctl_t mdctlioctl;

static struct cdevsw mdctl_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = mdctlioctl,
	.d_name = MD_NAME,
};

struct g_class g_md_class = {
	.name = "MD",
	.version = G_VERSION,
	.init = g_md_init,
	.fini = g_md_fini,
	.start = g_md_start,
	.access = g_md_access,
	.dumpconf = g_md_dumpconf,
	.providergone = g_md_providergone,
};

DECLARE_GEOM_CLASS(g_md_class, g_md);
MODULE_VERSION(geom_md, 0);

static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);

#define NINDIR	(PAGE_SIZE / sizeof(uintptr_t))
#define NMASK	(NINDIR-1)
static int nshift;

struct indir {
	uintptr_t *array;
	u_int total;
	u_int used;
	u_int shift;
};

struct md_s {
	int unit;
	LIST_ENTRY(md_s) list;
	struct bio_queue_head bio_queue;
	struct mtx queue_mtx;
	struct cdev *dev;
	enum md_types type;
	off_t mediasize;
	unsigned sectorsize;
	unsigned opencount;
	unsigned fwheads;
	unsigned fwsectors;
	char ident[32];
	unsigned flags;
	char name[20];
	struct proc *procp;
	struct g_geom *gp;
	struct g_provider *pp;
	int (*start)(struct md_s *sc, struct bio *bp);
	struct devstat *devstat;
	bool candelete;

	/* MD_MALLOC related fields */
	struct indir *indir;
	uma_zone_t uma;

	/* MD_PRELOAD related fields */
	u_char *pl_ptr;
	size_t pl_len;

	/* MD_VNODE related fields */
	struct vnode *vnode;
	char file[PATH_MAX];
	char label[PATH_MAX];
	struct ucred *cred;
	vm_offset_t kva;

	/* MD_SWAP related fields */
	vm_object_t object;
};

static struct indir *
new_indir(u_int shift)
{
	struct indir *ip;

	ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT)
	    | M_ZERO);
	if (ip == NULL)
		return (NULL);
	ip->array = malloc(sizeof(uintptr_t) * NINDIR,
	    M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
	if (ip->array == NULL) {
		free(ip, M_MD);
		return (NULL);
	}
	ip->total = NINDIR;
	ip->shift = shift;
	return (ip);
}

static void
del_indir(struct indir *ip)
{

	free(ip->array, M_MDSECT);
	free(ip, M_MD);
}

static void
destroy_indir(struct md_s *sc, struct indir *ip)
{
	int i;

	for (i = 0; i < NINDIR; i++) {
		if (!ip->array[i])
			continue;
		if (ip->shift)
			destroy_indir(sc, (struct indir*)(ip->array[i]));
		else if (ip->array[i] > 255)
			uma_zfree(sc->uma, (void *)(ip->array[i]));
	}
	del_indir(ip);
}

/*
 * This function does the math and allocates the top level "indir" structure
 * for a device of "size" sectors.
 */

static struct indir *
dimension(off_t size)
{
	off_t rcnt;
	struct indir *ip;
	int layer;

	rcnt = size;
	layer = 0;
	while (rcnt > NINDIR) {
		rcnt /= NINDIR;
		layer++;
	}

	/*
	 * XXX: the top layer is probably not fully populated, so we allocate
	 * too much space for ip->array in here.
	 */
	ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO);
	ip->array = malloc(sizeof(uintptr_t) * NINDIR,
	    M_MDSECT, M_WAITOK | M_ZERO);
	ip->total = NINDIR;
	ip->shift = layer * nshift;
	return (ip);
}

/*
 * Read a given sector
 */

static uintptr_t
s_read(struct indir *ip, off_t offset)
{
	struct indir *cip;
	int idx;
	uintptr_t up;

	if (md_debug > 1)
		printf("s_read(%jd)\n", (intmax_t)offset);
	up = 0;
	for (cip = ip; cip != NULL;) {
		if (cip->shift) {
			idx = (offset >> cip->shift) & NMASK;
			up = cip->array[idx];
			cip = (struct indir *)up;
			continue;
		}
		idx = offset & NMASK;
		return (cip->array[idx]);
	}
	return (0);
}

/*
 * Write a given sector, prune the tree if the value is 0
 */

static int
s_write(struct indir *ip, off_t offset, uintptr_t ptr)
{
	struct indir *cip, *lip[10];
	int idx, li;
	uintptr_t up;

	if (md_debug > 1)
		printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
	up = 0;
	li = 0;
	cip = ip;
	for (;;) {
		lip[li++] = cip;
		if (cip->shift) {
			idx = (offset >> cip->shift) & NMASK;
			up = cip->array[idx];
			if (up != 0) {
				cip = (struct indir *)up;
				continue;
			}
			/* Allocate branch */
			cip->array[idx] =
			    (uintptr_t)new_indir(cip->shift - nshift);
			if (cip->array[idx] == 0)
				return (ENOSPC);
			cip->used++;
			up = cip->array[idx];
			cip = (struct indir *)up;
			continue;
		}
		/* leafnode */
		idx = offset & NMASK;
		up = cip->array[idx];
		if (up != 0)
			cip->used--;
		cip->array[idx] = ptr;
		if (ptr != 0)
			cip->used++;
		break;
	}
	if (cip->used != 0 || li == 1)
		return (0);
	li--;
	while (cip->used == 0 && cip != ip) {
		li--;
		idx = (offset >> lip[li]->shift) & NMASK;
		up = lip[li]->array[idx];
		KASSERT(up == (uintptr_t)cip, ("md screwed up"));
		del_indir(cip);
		lip[li]->array[idx] = 0;
		lip[li]->used--;
		cip = lip[li];
	}
	return (0);
}

static int
g_md_access(struct g_provider *pp, int r, int w, int e)
{
	struct md_s *sc;

	sc = pp->geom->softc;
	if (sc == NULL) {
		if (r <= 0 && w <= 0 && e <= 0)
			return (0);
		return (ENXIO);
	}
	r += pp->acr;
	w += pp->acw;
	e += pp->ace;
	if ((sc->flags & MD_READONLY) != 0 && w > 0)
		return (EROFS);
	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
		sc->opencount = 1;
	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
		sc->opencount = 0;
	}
	return (0);
}

static void
g_md_start(struct bio *bp)
{
	struct md_s *sc;

	sc = bp->bio_to->geom->softc;
	if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) {
		devstat_start_transaction_bio(sc->devstat, bp);
	}
	mtx_lock(&sc->queue_mtx);
	bioq_disksort(&sc->bio_queue, bp);
	wakeup(sc);
	mtx_unlock(&sc->queue_mtx);
}

#define MD_MALLOC_MOVE_ZERO	1
#define MD_MALLOC_MOVE_FILL	2
#define MD_MALLOC_MOVE_READ	3
#define MD_MALLOC_MOVE_WRITE	4
#define MD_MALLOC_MOVE_CMP	5

static int
md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize,
    void *ptr, u_char fill, int op)
{
	struct sf_buf *sf;
	vm_page_t m, *mp1;
	char *p, first;
	off_t *uc;
	unsigned n;
	int error, i, ma_offs1, sz, first_read;

	m = NULL;
	error = 0;
	sf = NULL;
	/* if (op == MD_MALLOC_MOVE_CMP) { gcc */
	first = 0;
	first_read = 0;
	uc = ptr;
	mp1 = *mp;
	ma_offs1 = *ma_offs;
	/* } */
	sched_pin();
	for (n = sectorsize; n != 0; n -= sz) {
		sz = imin(PAGE_SIZE - *ma_offs, n);
		if (m != **mp) {
			if (sf != NULL)
				sf_buf_free(sf);
			m = **mp;
			sf = sf_buf_alloc(m, SFB_CPUPRIVATE |
			    (md_malloc_wait ? 0 : SFB_NOWAIT));
			if (sf == NULL) {
				error = ENOMEM;
				break;
			}
		}
		p = (char *)sf_buf_kva(sf) + *ma_offs;
		switch (op) {
		case MD_MALLOC_MOVE_ZERO:
			bzero(p, sz);
			break;
		case MD_MALLOC_MOVE_FILL:
			memset(p, fill, sz);
			break;
		case MD_MALLOC_MOVE_READ:
			bcopy(ptr, p, sz);
			cpu_flush_dcache(p, sz);
			break;
		case MD_MALLOC_MOVE_WRITE:
			bcopy(p, ptr, sz);
			break;
		case MD_MALLOC_MOVE_CMP:
			for (i = 0; i < sz; i++, p++) {
				if (!first_read) {
					*uc = (u_char)*p;
					first = *p;
					first_read = 1;
				} else if (*p != first) {
					error = EDOOFUS;
					break;
				}
			}
			break;
		default:
			KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op));
			break;
		}
		if (error != 0)
			break;
		*ma_offs += sz;
		*ma_offs %= PAGE_SIZE;
		if (*ma_offs == 0)
			(*mp)++;
		ptr = (char *)ptr + sz;
	}

	if (sf != NULL)
		sf_buf_free(sf);
	sched_unpin();
	if (op == MD_MALLOC_MOVE_CMP && error != 0) {
		*mp = mp1;
		*ma_offs = ma_offs1;
	}
	return (error);
}

static int
md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs,
    unsigned len, void *ptr, u_char fill, int op)
{
	bus_dma_segment_t *vlist;
	uint8_t *p, *end, first;
	off_t *uc;
	int ma_offs, seg_len;

	vlist = *pvlist;
	ma_offs = *pma_offs;
	uc = ptr;

	for (; len != 0; len -= seg_len) {
		seg_len = imin(vlist->ds_len - ma_offs, len);
		p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs;
		switch (op) {
		case MD_MALLOC_MOVE_ZERO:
			bzero(p, seg_len);
			break;
		case MD_MALLOC_MOVE_FILL:
			memset(p, fill, seg_len);
			break;
		case MD_MALLOC_MOVE_READ:
			bcopy(ptr, p, seg_len);
			cpu_flush_dcache(p, seg_len);
			break;
		case MD_MALLOC_MOVE_WRITE:
			bcopy(p, ptr, seg_len);
			break;
		case MD_MALLOC_MOVE_CMP:
			end = p + seg_len;
			first = *uc = *p;
			/* Confirm all following bytes match the first */
			while (++p < end) {
				if (*p != first)
					return (EDOOFUS);
			}
			break;
		default:
			KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op));
			break;
		}

		ma_offs += seg_len;
		if (ma_offs == vlist->ds_len) {
			ma_offs = 0;
			vlist++;
		}
		ptr = (uint8_t *)ptr + seg_len;
	}
	*pvlist = vlist;
	*pma_offs = ma_offs;

	return (0);
}

static int
mdstart_malloc(struct md_s *sc, struct bio *bp)
{
	u_char *dst;
	vm_page_t *m;
	bus_dma_segment_t *vlist;
	int i, error, error1, ma_offs, notmapped;
	off_t secno, nsec, uc;
	uintptr_t sp, osp;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		return (0);
	default:
		return (EOPNOTSUPP);
	}

	notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0;
	vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
	    (bus_dma_segment_t *)bp->bio_data : NULL;
	if (notmapped) {
		m = bp->bio_ma;
		ma_offs = bp->bio_ma_offset;
		dst = NULL;
		KASSERT(vlist == NULL, ("vlists cannot be unmapped"));
	} else if (vlist != NULL) {
		ma_offs = bp->bio_ma_offset;
		dst = NULL;
	} else {
		dst = bp->bio_data;
	}

	nsec = bp->bio_length / sc->sectorsize;
	secno = bp->bio_offset / sc->sectorsize;
	error = 0;
	while (nsec--) {
		osp = s_read(sc->indir, secno);
		if (bp->bio_cmd == BIO_DELETE) {
			if (osp != 0)
				error = s_write(sc->indir, secno, 0);
		} else if (bp->bio_cmd == BIO_READ) {
			if (osp == 0) {
				if (notmapped) {
					error = md_malloc_move_ma(&m, &ma_offs,
					    sc->sectorsize, NULL, 0,
					    MD_MALLOC_MOVE_ZERO);
				} else if (vlist != NULL) {
					error = md_malloc_move_vlist(&vlist,
					    &ma_offs, sc->sectorsize, NULL, 0,
					    MD_MALLOC_MOVE_ZERO);
				} else
					bzero(dst, sc->sectorsize);
			} else if (osp <= 255) {
				if (notmapped) {
					error = md_malloc_move_ma(&m, &ma_offs,
					    sc->sectorsize, NULL, osp,
					    MD_MALLOC_MOVE_FILL);
				} else if (vlist != NULL) {
					error = md_malloc_move_vlist(&vlist,
					    &ma_offs, sc->sectorsize, NULL, osp,
					    MD_MALLOC_MOVE_FILL);
				} else
					memset(dst, osp, sc->sectorsize);
			} else {
				if (notmapped) {
					error = md_malloc_move_ma(&m, &ma_offs,
					    sc->sectorsize, (void *)osp, 0,
					    MD_MALLOC_MOVE_READ);
				} else if (vlist != NULL) {
					error = md_malloc_move_vlist(&vlist,
					    &ma_offs, sc->sectorsize,
					    (void *)osp, 0,
					    MD_MALLOC_MOVE_READ);
				} else {
					bcopy((void *)osp, dst, sc->sectorsize);
					cpu_flush_dcache(dst, sc->sectorsize);
				}
			}
			osp = 0;
		} else if (bp->bio_cmd == BIO_WRITE) {
			if (sc->flags & MD_COMPRESS) {
				if (notmapped) {
					error1 = md_malloc_move_ma(&m, &ma_offs,
					    sc->sectorsize, &uc, 0,
					    MD_MALLOC_MOVE_CMP);
					i = error1 == 0 ? sc->sectorsize : 0;
				} else if (vlist != NULL) {
					error1 = md_malloc_move_vlist(&vlist,
					    &ma_offs, sc->sectorsize, &uc, 0,
					    MD_MALLOC_MOVE_CMP);
					i = error1 == 0 ? sc->sectorsize : 0;
				} else {
					uc = dst[0];
					for (i = 1; i < sc->sectorsize; i++) {
						if (dst[i] != uc)
							break;
					}
				}
			} else {
				i = 0;
				uc = 0;
			}
			if (i == sc->sectorsize) {
				if (osp != uc)
					error = s_write(sc->indir, secno, uc);
			} else {
				if (osp <= 255) {
					sp = (uintptr_t)uma_zalloc(sc->uma,
					    md_malloc_wait ? M_WAITOK :
					    M_NOWAIT);
					if (sp == 0) {
						error = ENOSPC;
						break;
					}
					if (notmapped) {
						error = md_malloc_move_ma(&m,
						    &ma_offs, sc->sectorsize,
						    (void *)sp, 0,
						    MD_MALLOC_MOVE_WRITE);
					} else if (vlist != NULL) {
						error = md_malloc_move_vlist(
						    &vlist, &ma_offs,
						    sc->sectorsize, (void *)sp,
						    0, MD_MALLOC_MOVE_WRITE);
					} else {
						bcopy(dst, (void *)sp,
						    sc->sectorsize);
					}
					error = s_write(sc->indir, secno, sp);
				} else {
					if (notmapped) {
						error = md_malloc_move_ma(&m,
						    &ma_offs, sc->sectorsize,
						    (void *)osp, 0,
						    MD_MALLOC_MOVE_WRITE);
					} else if (vlist != NULL) {
						error = md_malloc_move_vlist(
						    &vlist, &ma_offs,
						    sc->sectorsize, (void *)osp,
						    0, MD_MALLOC_MOVE_WRITE);
					} else {
						bcopy(dst, (void *)osp,
						    sc->sectorsize);
					}
					osp = 0;
				}
			}
		} else {
			error = EOPNOTSUPP;
		}
		if (osp > 255)
			uma_zfree(sc->uma, (void*)osp);
		if (error != 0)
			break;
		secno++;
		if (!notmapped && vlist == NULL)
			dst += sc->sectorsize;
	}
	bp->bio_resid = 0;
	return (error);
}

static void
mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len)
{
	off_t seg_len;

	while (offset >= vlist->ds_len) {
		offset -= vlist->ds_len;
		vlist++;
	}

	while (len != 0) {
		seg_len = omin(len, vlist->ds_len - offset);
		bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset),
		    seg_len);
		offset = 0;
		src = (uint8_t *)src + seg_len;
		len -= seg_len;
		vlist++;
	}
}

static void
mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len)
{
	off_t seg_len;

	while (offset >= vlist->ds_len) {
		offset -= vlist->ds_len;
		vlist++;
	}

	while (len != 0) {
		seg_len = omin(len, vlist->ds_len - offset);
		bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst,
		    seg_len);
		offset = 0;
		dst = (uint8_t *)dst + seg_len;
		len -= seg_len;
		vlist++;
	}
}

static int
mdstart_preload(struct md_s *sc, struct bio *bp)
{
	uint8_t *p;

	p = sc->pl_ptr + bp->bio_offset;
	switch (bp->bio_cmd) {
	case BIO_READ:
		if ((bp->bio_flags & BIO_VLIST) != 0) {
			mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data,
			    bp->bio_ma_offset, bp->bio_length);
		} else {
			bcopy(p, bp->bio_data, bp->bio_length);
		}
		cpu_flush_dcache(bp->bio_data, bp->bio_length);
		break;
	case BIO_WRITE:
		if ((bp->bio_flags & BIO_VLIST) != 0) {
			mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data,
			    bp->bio_ma_offset, p, bp->bio_length);
		} else {
			bcopy(bp->bio_data, p, bp->bio_length);
		}
		break;
	}
	bp->bio_resid = 0;
	return (0);
}

static int
mdstart_vnode(struct md_s *sc, struct bio *bp)
{
	int error;
	struct uio auio;
	struct iovec aiov;
	struct iovec *piov;
	struct mount *mp;
	struct vnode *vp;
	bus_dma_segment_t *vlist;
	struct thread *td;
	off_t iolen, iostart, off, len;
	int ma_offs, npages;
	bool mapped;

	switch (bp->bio_cmd) {
	case BIO_READ:
		auio.uio_rw = UIO_READ;
		break;
	case BIO_WRITE:
		auio.uio_rw = UIO_WRITE;
		break;
	case BIO_FLUSH:
		break;
	case BIO_DELETE:
		if (sc->candelete)
			break;
		/* FALLTHROUGH */
	default:
		return (EOPNOTSUPP);
	}

	td = curthread;
	vp = sc->vnode;
	piov = NULL;
	ma_offs = bp->bio_ma_offset;
	off = bp->bio_offset;
	len = bp->bio_length;
	mapped = false;

	/*
	 * VNODE I/O
	 *
	 * If an error occurs, we set BIO_ERROR but we do not set
	 * B_INVAL because (for a write anyway), the buffer is
	 * still valid.
	 */

	if (bp->bio_cmd == BIO_FLUSH) {
		do {
			(void)vn_start_write(vp, &mp, V_WAIT);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_FSYNC(vp, MNT_WAIT, td);
			VOP_UNLOCK(vp);
			vn_finished_write(mp);
		} while (error == ERELOOKUP);
		return (error);
	} else if (bp->bio_cmd == BIO_DELETE) {
		error = vn_deallocate(vp, &off, &len, 0,
		    sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred, NOCRED);
		bp->bio_resid = len;
		return (error);
	}

	auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
	auio.uio_resid = bp->bio_length;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_td = td;

	if ((bp->bio_flags & BIO_VLIST) != 0) {
		piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK);
		auio.uio_iov = piov;
		vlist = (bus_dma_segment_t *)bp->bio_data;
		while (len > 0) {
			piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr +
			    ma_offs);
			piov->iov_len = vlist->ds_len - ma_offs;
			if (piov->iov_len > len)
				piov->iov_len = len;
			len -= piov->iov_len;
			ma_offs = 0;
			vlist++;
			piov++;
		}
		auio.uio_iovcnt = piov - auio.uio_iov;
		piov = auio.uio_iov;
	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
		bp->bio_resid = len;
unmapped_step:
		npages = atop(min(maxphys, round_page(len + (ma_offs &
		    PAGE_MASK))));
		iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len);
		KASSERT(iolen > 0, ("zero iolen"));
		KASSERT(npages <= atop(MAXPHYS + PAGE_SIZE),
		    ("npages %d too large", npages));
		pmap_qenter(sc->kva, &bp->bio_ma[atop(ma_offs)], npages);
		aiov.iov_base = (void *)(sc->kva + (ma_offs & PAGE_MASK));
		aiov.iov_len = iolen;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_resid = iolen;
		mapped = true;
	} else {
		aiov.iov_base = bp->bio_data;
		aiov.iov_len = bp->bio_length;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
	}
	iostart = auio.uio_offset;
	if (auio.uio_rw == UIO_READ) {
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_READ(vp, &auio, 0, sc->cred);
		VOP_UNLOCK(vp);
	} else {
		(void) vn_start_write(vp, &mp, V_WAIT);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC,
		    sc->cred);
		VOP_UNLOCK(vp);
		vn_finished_write(mp);
		if (error == 0)
			sc->flags &= ~MD_VERIFY;
	}

	/* When MD_CACHE is set, try to avoid double-caching the data. */
	if (error == 0 && (sc->flags & MD_CACHE) == 0)
		VOP_ADVISE(vp, iostart, auio.uio_offset - 1,
		    POSIX_FADV_DONTNEED);

	if (mapped) {
		pmap_qremove(sc->kva, npages);
		if (error == 0) {
			len -= iolen;
			bp->bio_resid -= iolen;
			ma_offs += iolen;
			if (len > 0)
				goto unmapped_step;
		}
	} else {
		bp->bio_resid = auio.uio_resid;
	}

	free(piov, M_MD);
	return (error);
}

static int
mdstart_swap(struct md_s *sc, struct bio *bp)
{
	vm_page_t m;
	u_char *p;
	vm_pindex_t i, lastp;
	bus_dma_segment_t *vlist;
	int rv, ma_offs, offs, len, lastend;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_FLUSH:
		return (0);
	default:
		return (EOPNOTSUPP);
	}

	p = bp->bio_data;
	ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ?
	    bp->bio_ma_offset : 0;
	vlist = (bp->bio_flags & BIO_VLIST) != 0 ?
	    (bus_dma_segment_t *)bp->bio_data : NULL;

	/*
	 * offs is the offset at which to start operating on the
	 * next (ie, first) page. lastp is the last page on
	 * which we're going to operate. lastend is the ending
	 * position within that last page (ie, PAGE_SIZE if
	 * we're operating on complete aligned pages).
	 */
	offs = bp->bio_offset % PAGE_SIZE;
	lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE;
	lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;

	rv = VM_PAGER_OK;
	vm_object_pip_add(sc->object, 1);
	for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
		len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
		m = vm_page_grab_unlocked(sc->object, i, VM_ALLOC_SYSTEM);
		if (bp->bio_cmd == BIO_READ) {
			if (vm_page_all_valid(m))
				rv = VM_PAGER_OK;
			else
				rv = vm_pager_get_pages(sc->object, &m, 1,
				    NULL, NULL);
			if (rv == VM_PAGER_ERROR) {
				VM_OBJECT_WLOCK(sc->object);
				vm_page_free(m);
				VM_OBJECT_WUNLOCK(sc->object);
				break;
			} else if (rv == VM_PAGER_FAIL) {
				/*
				 * Pager does not have the page. Zero
				 * the allocated page, and mark it as
				 * valid. Do not set dirty, the page
				 * can be recreated if thrown out.
				 */
				pmap_zero_page(m);
				vm_page_valid(m);
			}
			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
				pmap_copy_pages(&m, offs, bp->bio_ma,
				    ma_offs, len);
			} else if ((bp->bio_flags & BIO_VLIST) != 0) {
				physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs,
				    vlist, ma_offs, len);
				cpu_flush_dcache(p, len);
			} else {
				physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len);
				cpu_flush_dcache(p, len);
			}
		} else if (bp->bio_cmd == BIO_WRITE) {
			if (len == PAGE_SIZE || vm_page_all_valid(m))
				rv = VM_PAGER_OK;
			else
				rv = vm_pager_get_pages(sc->object, &m, 1,
				    NULL, NULL);
			if (rv == VM_PAGER_ERROR) {
				VM_OBJECT_WLOCK(sc->object);
				vm_page_free(m);
				VM_OBJECT_WUNLOCK(sc->object);
				break;
			} else if (rv == VM_PAGER_FAIL)
				pmap_zero_page(m);

			if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
				pmap_copy_pages(bp->bio_ma, ma_offs, &m,
				    offs, len);
			} else if ((bp->bio_flags & BIO_VLIST) != 0) {
				physcopyin_vlist(vlist, ma_offs,
				    VM_PAGE_TO_PHYS(m) + offs, len);
			} else {
				physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len);
			}

			vm_page_valid(m);
			vm_page_set_dirty(m);
		} else if (bp->bio_cmd == BIO_DELETE) {
			if (len == PAGE_SIZE || vm_page_all_valid(m))
				rv = VM_PAGER_OK;
			else
				rv = vm_pager_get_pages(sc->object, &m, 1,
				    NULL, NULL);
			VM_OBJECT_WLOCK(sc->object);
			if (rv == VM_PAGER_ERROR) {
				vm_page_free(m);
				VM_OBJECT_WUNLOCK(sc->object);
				break;
			} else if (rv == VM_PAGER_FAIL) {
				vm_page_free(m);
				m = NULL;
			} else {
				/* Page is valid. */
				if (len != PAGE_SIZE) {
					pmap_zero_page_area(m, offs, len);
					vm_page_set_dirty(m);
				} else {
					vm_pager_page_unswapped(m);
					vm_page_free(m);
					m = NULL;
				}
			}
			VM_OBJECT_WUNLOCK(sc->object);
		}
		if (m != NULL) {
			/*
			 * The page may be deactivated prior to setting
			 * PGA_REFERENCED, but in this case it will be
			 * reactivated by the page daemon.
			 */
			if (vm_page_active(m))
				vm_page_reference(m);
			else
				vm_page_activate(m);
			vm_page_xunbusy(m);
		}

		/* Actions on further pages start at offset 0 */
		p += PAGE_SIZE - offs;
		offs = 0;
		ma_offs += len;
	}
	vm_object_pip_wakeup(sc->object);
	return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
}

static int
mdstart_null(struct md_s *sc, struct bio *bp)
{

	switch (bp->bio_cmd) {
	case BIO_READ:
		bzero(bp->bio_data, bp->bio_length);
		cpu_flush_dcache(bp->bio_data, bp->bio_length);
		break;
	case BIO_WRITE:
		break;
	}
	bp->bio_resid = 0;
	return (0);
}

static void
md_handleattr(struct md_s *sc, struct bio *bp)
{
	if (sc->fwsectors && sc->fwheads &&
	    (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) != 0 ||
	    g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads) != 0))
		return;
	if (g_handleattr_int(bp, "GEOM::candelete", sc->candelete) != 0)
		return;
	if (sc->ident[0] != '\0' &&
	    g_handleattr_str(bp, "GEOM::ident", sc->ident) != 0)
		return;
	if (g_handleattr_int(bp, "MNT::verified", (sc->flags & MD_VERIFY) != 0))
		return;
	g_io_deliver(bp, EOPNOTSUPP);
}

static void
md_kthread(void *arg)
{
	struct md_s *sc;
	struct bio *bp;
	int error;

	sc = arg;
	thread_lock(curthread);
	sched_prio(curthread, PRIBIO);
	thread_unlock(curthread);
	if (sc->type == MD_VNODE)
		curthread->td_pflags |= TDP_NORUNNINGBUF;

	for (;;) {
		mtx_lock(&sc->queue_mtx);
		if (sc->flags & MD_SHUTDOWN) {
			sc->flags |= MD_EXITING;
			mtx_unlock(&sc->queue_mtx);
			kproc_exit(0);
		}
		bp = bioq_takefirst(&sc->bio_queue);
		if (!bp) {
			msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0);
			continue;
		}
		mtx_unlock(&sc->queue_mtx);
		if (bp->bio_cmd == BIO_GETATTR) {
			md_handleattr(sc, bp);
		} else {
			error = sc->start(sc, bp);
			if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
				/*
				 * Devstat uses (bio_bcount, bio_resid) for
				 * determining the length of the completed part
				 * of the i/o. g_io_deliver() will translate
				 * from bio_completed to that, but it also
				 * destroys the bio so we must do our own
				 * translation.
				 */
				bp->bio_bcount = bp->bio_length;
				devstat_end_transaction_bio(sc->devstat, bp);
			}
			bp->bio_completed = bp->bio_length - bp->bio_resid;
			g_io_deliver(bp, error);
		}
	}
}

static struct md_s *
mdfind(int unit)
{
	struct md_s *sc;

	LIST_FOREACH(sc, &md_softc_list, list) {
		if (sc->unit == unit)
			break;
	}
	return (sc);
}

static struct md_s *
mdnew(int unit, int *errp, enum md_types type)
{
	struct md_s *sc;
	int error;

	*errp = 0;
	if (unit == -1)
		unit = alloc_unr(md_uh);
	else
		unit = alloc_unr_specific(md_uh, unit);

	if (unit == -1) {
		*errp = EBUSY;
		return (NULL);
	}

	sc = malloc(sizeof(*sc), M_MD, M_WAITOK | M_ZERO);
	sc->type = type;
	bioq_init(&sc->bio_queue);
	mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
	sc->unit = unit;
	sprintf(sc->name, "md%d", unit);
	LIST_INSERT_HEAD(&md_softc_list, sc, list);
	error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name);
	if (error == 0)
		return (sc);
	LIST_REMOVE(sc, list);
	mtx_destroy(&sc->queue_mtx);
	free_unr(md_uh, sc->unit);
	free(sc, M_MD);
	*errp = error;
	return (NULL);
}

static void
mdinit(struct md_s *sc)
{
	struct g_geom *gp;
	struct g_provider *pp;

	g_topology_lock();
	gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
	gp->softc = sc;
	pp = g_new_providerf(gp, "md%d", sc->unit);
	devstat_remove_entry(pp->stat);
	pp->stat = NULL;
	pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
	pp->mediasize = sc->mediasize;
	pp->sectorsize = sc->sectorsize;
	switch (sc->type) {
	case MD_MALLOC:
	case MD_VNODE:
	case MD_SWAP:
		pp->flags |= G_PF_ACCEPT_UNMAPPED;
		break;
	case MD_PRELOAD:
	case MD_NULL:
		break;
	}
	sc->gp = gp;
	sc->pp = pp;
	sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize,
	    DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
	sc->devstat->id = pp;
	g_error_provider(pp, 0);
	g_topology_unlock();
}

static int
mdcreate_malloc(struct md_s *sc, struct md_req *mdr)
{
	uintptr_t sp;
	int error;
	off_t u;

	error = 0;
	if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
		return (EINVAL);
	if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize))
		return (EINVAL);
	/* Compression doesn't make sense if we have reserved space */
	if (mdr->md_options & MD_RESERVE)
		mdr->md_options &= ~MD_COMPRESS;
	if (mdr->md_fwsectors != 0)
		sc->fwsectors = mdr->md_fwsectors;
	if (mdr->md_fwheads != 0)
		sc->fwheads = mdr->md_fwheads;
	sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE);
	sc->indir = dimension(sc->mediasize / sc->sectorsize);
	sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL,
	    0x1ff, 0);
	if (mdr->md_options & MD_RESERVE) {
		off_t nsectors;

		nsectors = sc->mediasize / sc->sectorsize;
		for (u = 0; u < nsectors; u++) {
			sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ?
			    M_WAITOK : M_NOWAIT) | M_ZERO);
			if (sp != 0)
				error = s_write(sc->indir, u, sp);
			else
				error = ENOMEM;
			if (error != 0)
				break;
		}
	}
	return (error);
}

static int
mdsetcred(struct md_s *sc, struct ucred *cred)
{
	char *tmpbuf;
	int error = 0;

	/*
	 * Set credits in our softc
	 */

	if (sc->cred)
		crfree(sc->cred);
	sc->cred = crhold(cred);

	/*
	 * Horrible kludge to establish credentials for NFS XXX.
	 */

	if (sc->vnode) {
		struct uio auio;
		struct iovec aiov;

		tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK);
		bzero(&auio, sizeof(auio));

		aiov.iov_base = tmpbuf;
		aiov.iov_len = sc->sectorsize;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_resid = aiov.iov_len;
		vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
		VOP_UNLOCK(sc->vnode);
		free(tmpbuf, M_TEMP);
	}
	return (error);
}

static int
mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td)
{
	struct vattr vattr;
	struct nameidata nd;
	char *fname;
	int error, flags;
	long v;

	fname = mdr->md_file;
	if (mdr->md_file_seg == UIO_USERSPACE) {
		error = copyinstr(fname, sc->file, sizeof(sc->file), NULL);
		if (error != 0)
			return (error);
	} else if (mdr->md_file_seg == UIO_SYSSPACE)
		strlcpy(sc->file, fname, sizeof(sc->file));
	else
		return (EDOOFUS);

	/*
	 * If the user specified that this is a read only device, don't
	 * set the FWRITE mask before trying to open the backing store.
	 */
	flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \
	    | ((mdr->md_options & MD_VERIFY) ? O_VERIFY : 0);
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file);
	error = vn_open(&nd, &flags, 0, NULL);
	if (error != 0)
		return (error);
	NDFREE_PNBUF(&nd);
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
		goto bad;
	}
	error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred);
	if (error != 0)
		goto bad;
	if ((mdr->md_options & MD_MUSTDEALLOC) != 0) {
		error = VOP_PATHCONF(nd.ni_vp, _PC_DEALLOC_PRESENT, &v);
		if (error != 0)
			goto bad;
		if (v == 0)
			sc->candelete = false;
	}
	if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) {
		vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY);
		if (VN_IS_DOOMED(nd.ni_vp)) {
			/* Forced unmount. */
			error = EBADF;
			goto bad;
		}
	}
	nd.ni_vp->v_vflag |= VV_MD;
	VOP_UNLOCK(nd.ni_vp);

	if (mdr->md_fwsectors != 0)
		sc->fwsectors = mdr->md_fwsectors;
	if (mdr->md_fwheads != 0)
		sc->fwheads = mdr->md_fwheads;
	snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju",
	    (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid);
	sc->flags = mdr->md_options & (MD_ASYNC | MD_CACHE | MD_FORCE |
	    MD_VERIFY);
	if (!(flags & FWRITE))
		sc->flags |= MD_READONLY;
	sc->vnode = nd.ni_vp;

	error = mdsetcred(sc, td->td_ucred);
	if (error != 0) {
		sc->vnode = NULL;
		vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
		nd.ni_vp->v_vflag &= ~VV_MD;
		goto bad;
	}

	sc->kva = kva_alloc(MAXPHYS + PAGE_SIZE);
	return (0);
bad:
	VOP_UNLOCK(nd.ni_vp);
	(void)vn_close(nd.ni_vp, flags, td->td_ucred, td);
	return (error);
}

static void
g_md_providergone(struct g_provider *pp)
{
	struct md_s *sc = pp->geom->softc;

	mtx_lock(&sc->queue_mtx);
	sc->flags |= MD_PROVIDERGONE;
	wakeup(&sc->flags);
	mtx_unlock(&sc->queue_mtx);
}

static int
mddestroy(struct md_s *sc, struct thread *td)
{

	if (sc->gp) {
		g_topology_lock();
		g_wither_geom(sc->gp, ENXIO);
		g_topology_unlock();

		mtx_lock(&sc->queue_mtx);
		while (!(sc->flags & MD_PROVIDERGONE))
			msleep(&sc->flags, &sc->queue_mtx, PRIBIO, "mddestroy", 0);
		mtx_unlock(&sc->queue_mtx);
	}
	if (sc->devstat) {
		devstat_remove_entry(sc->devstat);
		sc->devstat = NULL;
	}
	mtx_lock(&sc->queue_mtx);
	sc->flags |= MD_SHUTDOWN;
	wakeup(sc);
	while (!(sc->flags & MD_EXITING))
		msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
	mtx_unlock(&sc->queue_mtx);
	mtx_destroy(&sc->queue_mtx);
	if (sc->vnode != NULL) {
		vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
		sc->vnode->v_vflag &= ~VV_MD;
		VOP_UNLOCK(sc->vnode);
		(void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
		    FREAD : (FREAD|FWRITE), sc->cred, td);
	}
	if (sc->cred != NULL)
		crfree(sc->cred);
	if (sc->object != NULL)
		vm_object_deallocate(sc->object);
	if (sc->indir)
		destroy_indir(sc, sc->indir);
	if (sc->uma)
		uma_zdestroy(sc->uma);
	if (sc->kva)
		kva_free(sc->kva, MAXPHYS + PAGE_SIZE);

	LIST_REMOVE(sc, list);
	free_unr(md_uh, sc->unit);
	free(sc, M_MD);
	return (0);
}

static int
mdresize(struct md_s *sc, struct md_req *mdr)
{
	int error, res;
	vm_pindex_t oldpages, newpages;

	switch (sc->type) {
	case MD_VNODE:
	case MD_NULL:
		break;
	case MD_SWAP:
		if (mdr->md_mediasize <= 0 ||
		    (mdr->md_mediasize % PAGE_SIZE) != 0)
			return (EDOM);
		oldpages = OFF_TO_IDX(sc->mediasize);
		newpages = OFF_TO_IDX(mdr->md_mediasize);
		if (newpages < oldpages) {
			VM_OBJECT_WLOCK(sc->object);
			vm_object_page_remove(sc->object, newpages, 0, 0);
			swap_release_by_cred(IDX_TO_OFF(oldpages -
			    newpages), sc->cred);
			sc->object->charge = IDX_TO_OFF(newpages);
			sc->object->size = newpages;
			VM_OBJECT_WUNLOCK(sc->object);
		} else if (newpages > oldpages) {
			res = swap_reserve_by_cred(IDX_TO_OFF(newpages -
			    oldpages), sc->cred);
			if (!res)
				return (ENOMEM);
			if ((mdr->md_options & MD_RESERVE) ||
			    (sc->flags & MD_RESERVE)) {
				error = swap_pager_reserve(sc->object,
				    oldpages, newpages - oldpages);
				if (error < 0) {
					swap_release_by_cred(
					    IDX_TO_OFF(newpages - oldpages),
					    sc->cred);
					return (EDOM);
				}
			}
			VM_OBJECT_WLOCK(sc->object);
			sc->object->charge = IDX_TO_OFF(newpages);
			sc->object->size = newpages;
			VM_OBJECT_WUNLOCK(sc->object);
		}
		break;
	default:
		return (EOPNOTSUPP);
	}

	sc->mediasize = mdr->md_mediasize;

	g_topology_lock();
	g_resize_provider(sc->pp, sc->mediasize);
	g_topology_unlock();
	return (0);
}

static int
mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td)
{
	vm_ooffset_t npage;
	int error;

	/*
	 * Range check. Disallow negative sizes and sizes not being
	 * multiple of page size.
	 */
	if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
		return (EDOM);

	/*
	 * Allocate an OBJT_SWAP object.
	 *
	 * Note the truncation.
	 */

	if ((mdr->md_options & MD_VERIFY) != 0)
		return (EINVAL);
	npage = mdr->md_mediasize / PAGE_SIZE;
	if (mdr->md_fwsectors != 0)
		sc->fwsectors = mdr->md_fwsectors;
	if (mdr->md_fwheads != 0)
		sc->fwheads = mdr->md_fwheads;
	sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage,
	    VM_PROT_DEFAULT, 0, td->td_ucred);
	if (sc->object == NULL)
		return (ENOMEM);
	sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE);
	if (mdr->md_options & MD_RESERVE) {
		if (swap_pager_reserve(sc->object, 0, npage) < 0) {
			error = EDOM;
			goto finish;
		}
	}
	error = mdsetcred(sc, td->td_ucred);
finish:
	if (error != 0) {
		vm_object_deallocate(sc->object);
		sc->object = NULL;
	}
	return (error);
}

static int
mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td)
{

	/*
	 * Range check. Disallow negative sizes and sizes not being
	 * multiple of page size.
	 */
	if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
		return (EDOM);

	return (0);
}

static int
kern_mdattach_locked(struct thread *td, struct md_req *mdr)
{
	struct md_s *sc;
	unsigned sectsize;
	int error, i;

	sx_assert(&md_sx, SA_XLOCKED);

	switch (mdr->md_type) {
	case MD_MALLOC:
	case MD_PRELOAD:
	case MD_VNODE:
	case MD_SWAP:
	case MD_NULL:
		break;
	default:
		return (EINVAL);
	}
	if (mdr->md_sectorsize == 0)
		sectsize = DEV_BSIZE;
	else
		sectsize = mdr->md_sectorsize;
	if (sectsize > maxphys || mdr->md_mediasize < sectsize)
		return (EINVAL);
	if (mdr->md_options & MD_AUTOUNIT)
		sc = mdnew(-1, &error, mdr->md_type);
	else {
		if (mdr->md_unit > INT_MAX)
			return (EINVAL);
		sc = mdnew(mdr->md_unit, &error, mdr->md_type);
	}
	if (sc == NULL)
		return (error);
	if (mdr->md_label != NULL)
		error = copyinstr(mdr->md_label, sc->label,
		    sizeof(sc->label), NULL);
	if (error != 0)
		goto err_after_new;
	if (mdr->md_options & MD_AUTOUNIT)
		mdr->md_unit = sc->unit;
	sc->mediasize = mdr->md_mediasize;
	sc->sectorsize = sectsize;
	sc->candelete = true;
	error = EDOOFUS;
	switch (sc->type) {
	case MD_MALLOC:
		sc->start = mdstart_malloc;
		error = mdcreate_malloc(sc, mdr);
		break;
	case MD_PRELOAD:
		/*
		 * We disallow attaching preloaded memory disks via
		 * ioctl. Preloaded memory disks are automatically
		 * attached in g_md_init().
		 */
		error = EOPNOTSUPP;
		break;
	case MD_VNODE:
		sc->start = mdstart_vnode;
		error = mdcreate_vnode(sc, mdr, td);
		break;
	case MD_SWAP:
		sc->start = mdstart_swap;
		error = mdcreate_swap(sc, mdr, td);
		break;
	case MD_NULL:
		sc->start = mdstart_null;
		error = mdcreate_null(sc, mdr, td);
		break;
	}
err_after_new:
	if (error != 0) {
		mddestroy(sc, td);
		return (error);
	}

	/* Prune off any residual fractional sector */
	i = sc->mediasize % sc->sectorsize;
	sc->mediasize -= i;

	mdinit(sc);
	return (0);
}

static int
kern_mdattach(struct thread *td, struct md_req *mdr)
{
	int error;

	sx_xlock(&md_sx);
	error = kern_mdattach_locked(td, mdr);
	sx_xunlock(&md_sx);
	return (error);
}

static int
kern_mddetach_locked(struct thread *td, struct md_req *mdr)
{
	struct md_s *sc;

	sx_assert(&md_sx, SA_XLOCKED);

	if (mdr->md_mediasize != 0 ||
	    (mdr->md_options & ~MD_FORCE) != 0)
		return (EINVAL);

	sc = mdfind(mdr->md_unit);
	if (sc == NULL)
		return (ENOENT);
	if (sc->opencount != 0 && !(sc->flags & MD_FORCE) &&
	    !(mdr->md_options & MD_FORCE))
		return (EBUSY);
	return (mddestroy(sc, td));
}

static int
kern_mddetach(struct thread *td, struct md_req *mdr)
{
	int error;

	sx_xlock(&md_sx);
	error = kern_mddetach_locked(td, mdr);
	sx_xunlock(&md_sx);
	return (error);
}

static int
kern_mdresize_locked(struct md_req *mdr)
{
	struct md_s *sc;

	sx_assert(&md_sx, SA_XLOCKED);

	if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0)
		return (EINVAL);

	sc = mdfind(mdr->md_unit);
	if (sc == NULL)
		return (ENOENT);
	if (mdr->md_mediasize < sc->sectorsize)
		return (EINVAL);
	mdr->md_mediasize -= mdr->md_mediasize % sc->sectorsize;
	if (mdr->md_mediasize < sc->mediasize &&
	    !(sc->flags & MD_FORCE) &&
	    !(mdr->md_options & MD_FORCE))
		return (EBUSY);
	return (mdresize(sc, mdr));
}

static int
kern_mdresize(struct md_req *mdr)
{
	int error;

	sx_xlock(&md_sx);
	error = kern_mdresize_locked(mdr);
	sx_xunlock(&md_sx);
	return (error);
}

static int
kern_mdquery_locked(struct md_req *mdr)
{
	struct md_s *sc;
	int error;

	sx_assert(&md_sx, SA_XLOCKED);

	sc = mdfind(mdr->md_unit);
	if (sc == NULL)
		return (ENOENT);
	mdr->md_type = sc->type;
	mdr->md_options = sc->flags;
	mdr->md_mediasize = sc->mediasize;
	mdr->md_sectorsize = sc->sectorsize;
	error = 0;
	if (mdr->md_label != NULL) {
		error = copyout(sc->label, mdr->md_label,
		    strlen(sc->label) + 1);
		if (error != 0)
			return (error);
	}
	if (sc->type == MD_VNODE ||
	    (sc->type == MD_PRELOAD && mdr->md_file != NULL))
		error = copyout(sc->file, mdr->md_file,
		    strlen(sc->file) + 1);
	return (error);
}

static int
kern_mdquery(struct md_req *mdr)
{
	int error;

	sx_xlock(&md_sx);
	error = kern_mdquery_locked(mdr);
	sx_xunlock(&md_sx);
	return (error);
}

/* Copy members that are not userspace pointers. */
#define MD_IOCTL2REQ(mdio, mdr) do {					\
	(mdr)->md_unit = (mdio)->md_unit;				\
	(mdr)->md_type = (mdio)->md_type;				\
	(mdr)->md_mediasize = (mdio)->md_mediasize;			\
	(mdr)->md_sectorsize = (mdio)->md_sectorsize;			\
	(mdr)->md_options = (mdio)->md_options;				\
	(mdr)->md_fwheads = (mdio)->md_fwheads;				\
	(mdr)->md_fwsectors = (mdio)->md_fwsectors;			\
	(mdr)->md_units = &(mdio)->md_pad[0];				\
	(mdr)->md_units_nitems = nitems((mdio)->md_pad);		\
} while(0)

/* Copy members that might have been updated */
#define MD_REQ2IOCTL(mdr, mdio) do {					\
	(mdio)->md_unit = (mdr)->md_unit;				\
	(mdio)->md_type = (mdr)->md_type;				\
	(mdio)->md_mediasize = (mdr)->md_mediasize;			\
	(mdio)->md_sectorsize = (mdr)->md_sectorsize;			\
	(mdio)->md_options = (mdr)->md_options;				\
	(mdio)->md_fwheads = (mdr)->md_fwheads;				\
	(mdio)->md_fwsectors = (mdr)->md_fwsectors;			\
} while(0)

static int
mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
    struct thread *td)
{
	struct md_req mdr;
	int error;

	if (md_debug)
		printf("mdctlioctl(%s %lx %p %x %p)\n",
		    devtoname(dev), cmd, addr, flags, td);

	bzero(&mdr, sizeof(mdr));
	switch (cmd) {
	case MDIOCATTACH:
	case MDIOCDETACH:
	case MDIOCRESIZE:
	case MDIOCQUERY: {
		struct md_ioctl *mdio = (struct md_ioctl *)addr;
		if (mdio->md_version != MDIOVERSION)
			return (EINVAL);
		MD_IOCTL2REQ(mdio, &mdr);
		mdr.md_file = mdio->md_file;
		mdr.md_file_seg = UIO_USERSPACE;
		/* If the file is adjacent to the md_ioctl it's in kernel. */
		if ((void *)mdio->md_file == (void *)(mdio + 1))
			mdr.md_file_seg = UIO_SYSSPACE;
		mdr.md_label = mdio->md_label;
		break;
	}
#ifdef COMPAT_FREEBSD32
	case MDIOCATTACH_32:
	case MDIOCDETACH_32:
	case MDIOCRESIZE_32:
	case MDIOCQUERY_32: {
		struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr;
		if (mdio->md_version != MDIOVERSION)
			return (EINVAL);
		MD_IOCTL2REQ(mdio, &mdr);
		mdr.md_file = (void *)(uintptr_t)mdio->md_file;
		mdr.md_file_seg = UIO_USERSPACE;
		mdr.md_label = (void *)(uintptr_t)mdio->md_label;
		break;
	}
#endif
	default:
		/* Fall through to handler switch. */
		break;
	}

	error = 0;
	switch (cmd) {
	case MDIOCATTACH:
#ifdef COMPAT_FREEBSD32
	case MDIOCATTACH_32:
#endif
		error = kern_mdattach(td, &mdr);
		break;
	case MDIOCDETACH:
#ifdef COMPAT_FREEBSD32
	case MDIOCDETACH_32:
#endif
		error = kern_mddetach(td, &mdr);
		break;
	case MDIOCRESIZE:
#ifdef COMPAT_FREEBSD32
	case MDIOCRESIZE_32:
#endif
		error = kern_mdresize(&mdr);
		break;
	case MDIOCQUERY:
#ifdef COMPAT_FREEBSD32
	case MDIOCQUERY_32:
#endif
		error = kern_mdquery(&mdr);
		break;
	default:
		error = ENOIOCTL;
	}

	switch (cmd) {
	case MDIOCATTACH:
	case MDIOCQUERY: {
		struct md_ioctl *mdio = (struct md_ioctl *)addr;
		MD_REQ2IOCTL(&mdr, mdio);
		break;
	}
#ifdef COMPAT_FREEBSD32
	case MDIOCATTACH_32:
	case MDIOCQUERY_32: {
		struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr;
		MD_REQ2IOCTL(&mdr, mdio);
		break;
	}
#endif
	default:
		/* Other commands to not alter mdr. */
		break;
	}

	return (error);
}

static void
md_preloaded(u_char *image, size_t length, const char *name)
{
	struct md_s *sc;
	int error;

	sc = mdnew(-1, &error, MD_PRELOAD);
	if (sc == NULL)
		return;
	sc->mediasize = length;
	sc->sectorsize = DEV_BSIZE;
	sc->pl_ptr = image;
	sc->pl_len = length;
	sc->start = mdstart_preload;
	if (name != NULL)
		strlcpy(sc->file, name, sizeof(sc->file));
#ifdef MD_ROOT
	if (sc->unit == 0) {
#ifndef ROOTDEVNAME
		rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0";
#endif
#ifdef MD_ROOT_READONLY
		sc->flags |= MD_READONLY;
#endif
	}
#endif
	mdinit(sc);
	if (name != NULL) {
		printf("%s%d: Preloaded image <%s> %zd bytes at %p\n",
		    MD_NAME, sc->unit, name, length, image);
	} else {
		printf("%s%d: Embedded image %zd bytes at %p\n",
		    MD_NAME, sc->unit, length, image);
	}
}

static void
g_md_init(struct g_class *mp __unused)
{
	caddr_t mod;
	u_char *ptr, *name, *type;
	unsigned len;
	int i;

	/* figure out log2(NINDIR) */
	for (i = NINDIR, nshift = -1; i; nshift++)
		i >>= 1;

	mod = NULL;
	sx_init(&md_sx, "MD config lock");
	g_topology_unlock();
	md_uh = new_unrhdr(0, INT_MAX, NULL);
#ifdef MD_ROOT
	if (mfs_root_size != 0) {
		sx_xlock(&md_sx);
#ifdef MD_ROOT_MEM
		md_preloaded(mfs_root, mfs_root_size, NULL);
#else
		md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size,
		    NULL);
#endif
		sx_xunlock(&md_sx);
	}
#endif
	/* XXX: are preload_* static or do they need Giant ? */
	while ((mod = preload_search_next_name(mod)) != NULL) {
		name = (char *)preload_search_info(mod, MODINFO_NAME);
		if (name == NULL)
			continue;
		type = (char *)preload_search_info(mod, MODINFO_TYPE);
		if (type == NULL)
			continue;
		if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
			continue;
		ptr = preload_fetch_addr(mod);
		len = preload_fetch_size(mod);
		if (ptr != NULL && len != 0) {
			sx_xlock(&md_sx);
			md_preloaded(ptr, len, name);
			sx_xunlock(&md_sx);
		}
	}
	status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
	    0600, MDCTL_NAME);
	g_topology_lock();
}

static void
g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp __unused, struct g_provider *pp)
{
	struct md_s *mp;
	char *type;

	mp = gp->softc;
	if (mp == NULL)
		return;

	switch (mp->type) {
	case MD_MALLOC:
		type = "malloc";
		break;
	case MD_PRELOAD:
		type = "preload";
		break;
	case MD_VNODE:
		type = "vnode";
		break;
	case MD_SWAP:
		type = "swap";
		break;
	case MD_NULL:
		type = "null";
		break;
	default:
		type = "unknown";
		break;
	}

	if (pp != NULL) {
		if (indent == NULL) {
			sbuf_printf(sb, " u %d", mp->unit);
			sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize);
			sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads);
			sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors);
			sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize);
			sbuf_printf(sb, " t %s", type);
			if ((mp->type == MD_VNODE && mp->vnode != NULL) ||
			    (mp->type == MD_PRELOAD && mp->file[0] != '\0'))
				sbuf_printf(sb, " file %s", mp->file);
			sbuf_printf(sb, " label %s", mp->label);
		} else {
			sbuf_printf(sb, "%s<unit>%d</unit>\n", indent,
			    mp->unit);
			sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n",
			    indent, (uintmax_t) mp->sectorsize);
			sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n",
			    indent, (uintmax_t) mp->fwheads);
			sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n",
			    indent, (uintmax_t) mp->fwsectors);
			if (mp->ident[0] != '\0') {
				sbuf_printf(sb, "%s<ident>", indent);
				g_conf_printf_escaped(sb, "%s", mp->ident);
				sbuf_printf(sb, "</ident>\n");
			}
			sbuf_printf(sb, "%s<length>%ju</length>\n",
			    indent, (uintmax_t) mp->mediasize);
			sbuf_printf(sb, "%s<compression>%s</compression>\n", indent,
			    (mp->flags & MD_COMPRESS) == 0 ? "off": "on");
			sbuf_printf(sb, "%s<access>%s</access>\n", indent,
			    (mp->flags & MD_READONLY) == 0 ? "read-write":
			    "read-only");
			sbuf_printf(sb, "%s<type>%s</type>\n", indent,
			    type);
			if ((mp->type == MD_VNODE && mp->vnode != NULL) ||
			    (mp->type == MD_PRELOAD && mp->file[0] != '\0')) {
				sbuf_printf(sb, "%s<file>", indent);
				g_conf_printf_escaped(sb, "%s", mp->file);
				sbuf_printf(sb, "</file>\n");
			}
			if (mp->type == MD_VNODE)
				sbuf_printf(sb, "%s<cache>%s</cache>\n", indent,
				    (mp->flags & MD_CACHE) == 0 ? "off": "on");
			sbuf_printf(sb, "%s<label>", indent);
			g_conf_printf_escaped(sb, "%s", mp->label);
			sbuf_printf(sb, "</label>\n");
		}
	}
}

static void
g_md_fini(struct g_class *mp __unused)
{

	sx_destroy(&md_sx);
	if (status_dev != NULL)
		destroy_dev(status_dev);
	delete_unrhdr(md_uh);
}