/*-
 * SPDX-License-Identifier: (Beerware AND BSD-3-Clause)
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.  Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 *
 */

/*-
 * The following functions are based on the vn(4) driver: mdstart_swap(),
 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
 * and as such under the following copyright:
 *
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
56 * 57 * from: Utah Hdr: vn.c 1.13 94/04/02 58 * 59 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 60 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 61 */ 62 63 #include "opt_rootdevname.h" 64 #include "opt_geom.h" 65 #include "opt_md.h" 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/bio.h> 70 #include <sys/buf.h> 71 #include <sys/conf.h> 72 #include <sys/devicestat.h> 73 #include <sys/fcntl.h> 74 #include <sys/kernel.h> 75 #include <sys/kthread.h> 76 #include <sys/limits.h> 77 #include <sys/linker.h> 78 #include <sys/lock.h> 79 #include <sys/malloc.h> 80 #include <sys/mdioctl.h> 81 #include <sys/mount.h> 82 #include <sys/mutex.h> 83 #include <sys/sx.h> 84 #include <sys/namei.h> 85 #include <sys/proc.h> 86 #include <sys/queue.h> 87 #include <sys/rwlock.h> 88 #include <sys/sbuf.h> 89 #include <sys/sched.h> 90 #include <sys/sf_buf.h> 91 #include <sys/sysctl.h> 92 #include <sys/uio.h> 93 #include <sys/vnode.h> 94 #include <sys/disk.h> 95 96 #include <geom/geom.h> 97 #include <geom/geom_int.h> 98 99 #include <vm/vm.h> 100 #include <vm/vm_param.h> 101 #include <vm/vm_object.h> 102 #include <vm/vm_page.h> 103 #include <vm/vm_pager.h> 104 #include <vm/swap_pager.h> 105 #include <vm/uma.h> 106 107 #include <machine/bus.h> 108 109 #define MD_MODVER 1 110 111 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ 112 #define MD_EXITING 0x20000 /* Worker thread is exiting. */ 113 114 #ifndef MD_NSECT 115 #define MD_NSECT (10000 * 2) 116 #endif 117 118 struct md_req { 119 unsigned md_unit; /* unit number */ 120 enum md_types md_type; /* type of disk */ 121 off_t md_mediasize; /* size of disk in bytes */ 122 unsigned md_sectorsize; /* sectorsize */ 123 unsigned md_options; /* options */ 124 int md_fwheads; /* firmware heads */ 125 int md_fwsectors; /* firmware sectors */ 126 char *md_file; /* pathname of file to mount */ 127 enum uio_seg md_file_seg; /* location of md_file */ 128 char *md_label; /* label of the device (userspace) */ 129 int *md_units; /* pointer to units array (kernel) */ 130 size_t md_units_nitems; /* items in md_units array */ 131 }; 132 133 #ifdef COMPAT_FREEBSD32 134 struct md_ioctl32 { 135 unsigned md_version; 136 unsigned md_unit; 137 enum md_types md_type; 138 uint32_t md_file; 139 off_t md_mediasize; 140 unsigned md_sectorsize; 141 unsigned md_options; 142 uint64_t md_base; 143 int md_fwheads; 144 int md_fwsectors; 145 uint32_t md_label; 146 int md_pad[MDNPAD]; 147 } __attribute__((__packed__)); 148 CTASSERT((sizeof(struct md_ioctl32)) == 436); 149 150 #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32) 151 #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32) 152 #define MDIOCQUERY_32 _IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32) 153 #define MDIOCLIST_32 _IOC_NEWTYPE(MDIOCLIST, struct md_ioctl32) 154 #define MDIOCRESIZE_32 _IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32) 155 #endif /* COMPAT_FREEBSD32 */ 156 157 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); 158 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); 159 160 static int md_debug; 161 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, 162 "Enable md(4) debug messages"); 163 static int md_malloc_wait; 164 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, 165 "Allow malloc to wait for memory allocations"); 166 167 #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) 168 #define MD_ROOT_FSTYPE "ufs" 169 #endif 170 171 #if defined(MD_ROOT) 172 /* 173 * Preloaded image gets put here. 
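 * Two layouts are handled by the declarations that follow: with
 * MD_ROOT_SIZE a fixed-size array is reserved in the "oldmfs" section
 * (the option is interpreted in kilobytes by the declaration below), so
 * post-build tools can patch a filesystem image into the kernel file;
 * without it, the weak symbols mfs_root and mfs_root_end bracket an
 * image linked into the kernel and mfs_root_size is computed from their
 * distance.  A kernel that embeds the image statically would typically
 * be configured with, e.g.:
 *
 *	options 	MD_ROOT
 *	options 	MD_ROOT_SIZE=8192	# reserve an 8 MB oldmfs array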
174 */ 175 #if defined(MD_ROOT_SIZE) 176 /* 177 * We put the mfs_root symbol into the oldmfs section of the kernel object file. 178 * Applications that patch the object with the image can determine 179 * the size looking at the oldmfs section size within the kernel. 180 */ 181 u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs"))); 182 const int mfs_root_size = sizeof(mfs_root); 183 #else 184 extern volatile u_char __weak_symbol mfs_root; 185 extern volatile u_char __weak_symbol mfs_root_end; 186 __GLOBL(mfs_root); 187 __GLOBL(mfs_root_end); 188 #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root)) 189 #endif 190 #endif 191 192 static g_init_t g_md_init; 193 static g_fini_t g_md_fini; 194 static g_start_t g_md_start; 195 static g_access_t g_md_access; 196 static void g_md_dumpconf(struct sbuf *sb, const char *indent, 197 struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); 198 199 static struct cdev *status_dev = NULL; 200 static struct sx md_sx; 201 static struct unrhdr *md_uh; 202 203 static d_ioctl_t mdctlioctl; 204 205 static struct cdevsw mdctl_cdevsw = { 206 .d_version = D_VERSION, 207 .d_ioctl = mdctlioctl, 208 .d_name = MD_NAME, 209 }; 210 211 struct g_class g_md_class = { 212 .name = "MD", 213 .version = G_VERSION, 214 .init = g_md_init, 215 .fini = g_md_fini, 216 .start = g_md_start, 217 .access = g_md_access, 218 .dumpconf = g_md_dumpconf, 219 }; 220 221 DECLARE_GEOM_CLASS(g_md_class, g_md); 222 223 224 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); 225 226 #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) 227 #define NMASK (NINDIR-1) 228 static int nshift; 229 230 static int md_vnode_pbuf_freecnt; 231 232 struct indir { 233 uintptr_t *array; 234 u_int total; 235 u_int used; 236 u_int shift; 237 }; 238 239 struct md_s { 240 int unit; 241 LIST_ENTRY(md_s) list; 242 struct bio_queue_head bio_queue; 243 struct mtx queue_mtx; 244 struct mtx stat_mtx; 245 struct cdev *dev; 246 enum md_types type; 247 off_t mediasize; 248 unsigned sectorsize; 249 unsigned opencount; 250 unsigned fwheads; 251 unsigned fwsectors; 252 char ident[32]; 253 unsigned flags; 254 char name[20]; 255 struct proc *procp; 256 struct g_geom *gp; 257 struct g_provider *pp; 258 int (*start)(struct md_s *sc, struct bio *bp); 259 struct devstat *devstat; 260 261 /* MD_MALLOC related fields */ 262 struct indir *indir; 263 uma_zone_t uma; 264 265 /* MD_PRELOAD related fields */ 266 u_char *pl_ptr; 267 size_t pl_len; 268 269 /* MD_VNODE related fields */ 270 struct vnode *vnode; 271 char file[PATH_MAX]; 272 char label[PATH_MAX]; 273 struct ucred *cred; 274 275 /* MD_SWAP related fields */ 276 vm_object_t object; 277 }; 278 279 static struct indir * 280 new_indir(u_int shift) 281 { 282 struct indir *ip; 283 284 ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT) 285 | M_ZERO); 286 if (ip == NULL) 287 return (NULL); 288 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 289 M_MDSECT, (md_malloc_wait ? 
M_WAITOK : M_NOWAIT) | M_ZERO); 290 if (ip->array == NULL) { 291 free(ip, M_MD); 292 return (NULL); 293 } 294 ip->total = NINDIR; 295 ip->shift = shift; 296 return (ip); 297 } 298 299 static void 300 del_indir(struct indir *ip) 301 { 302 303 free(ip->array, M_MDSECT); 304 free(ip, M_MD); 305 } 306 307 static void 308 destroy_indir(struct md_s *sc, struct indir *ip) 309 { 310 int i; 311 312 for (i = 0; i < NINDIR; i++) { 313 if (!ip->array[i]) 314 continue; 315 if (ip->shift) 316 destroy_indir(sc, (struct indir*)(ip->array[i])); 317 else if (ip->array[i] > 255) 318 uma_zfree(sc->uma, (void *)(ip->array[i])); 319 } 320 del_indir(ip); 321 } 322 323 /* 324 * This function does the math and allocates the top level "indir" structure 325 * for a device of "size" sectors. 326 */ 327 328 static struct indir * 329 dimension(off_t size) 330 { 331 off_t rcnt; 332 struct indir *ip; 333 int layer; 334 335 rcnt = size; 336 layer = 0; 337 while (rcnt > NINDIR) { 338 rcnt /= NINDIR; 339 layer++; 340 } 341 342 /* 343 * XXX: the top layer is probably not fully populated, so we allocate 344 * too much space for ip->array in here. 345 */ 346 ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); 347 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 348 M_MDSECT, M_WAITOK | M_ZERO); 349 ip->total = NINDIR; 350 ip->shift = layer * nshift; 351 return (ip); 352 } 353 354 /* 355 * Read a given sector 356 */ 357 358 static uintptr_t 359 s_read(struct indir *ip, off_t offset) 360 { 361 struct indir *cip; 362 int idx; 363 uintptr_t up; 364 365 if (md_debug > 1) 366 printf("s_read(%jd)\n", (intmax_t)offset); 367 up = 0; 368 for (cip = ip; cip != NULL;) { 369 if (cip->shift) { 370 idx = (offset >> cip->shift) & NMASK; 371 up = cip->array[idx]; 372 cip = (struct indir *)up; 373 continue; 374 } 375 idx = offset & NMASK; 376 return (cip->array[idx]); 377 } 378 return (0); 379 } 380 381 /* 382 * Write a given sector, prune the tree if the value is 0 383 */ 384 385 static int 386 s_write(struct indir *ip, off_t offset, uintptr_t ptr) 387 { 388 struct indir *cip, *lip[10]; 389 int idx, li; 390 uintptr_t up; 391 392 if (md_debug > 1) 393 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); 394 up = 0; 395 li = 0; 396 cip = ip; 397 for (;;) { 398 lip[li++] = cip; 399 if (cip->shift) { 400 idx = (offset >> cip->shift) & NMASK; 401 up = cip->array[idx]; 402 if (up != 0) { 403 cip = (struct indir *)up; 404 continue; 405 } 406 /* Allocate branch */ 407 cip->array[idx] = 408 (uintptr_t)new_indir(cip->shift - nshift); 409 if (cip->array[idx] == 0) 410 return (ENOSPC); 411 cip->used++; 412 up = cip->array[idx]; 413 cip = (struct indir *)up; 414 continue; 415 } 416 /* leafnode */ 417 idx = offset & NMASK; 418 up = cip->array[idx]; 419 if (up != 0) 420 cip->used--; 421 cip->array[idx] = ptr; 422 if (ptr != 0) 423 cip->used++; 424 break; 425 } 426 if (cip->used != 0 || li == 1) 427 return (0); 428 li--; 429 while (cip->used == 0 && cip != ip) { 430 li--; 431 idx = (offset >> lip[li]->shift) & NMASK; 432 up = lip[li]->array[idx]; 433 KASSERT(up == (uintptr_t)cip, ("md screwed up")); 434 del_indir(cip); 435 lip[li]->array[idx] = 0; 436 lip[li]->used--; 437 cip = lip[li]; 438 } 439 return (0); 440 } 441 442 443 static int 444 g_md_access(struct g_provider *pp, int r, int w, int e) 445 { 446 struct md_s *sc; 447 448 sc = pp->geom->softc; 449 if (sc == NULL) { 450 if (r <= 0 && w <= 0 && e <= 0) 451 return (0); 452 return (ENXIO); 453 } 454 r += pp->acr; 455 w += pp->acw; 456 e += pp->ace; 457 if ((sc->flags & MD_READONLY) != 0 && w > 0) 458 
return (EROFS); 459 if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { 460 sc->opencount = 1; 461 } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { 462 sc->opencount = 0; 463 } 464 return (0); 465 } 466 467 static void 468 g_md_start(struct bio *bp) 469 { 470 struct md_s *sc; 471 472 sc = bp->bio_to->geom->softc; 473 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { 474 mtx_lock(&sc->stat_mtx); 475 devstat_start_transaction_bio(sc->devstat, bp); 476 mtx_unlock(&sc->stat_mtx); 477 } 478 mtx_lock(&sc->queue_mtx); 479 bioq_disksort(&sc->bio_queue, bp); 480 mtx_unlock(&sc->queue_mtx); 481 wakeup(sc); 482 } 483 484 #define MD_MALLOC_MOVE_ZERO 1 485 #define MD_MALLOC_MOVE_FILL 2 486 #define MD_MALLOC_MOVE_READ 3 487 #define MD_MALLOC_MOVE_WRITE 4 488 #define MD_MALLOC_MOVE_CMP 5 489 490 static int 491 md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize, 492 void *ptr, u_char fill, int op) 493 { 494 struct sf_buf *sf; 495 vm_page_t m, *mp1; 496 char *p, first; 497 off_t *uc; 498 unsigned n; 499 int error, i, ma_offs1, sz, first_read; 500 501 m = NULL; 502 error = 0; 503 sf = NULL; 504 /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ 505 first = 0; 506 first_read = 0; 507 uc = ptr; 508 mp1 = *mp; 509 ma_offs1 = *ma_offs; 510 /* } */ 511 sched_pin(); 512 for (n = sectorsize; n != 0; n -= sz) { 513 sz = imin(PAGE_SIZE - *ma_offs, n); 514 if (m != **mp) { 515 if (sf != NULL) 516 sf_buf_free(sf); 517 m = **mp; 518 sf = sf_buf_alloc(m, SFB_CPUPRIVATE | 519 (md_malloc_wait ? 0 : SFB_NOWAIT)); 520 if (sf == NULL) { 521 error = ENOMEM; 522 break; 523 } 524 } 525 p = (char *)sf_buf_kva(sf) + *ma_offs; 526 switch (op) { 527 case MD_MALLOC_MOVE_ZERO: 528 bzero(p, sz); 529 break; 530 case MD_MALLOC_MOVE_FILL: 531 memset(p, fill, sz); 532 break; 533 case MD_MALLOC_MOVE_READ: 534 bcopy(ptr, p, sz); 535 cpu_flush_dcache(p, sz); 536 break; 537 case MD_MALLOC_MOVE_WRITE: 538 bcopy(p, ptr, sz); 539 break; 540 case MD_MALLOC_MOVE_CMP: 541 for (i = 0; i < sz; i++, p++) { 542 if (!first_read) { 543 *uc = (u_char)*p; 544 first = *p; 545 first_read = 1; 546 } else if (*p != first) { 547 error = EDOOFUS; 548 break; 549 } 550 } 551 break; 552 default: 553 KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op)); 554 break; 555 } 556 if (error != 0) 557 break; 558 *ma_offs += sz; 559 *ma_offs %= PAGE_SIZE; 560 if (*ma_offs == 0) 561 (*mp)++; 562 ptr = (char *)ptr + sz; 563 } 564 565 if (sf != NULL) 566 sf_buf_free(sf); 567 sched_unpin(); 568 if (op == MD_MALLOC_MOVE_CMP && error != 0) { 569 *mp = mp1; 570 *ma_offs = ma_offs1; 571 } 572 return (error); 573 } 574 575 static int 576 md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs, 577 unsigned len, void *ptr, u_char fill, int op) 578 { 579 bus_dma_segment_t *vlist; 580 uint8_t *p, *end, first; 581 off_t *uc; 582 int ma_offs, seg_len; 583 584 vlist = *pvlist; 585 ma_offs = *pma_offs; 586 uc = ptr; 587 588 for (; len != 0; len -= seg_len) { 589 seg_len = imin(vlist->ds_len - ma_offs, len); 590 p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs; 591 switch (op) { 592 case MD_MALLOC_MOVE_ZERO: 593 bzero(p, seg_len); 594 break; 595 case MD_MALLOC_MOVE_FILL: 596 memset(p, fill, seg_len); 597 break; 598 case MD_MALLOC_MOVE_READ: 599 bcopy(ptr, p, seg_len); 600 cpu_flush_dcache(p, seg_len); 601 break; 602 case MD_MALLOC_MOVE_WRITE: 603 bcopy(p, ptr, seg_len); 604 break; 605 case MD_MALLOC_MOVE_CMP: 606 end = p + seg_len; 607 first = *uc = *p; 608 /* Confirm all following bytes match the first */ 609 while (++p < end) { 610 
if (*p != first) 611 return (EDOOFUS); 612 } 613 break; 614 default: 615 KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op)); 616 break; 617 } 618 619 ma_offs += seg_len; 620 if (ma_offs == vlist->ds_len) { 621 ma_offs = 0; 622 vlist++; 623 } 624 ptr = (uint8_t *)ptr + seg_len; 625 } 626 *pvlist = vlist; 627 *pma_offs = ma_offs; 628 629 return (0); 630 } 631 632 static int 633 mdstart_malloc(struct md_s *sc, struct bio *bp) 634 { 635 u_char *dst; 636 vm_page_t *m; 637 bus_dma_segment_t *vlist; 638 int i, error, error1, ma_offs, notmapped; 639 off_t secno, nsec, uc; 640 uintptr_t sp, osp; 641 642 switch (bp->bio_cmd) { 643 case BIO_READ: 644 case BIO_WRITE: 645 case BIO_DELETE: 646 break; 647 default: 648 return (EOPNOTSUPP); 649 } 650 651 notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; 652 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 653 (bus_dma_segment_t *)bp->bio_data : NULL; 654 if (notmapped) { 655 m = bp->bio_ma; 656 ma_offs = bp->bio_ma_offset; 657 dst = NULL; 658 KASSERT(vlist == NULL, ("vlists cannot be unmapped")); 659 } else if (vlist != NULL) { 660 ma_offs = bp->bio_ma_offset; 661 dst = NULL; 662 } else { 663 dst = bp->bio_data; 664 } 665 666 nsec = bp->bio_length / sc->sectorsize; 667 secno = bp->bio_offset / sc->sectorsize; 668 error = 0; 669 while (nsec--) { 670 osp = s_read(sc->indir, secno); 671 if (bp->bio_cmd == BIO_DELETE) { 672 if (osp != 0) 673 error = s_write(sc->indir, secno, 0); 674 } else if (bp->bio_cmd == BIO_READ) { 675 if (osp == 0) { 676 if (notmapped) { 677 error = md_malloc_move_ma(&m, &ma_offs, 678 sc->sectorsize, NULL, 0, 679 MD_MALLOC_MOVE_ZERO); 680 } else if (vlist != NULL) { 681 error = md_malloc_move_vlist(&vlist, 682 &ma_offs, sc->sectorsize, NULL, 0, 683 MD_MALLOC_MOVE_ZERO); 684 } else 685 bzero(dst, sc->sectorsize); 686 } else if (osp <= 255) { 687 if (notmapped) { 688 error = md_malloc_move_ma(&m, &ma_offs, 689 sc->sectorsize, NULL, osp, 690 MD_MALLOC_MOVE_FILL); 691 } else if (vlist != NULL) { 692 error = md_malloc_move_vlist(&vlist, 693 &ma_offs, sc->sectorsize, NULL, osp, 694 MD_MALLOC_MOVE_FILL); 695 } else 696 memset(dst, osp, sc->sectorsize); 697 } else { 698 if (notmapped) { 699 error = md_malloc_move_ma(&m, &ma_offs, 700 sc->sectorsize, (void *)osp, 0, 701 MD_MALLOC_MOVE_READ); 702 } else if (vlist != NULL) { 703 error = md_malloc_move_vlist(&vlist, 704 &ma_offs, sc->sectorsize, 705 (void *)osp, 0, 706 MD_MALLOC_MOVE_READ); 707 } else { 708 bcopy((void *)osp, dst, sc->sectorsize); 709 cpu_flush_dcache(dst, sc->sectorsize); 710 } 711 } 712 osp = 0; 713 } else if (bp->bio_cmd == BIO_WRITE) { 714 if (sc->flags & MD_COMPRESS) { 715 if (notmapped) { 716 error1 = md_malloc_move_ma(&m, &ma_offs, 717 sc->sectorsize, &uc, 0, 718 MD_MALLOC_MOVE_CMP); 719 i = error1 == 0 ? sc->sectorsize : 0; 720 } else if (vlist != NULL) { 721 error1 = md_malloc_move_vlist(&vlist, 722 &ma_offs, sc->sectorsize, &uc, 0, 723 MD_MALLOC_MOVE_CMP); 724 i = error1 == 0 ? sc->sectorsize : 0; 725 } else { 726 uc = dst[0]; 727 for (i = 1; i < sc->sectorsize; i++) { 728 if (dst[i] != uc) 729 break; 730 } 731 } 732 } else { 733 i = 0; 734 uc = 0; 735 } 736 if (i == sc->sectorsize) { 737 if (osp != uc) 738 error = s_write(sc->indir, secno, uc); 739 } else { 740 if (osp <= 255) { 741 sp = (uintptr_t)uma_zalloc(sc->uma, 742 md_malloc_wait ? 
M_WAITOK : 743 M_NOWAIT); 744 if (sp == 0) { 745 error = ENOSPC; 746 break; 747 } 748 if (notmapped) { 749 error = md_malloc_move_ma(&m, 750 &ma_offs, sc->sectorsize, 751 (void *)sp, 0, 752 MD_MALLOC_MOVE_WRITE); 753 } else if (vlist != NULL) { 754 error = md_malloc_move_vlist( 755 &vlist, &ma_offs, 756 sc->sectorsize, (void *)sp, 757 0, MD_MALLOC_MOVE_WRITE); 758 } else { 759 bcopy(dst, (void *)sp, 760 sc->sectorsize); 761 } 762 error = s_write(sc->indir, secno, sp); 763 } else { 764 if (notmapped) { 765 error = md_malloc_move_ma(&m, 766 &ma_offs, sc->sectorsize, 767 (void *)osp, 0, 768 MD_MALLOC_MOVE_WRITE); 769 } else if (vlist != NULL) { 770 error = md_malloc_move_vlist( 771 &vlist, &ma_offs, 772 sc->sectorsize, (void *)osp, 773 0, MD_MALLOC_MOVE_WRITE); 774 } else { 775 bcopy(dst, (void *)osp, 776 sc->sectorsize); 777 } 778 osp = 0; 779 } 780 } 781 } else { 782 error = EOPNOTSUPP; 783 } 784 if (osp > 255) 785 uma_zfree(sc->uma, (void*)osp); 786 if (error != 0) 787 break; 788 secno++; 789 if (!notmapped && vlist == NULL) 790 dst += sc->sectorsize; 791 } 792 bp->bio_resid = 0; 793 return (error); 794 } 795 796 static void 797 mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len) 798 { 799 off_t seg_len; 800 801 while (offset >= vlist->ds_len) { 802 offset -= vlist->ds_len; 803 vlist++; 804 } 805 806 while (len != 0) { 807 seg_len = omin(len, vlist->ds_len - offset); 808 bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset), 809 seg_len); 810 offset = 0; 811 src = (uint8_t *)src + seg_len; 812 len -= seg_len; 813 vlist++; 814 } 815 } 816 817 static void 818 mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len) 819 { 820 off_t seg_len; 821 822 while (offset >= vlist->ds_len) { 823 offset -= vlist->ds_len; 824 vlist++; 825 } 826 827 while (len != 0) { 828 seg_len = omin(len, vlist->ds_len - offset); 829 bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst, 830 seg_len); 831 offset = 0; 832 dst = (uint8_t *)dst + seg_len; 833 len -= seg_len; 834 vlist++; 835 } 836 } 837 838 static int 839 mdstart_preload(struct md_s *sc, struct bio *bp) 840 { 841 uint8_t *p; 842 843 p = sc->pl_ptr + bp->bio_offset; 844 switch (bp->bio_cmd) { 845 case BIO_READ: 846 if ((bp->bio_flags & BIO_VLIST) != 0) { 847 mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data, 848 bp->bio_ma_offset, bp->bio_length); 849 } else { 850 bcopy(p, bp->bio_data, bp->bio_length); 851 } 852 cpu_flush_dcache(bp->bio_data, bp->bio_length); 853 break; 854 case BIO_WRITE: 855 if ((bp->bio_flags & BIO_VLIST) != 0) { 856 mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data, 857 bp->bio_ma_offset, p, bp->bio_length); 858 } else { 859 bcopy(bp->bio_data, p, bp->bio_length); 860 } 861 break; 862 } 863 bp->bio_resid = 0; 864 return (0); 865 } 866 867 static int 868 mdstart_vnode(struct md_s *sc, struct bio *bp) 869 { 870 int error; 871 struct uio auio; 872 struct iovec aiov; 873 struct iovec *piov; 874 struct mount *mp; 875 struct vnode *vp; 876 struct buf *pb; 877 bus_dma_segment_t *vlist; 878 struct thread *td; 879 off_t iolen, len, zerosize; 880 int ma_offs, npages; 881 882 switch (bp->bio_cmd) { 883 case BIO_READ: 884 auio.uio_rw = UIO_READ; 885 break; 886 case BIO_WRITE: 887 case BIO_DELETE: 888 auio.uio_rw = UIO_WRITE; 889 break; 890 case BIO_FLUSH: 891 break; 892 default: 893 return (EOPNOTSUPP); 894 } 895 896 td = curthread; 897 vp = sc->vnode; 898 pb = NULL; 899 piov = NULL; 900 ma_offs = bp->bio_ma_offset; 901 len = bp->bio_length; 902 903 /* 904 * VNODE I/O 905 * 906 * If an error 
occurs, we set BIO_ERROR but we do not set 907 * B_INVAL because (for a write anyway), the buffer is 908 * still valid. 909 */ 910 911 if (bp->bio_cmd == BIO_FLUSH) { 912 (void) vn_start_write(vp, &mp, V_WAIT); 913 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 914 error = VOP_FSYNC(vp, MNT_WAIT, td); 915 VOP_UNLOCK(vp, 0); 916 vn_finished_write(mp); 917 return (error); 918 } 919 920 auio.uio_offset = (vm_ooffset_t)bp->bio_offset; 921 auio.uio_resid = bp->bio_length; 922 auio.uio_segflg = UIO_SYSSPACE; 923 auio.uio_td = td; 924 925 if (bp->bio_cmd == BIO_DELETE) { 926 /* 927 * Emulate BIO_DELETE by writing zeros. 928 */ 929 zerosize = ZERO_REGION_SIZE - 930 (ZERO_REGION_SIZE % sc->sectorsize); 931 auio.uio_iovcnt = howmany(bp->bio_length, zerosize); 932 piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); 933 auio.uio_iov = piov; 934 while (len > 0) { 935 piov->iov_base = __DECONST(void *, zero_region); 936 piov->iov_len = len; 937 if (len > zerosize) 938 piov->iov_len = zerosize; 939 len -= piov->iov_len; 940 piov++; 941 } 942 piov = auio.uio_iov; 943 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 944 piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); 945 auio.uio_iov = piov; 946 vlist = (bus_dma_segment_t *)bp->bio_data; 947 while (len > 0) { 948 piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + 949 ma_offs); 950 piov->iov_len = vlist->ds_len - ma_offs; 951 if (piov->iov_len > len) 952 piov->iov_len = len; 953 len -= piov->iov_len; 954 ma_offs = 0; 955 vlist++; 956 piov++; 957 } 958 auio.uio_iovcnt = piov - auio.uio_iov; 959 piov = auio.uio_iov; 960 } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 961 pb = getpbuf(&md_vnode_pbuf_freecnt); 962 bp->bio_resid = len; 963 unmapped_step: 964 npages = atop(min(MAXPHYS, round_page(len + (ma_offs & 965 PAGE_MASK)))); 966 iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); 967 KASSERT(iolen > 0, ("zero iolen")); 968 pmap_qenter((vm_offset_t)pb->b_data, 969 &bp->bio_ma[atop(ma_offs)], npages); 970 aiov.iov_base = (void *)((vm_offset_t)pb->b_data + 971 (ma_offs & PAGE_MASK)); 972 aiov.iov_len = iolen; 973 auio.uio_iov = &aiov; 974 auio.uio_iovcnt = 1; 975 auio.uio_resid = iolen; 976 } else { 977 aiov.iov_base = bp->bio_data; 978 aiov.iov_len = bp->bio_length; 979 auio.uio_iov = &aiov; 980 auio.uio_iovcnt = 1; 981 } 982 /* 983 * When reading set IO_DIRECT to try to avoid double-caching 984 * the data. When writing IO_DIRECT is not optimal. 985 */ 986 if (auio.uio_rw == UIO_READ) { 987 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 988 error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred); 989 VOP_UNLOCK(vp, 0); 990 } else { 991 (void) vn_start_write(vp, &mp, V_WAIT); 992 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 993 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 
0 : IO_SYNC, 994 sc->cred); 995 VOP_UNLOCK(vp, 0); 996 vn_finished_write(mp); 997 if (error == 0) 998 sc->flags &= ~MD_VERIFY; 999 } 1000 1001 if (pb != NULL) { 1002 pmap_qremove((vm_offset_t)pb->b_data, npages); 1003 if (error == 0) { 1004 len -= iolen; 1005 bp->bio_resid -= iolen; 1006 ma_offs += iolen; 1007 if (len > 0) 1008 goto unmapped_step; 1009 } 1010 relpbuf(pb, &md_vnode_pbuf_freecnt); 1011 } 1012 1013 free(piov, M_MD); 1014 if (pb == NULL) 1015 bp->bio_resid = auio.uio_resid; 1016 return (error); 1017 } 1018 1019 static void 1020 md_swap_page_free(vm_page_t m) 1021 { 1022 1023 vm_page_xunbusy(m); 1024 vm_page_lock(m); 1025 vm_page_free(m); 1026 vm_page_unlock(m); 1027 } 1028 1029 static int 1030 mdstart_swap(struct md_s *sc, struct bio *bp) 1031 { 1032 vm_page_t m; 1033 u_char *p; 1034 vm_pindex_t i, lastp; 1035 bus_dma_segment_t *vlist; 1036 int rv, ma_offs, offs, len, lastend; 1037 1038 switch (bp->bio_cmd) { 1039 case BIO_READ: 1040 case BIO_WRITE: 1041 case BIO_DELETE: 1042 break; 1043 default: 1044 return (EOPNOTSUPP); 1045 } 1046 1047 p = bp->bio_data; 1048 ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ? 1049 bp->bio_ma_offset : 0; 1050 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 1051 (bus_dma_segment_t *)bp->bio_data : NULL; 1052 1053 /* 1054 * offs is the offset at which to start operating on the 1055 * next (ie, first) page. lastp is the last page on 1056 * which we're going to operate. lastend is the ending 1057 * position within that last page (ie, PAGE_SIZE if 1058 * we're operating on complete aligned pages). 1059 */ 1060 offs = bp->bio_offset % PAGE_SIZE; 1061 lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; 1062 lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; 1063 1064 rv = VM_PAGER_OK; 1065 VM_OBJECT_WLOCK(sc->object); 1066 vm_object_pip_add(sc->object, 1); 1067 for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { 1068 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; 1069 m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM); 1070 if (bp->bio_cmd == BIO_READ) { 1071 if (m->valid == VM_PAGE_BITS_ALL) 1072 rv = VM_PAGER_OK; 1073 else 1074 rv = vm_pager_get_pages(sc->object, &m, 1, 1075 NULL, NULL); 1076 if (rv == VM_PAGER_ERROR) { 1077 md_swap_page_free(m); 1078 break; 1079 } else if (rv == VM_PAGER_FAIL) { 1080 /* 1081 * Pager does not have the page. Zero 1082 * the allocated page, and mark it as 1083 * valid. Do not set dirty, the page 1084 * can be recreated if thrown out. 
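 * (Leaving the page clean means the pagedaemon can simply drop
 * it under memory pressure; a later read will grab and zero a
 * fresh page again rather than go to swap.)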
1085 */ 1086 pmap_zero_page(m); 1087 m->valid = VM_PAGE_BITS_ALL; 1088 } 1089 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1090 pmap_copy_pages(&m, offs, bp->bio_ma, 1091 ma_offs, len); 1092 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1093 physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs, 1094 vlist, ma_offs, len); 1095 cpu_flush_dcache(p, len); 1096 } else { 1097 physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); 1098 cpu_flush_dcache(p, len); 1099 } 1100 } else if (bp->bio_cmd == BIO_WRITE) { 1101 if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) 1102 rv = VM_PAGER_OK; 1103 else 1104 rv = vm_pager_get_pages(sc->object, &m, 1, 1105 NULL, NULL); 1106 if (rv == VM_PAGER_ERROR) { 1107 md_swap_page_free(m); 1108 break; 1109 } else if (rv == VM_PAGER_FAIL) 1110 pmap_zero_page(m); 1111 1112 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1113 pmap_copy_pages(bp->bio_ma, ma_offs, &m, 1114 offs, len); 1115 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1116 physcopyin_vlist(vlist, ma_offs, 1117 VM_PAGE_TO_PHYS(m) + offs, len); 1118 } else { 1119 physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); 1120 } 1121 1122 m->valid = VM_PAGE_BITS_ALL; 1123 if (m->dirty != VM_PAGE_BITS_ALL) { 1124 vm_page_dirty(m); 1125 vm_pager_page_unswapped(m); 1126 } 1127 } else if (bp->bio_cmd == BIO_DELETE) { 1128 if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) 1129 rv = VM_PAGER_OK; 1130 else 1131 rv = vm_pager_get_pages(sc->object, &m, 1, 1132 NULL, NULL); 1133 if (rv == VM_PAGER_ERROR) { 1134 md_swap_page_free(m); 1135 break; 1136 } else if (rv == VM_PAGER_FAIL) { 1137 md_swap_page_free(m); 1138 m = NULL; 1139 } else { 1140 /* Page is valid. */ 1141 if (len != PAGE_SIZE) { 1142 pmap_zero_page_area(m, offs, len); 1143 if (m->dirty != VM_PAGE_BITS_ALL) { 1144 vm_page_dirty(m); 1145 vm_pager_page_unswapped(m); 1146 } 1147 } else { 1148 vm_pager_page_unswapped(m); 1149 md_swap_page_free(m); 1150 m = NULL; 1151 } 1152 } 1153 } 1154 if (m != NULL) { 1155 vm_page_xunbusy(m); 1156 vm_page_lock(m); 1157 if (vm_page_active(m)) 1158 vm_page_reference(m); 1159 else 1160 vm_page_activate(m); 1161 vm_page_unlock(m); 1162 } 1163 1164 /* Actions on further pages start at offset 0 */ 1165 p += PAGE_SIZE - offs; 1166 offs = 0; 1167 ma_offs += len; 1168 } 1169 vm_object_pip_wakeup(sc->object); 1170 VM_OBJECT_WUNLOCK(sc->object); 1171 return (rv != VM_PAGER_ERROR ? 
0 : ENOSPC); 1172 } 1173 1174 static int 1175 mdstart_null(struct md_s *sc, struct bio *bp) 1176 { 1177 1178 switch (bp->bio_cmd) { 1179 case BIO_READ: 1180 bzero(bp->bio_data, bp->bio_length); 1181 cpu_flush_dcache(bp->bio_data, bp->bio_length); 1182 break; 1183 case BIO_WRITE: 1184 break; 1185 } 1186 bp->bio_resid = 0; 1187 return (0); 1188 } 1189 1190 static void 1191 md_kthread(void *arg) 1192 { 1193 struct md_s *sc; 1194 struct bio *bp; 1195 int error; 1196 1197 sc = arg; 1198 thread_lock(curthread); 1199 sched_prio(curthread, PRIBIO); 1200 thread_unlock(curthread); 1201 if (sc->type == MD_VNODE) 1202 curthread->td_pflags |= TDP_NORUNNINGBUF; 1203 1204 for (;;) { 1205 mtx_lock(&sc->queue_mtx); 1206 if (sc->flags & MD_SHUTDOWN) { 1207 sc->flags |= MD_EXITING; 1208 mtx_unlock(&sc->queue_mtx); 1209 kproc_exit(0); 1210 } 1211 bp = bioq_takefirst(&sc->bio_queue); 1212 if (!bp) { 1213 msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); 1214 continue; 1215 } 1216 mtx_unlock(&sc->queue_mtx); 1217 if (bp->bio_cmd == BIO_GETATTR) { 1218 int isv = ((sc->flags & MD_VERIFY) != 0); 1219 1220 if ((sc->fwsectors && sc->fwheads && 1221 (g_handleattr_int(bp, "GEOM::fwsectors", 1222 sc->fwsectors) || 1223 g_handleattr_int(bp, "GEOM::fwheads", 1224 sc->fwheads))) || 1225 g_handleattr_int(bp, "GEOM::candelete", 1)) 1226 error = -1; 1227 else if (sc->ident[0] != '\0' && 1228 g_handleattr_str(bp, "GEOM::ident", sc->ident)) 1229 error = -1; 1230 else if (g_handleattr_int(bp, "MNT::verified", isv)) 1231 error = -1; 1232 else 1233 error = EOPNOTSUPP; 1234 } else { 1235 error = sc->start(sc, bp); 1236 } 1237 1238 if (error != -1) { 1239 bp->bio_completed = bp->bio_length; 1240 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) 1241 devstat_end_transaction_bio(sc->devstat, bp); 1242 g_io_deliver(bp, error); 1243 } 1244 } 1245 } 1246 1247 static struct md_s * 1248 mdfind(int unit) 1249 { 1250 struct md_s *sc; 1251 1252 LIST_FOREACH(sc, &md_softc_list, list) { 1253 if (sc->unit == unit) 1254 break; 1255 } 1256 return (sc); 1257 } 1258 1259 static struct md_s * 1260 mdnew(int unit, int *errp, enum md_types type) 1261 { 1262 struct md_s *sc; 1263 int error; 1264 1265 *errp = 0; 1266 if (unit == -1) 1267 unit = alloc_unr(md_uh); 1268 else 1269 unit = alloc_unr_specific(md_uh, unit); 1270 1271 if (unit == -1) { 1272 *errp = EBUSY; 1273 return (NULL); 1274 } 1275 1276 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); 1277 sc->type = type; 1278 bioq_init(&sc->bio_queue); 1279 mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); 1280 mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF); 1281 sc->unit = unit; 1282 sprintf(sc->name, "md%d", unit); 1283 LIST_INSERT_HEAD(&md_softc_list, sc, list); 1284 error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); 1285 if (error == 0) 1286 return (sc); 1287 LIST_REMOVE(sc, list); 1288 mtx_destroy(&sc->stat_mtx); 1289 mtx_destroy(&sc->queue_mtx); 1290 free_unr(md_uh, sc->unit); 1291 free(sc, M_MD); 1292 *errp = error; 1293 return (NULL); 1294 } 1295 1296 static void 1297 mdinit(struct md_s *sc) 1298 { 1299 struct g_geom *gp; 1300 struct g_provider *pp; 1301 1302 g_topology_lock(); 1303 gp = g_new_geomf(&g_md_class, "md%d", sc->unit); 1304 gp->softc = sc; 1305 pp = g_new_providerf(gp, "md%d", sc->unit); 1306 pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 1307 pp->mediasize = sc->mediasize; 1308 pp->sectorsize = sc->sectorsize; 1309 switch (sc->type) { 1310 case MD_MALLOC: 1311 case MD_VNODE: 1312 case MD_SWAP: 1313 pp->flags |= 
G_PF_ACCEPT_UNMAPPED; 1314 break; 1315 case MD_PRELOAD: 1316 case MD_NULL: 1317 break; 1318 } 1319 sc->gp = gp; 1320 sc->pp = pp; 1321 g_error_provider(pp, 0); 1322 g_topology_unlock(); 1323 sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, 1324 DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); 1325 } 1326 1327 static int 1328 mdcreate_malloc(struct md_s *sc, struct md_req *mdr) 1329 { 1330 uintptr_t sp; 1331 int error; 1332 off_t u; 1333 1334 error = 0; 1335 if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) 1336 return (EINVAL); 1337 if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize)) 1338 return (EINVAL); 1339 /* Compression doesn't make sense if we have reserved space */ 1340 if (mdr->md_options & MD_RESERVE) 1341 mdr->md_options &= ~MD_COMPRESS; 1342 if (mdr->md_fwsectors != 0) 1343 sc->fwsectors = mdr->md_fwsectors; 1344 if (mdr->md_fwheads != 0) 1345 sc->fwheads = mdr->md_fwheads; 1346 sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE); 1347 sc->indir = dimension(sc->mediasize / sc->sectorsize); 1348 sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 1349 0x1ff, 0); 1350 if (mdr->md_options & MD_RESERVE) { 1351 off_t nsectors; 1352 1353 nsectors = sc->mediasize / sc->sectorsize; 1354 for (u = 0; u < nsectors; u++) { 1355 sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ? 1356 M_WAITOK : M_NOWAIT) | M_ZERO); 1357 if (sp != 0) 1358 error = s_write(sc->indir, u, sp); 1359 else 1360 error = ENOMEM; 1361 if (error != 0) 1362 break; 1363 } 1364 } 1365 return (error); 1366 } 1367 1368 1369 static int 1370 mdsetcred(struct md_s *sc, struct ucred *cred) 1371 { 1372 char *tmpbuf; 1373 int error = 0; 1374 1375 /* 1376 * Set credits in our softc 1377 */ 1378 1379 if (sc->cred) 1380 crfree(sc->cred); 1381 sc->cred = crhold(cred); 1382 1383 /* 1384 * Horrible kludge to establish credentials for NFS XXX. 1385 */ 1386 1387 if (sc->vnode) { 1388 struct uio auio; 1389 struct iovec aiov; 1390 1391 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); 1392 bzero(&auio, sizeof(auio)); 1393 1394 aiov.iov_base = tmpbuf; 1395 aiov.iov_len = sc->sectorsize; 1396 auio.uio_iov = &aiov; 1397 auio.uio_iovcnt = 1; 1398 auio.uio_offset = 0; 1399 auio.uio_rw = UIO_READ; 1400 auio.uio_segflg = UIO_SYSSPACE; 1401 auio.uio_resid = aiov.iov_len; 1402 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1403 error = VOP_READ(sc->vnode, &auio, 0, sc->cred); 1404 VOP_UNLOCK(sc->vnode, 0); 1405 free(tmpbuf, M_TEMP); 1406 } 1407 return (error); 1408 } 1409 1410 static int 1411 mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td) 1412 { 1413 struct vattr vattr; 1414 struct nameidata nd; 1415 char *fname; 1416 int error, flags; 1417 1418 fname = mdr->md_file; 1419 if (mdr->md_file_seg == UIO_USERSPACE) { 1420 error = copyinstr(fname, sc->file, sizeof(sc->file), NULL); 1421 if (error != 0) 1422 return (error); 1423 } else if (mdr->md_file_seg == UIO_SYSSPACE) 1424 strlcpy(sc->file, fname, sizeof(sc->file)); 1425 else 1426 return (EDOOFUS); 1427 1428 /* 1429 * If the user specified that this is a read only device, don't 1430 * set the FWRITE mask before trying to open the backing store. 1431 */ 1432 flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \ 1433 | ((mdr->md_options & MD_VERIFY) ? 
O_VERIFY : 0); 1434 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td); 1435 error = vn_open(&nd, &flags, 0, NULL); 1436 if (error != 0) 1437 return (error); 1438 NDFREE(&nd, NDF_ONLY_PNBUF); 1439 if (nd.ni_vp->v_type != VREG) { 1440 error = EINVAL; 1441 goto bad; 1442 } 1443 error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); 1444 if (error != 0) 1445 goto bad; 1446 if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { 1447 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); 1448 if (nd.ni_vp->v_iflag & VI_DOOMED) { 1449 /* Forced unmount. */ 1450 error = EBADF; 1451 goto bad; 1452 } 1453 } 1454 nd.ni_vp->v_vflag |= VV_MD; 1455 VOP_UNLOCK(nd.ni_vp, 0); 1456 1457 if (mdr->md_fwsectors != 0) 1458 sc->fwsectors = mdr->md_fwsectors; 1459 if (mdr->md_fwheads != 0) 1460 sc->fwheads = mdr->md_fwheads; 1461 snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju", 1462 (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid); 1463 sc->flags = mdr->md_options & (MD_FORCE | MD_ASYNC | MD_VERIFY); 1464 if (!(flags & FWRITE)) 1465 sc->flags |= MD_READONLY; 1466 sc->vnode = nd.ni_vp; 1467 1468 error = mdsetcred(sc, td->td_ucred); 1469 if (error != 0) { 1470 sc->vnode = NULL; 1471 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); 1472 nd.ni_vp->v_vflag &= ~VV_MD; 1473 goto bad; 1474 } 1475 return (0); 1476 bad: 1477 VOP_UNLOCK(nd.ni_vp, 0); 1478 (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); 1479 return (error); 1480 } 1481 1482 static int 1483 mddestroy(struct md_s *sc, struct thread *td) 1484 { 1485 1486 if (sc->gp) { 1487 sc->gp->softc = NULL; 1488 g_topology_lock(); 1489 g_wither_geom(sc->gp, ENXIO); 1490 g_topology_unlock(); 1491 sc->gp = NULL; 1492 sc->pp = NULL; 1493 } 1494 if (sc->devstat) { 1495 devstat_remove_entry(sc->devstat); 1496 sc->devstat = NULL; 1497 } 1498 mtx_lock(&sc->queue_mtx); 1499 sc->flags |= MD_SHUTDOWN; 1500 wakeup(sc); 1501 while (!(sc->flags & MD_EXITING)) 1502 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); 1503 mtx_unlock(&sc->queue_mtx); 1504 mtx_destroy(&sc->stat_mtx); 1505 mtx_destroy(&sc->queue_mtx); 1506 if (sc->vnode != NULL) { 1507 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1508 sc->vnode->v_vflag &= ~VV_MD; 1509 VOP_UNLOCK(sc->vnode, 0); 1510 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? 
1511 FREAD : (FREAD|FWRITE), sc->cred, td); 1512 } 1513 if (sc->cred != NULL) 1514 crfree(sc->cred); 1515 if (sc->object != NULL) 1516 vm_object_deallocate(sc->object); 1517 if (sc->indir) 1518 destroy_indir(sc, sc->indir); 1519 if (sc->uma) 1520 uma_zdestroy(sc->uma); 1521 1522 LIST_REMOVE(sc, list); 1523 free_unr(md_uh, sc->unit); 1524 free(sc, M_MD); 1525 return (0); 1526 } 1527 1528 static int 1529 mdresize(struct md_s *sc, struct md_req *mdr) 1530 { 1531 int error, res; 1532 vm_pindex_t oldpages, newpages; 1533 1534 switch (sc->type) { 1535 case MD_VNODE: 1536 case MD_NULL: 1537 break; 1538 case MD_SWAP: 1539 if (mdr->md_mediasize <= 0 || 1540 (mdr->md_mediasize % PAGE_SIZE) != 0) 1541 return (EDOM); 1542 oldpages = OFF_TO_IDX(round_page(sc->mediasize)); 1543 newpages = OFF_TO_IDX(round_page(mdr->md_mediasize)); 1544 if (newpages < oldpages) { 1545 VM_OBJECT_WLOCK(sc->object); 1546 vm_object_page_remove(sc->object, newpages, 0, 0); 1547 swap_pager_freespace(sc->object, newpages, 1548 oldpages - newpages); 1549 swap_release_by_cred(IDX_TO_OFF(oldpages - 1550 newpages), sc->cred); 1551 sc->object->charge = IDX_TO_OFF(newpages); 1552 sc->object->size = newpages; 1553 VM_OBJECT_WUNLOCK(sc->object); 1554 } else if (newpages > oldpages) { 1555 res = swap_reserve_by_cred(IDX_TO_OFF(newpages - 1556 oldpages), sc->cred); 1557 if (!res) 1558 return (ENOMEM); 1559 if ((mdr->md_options & MD_RESERVE) || 1560 (sc->flags & MD_RESERVE)) { 1561 error = swap_pager_reserve(sc->object, 1562 oldpages, newpages - oldpages); 1563 if (error < 0) { 1564 swap_release_by_cred( 1565 IDX_TO_OFF(newpages - oldpages), 1566 sc->cred); 1567 return (EDOM); 1568 } 1569 } 1570 VM_OBJECT_WLOCK(sc->object); 1571 sc->object->charge = IDX_TO_OFF(newpages); 1572 sc->object->size = newpages; 1573 VM_OBJECT_WUNLOCK(sc->object); 1574 } 1575 break; 1576 default: 1577 return (EOPNOTSUPP); 1578 } 1579 1580 sc->mediasize = mdr->md_mediasize; 1581 g_topology_lock(); 1582 g_resize_provider(sc->pp, sc->mediasize); 1583 g_topology_unlock(); 1584 return (0); 1585 } 1586 1587 static int 1588 mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td) 1589 { 1590 vm_ooffset_t npage; 1591 int error; 1592 1593 /* 1594 * Range check. Disallow negative sizes and sizes not being 1595 * multiple of page size. 1596 */ 1597 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1598 return (EDOM); 1599 1600 /* 1601 * Allocate an OBJT_SWAP object. 1602 * 1603 * Note the truncation. 1604 */ 1605 1606 if ((mdr->md_options & MD_VERIFY) != 0) 1607 return (EINVAL); 1608 npage = mdr->md_mediasize / PAGE_SIZE; 1609 if (mdr->md_fwsectors != 0) 1610 sc->fwsectors = mdr->md_fwsectors; 1611 if (mdr->md_fwheads != 0) 1612 sc->fwheads = mdr->md_fwheads; 1613 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, 1614 VM_PROT_DEFAULT, 0, td->td_ucred); 1615 if (sc->object == NULL) 1616 return (ENOMEM); 1617 sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE); 1618 if (mdr->md_options & MD_RESERVE) { 1619 if (swap_pager_reserve(sc->object, 0, npage) < 0) { 1620 error = EDOM; 1621 goto finish; 1622 } 1623 } 1624 error = mdsetcred(sc, td->td_ucred); 1625 finish: 1626 if (error != 0) { 1627 vm_object_deallocate(sc->object); 1628 sc->object = NULL; 1629 } 1630 return (error); 1631 } 1632 1633 static int 1634 mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td) 1635 { 1636 1637 /* 1638 * Range check. Disallow negative sizes and sizes not being 1639 * multiple of page size. 
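 * (As in the swap-backed case above, violations are rejected
 * with EDOM.)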
1640 */ 1641 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1642 return (EDOM); 1643 1644 return (0); 1645 } 1646 1647 static int 1648 kern_mdattach_locked(struct thread *td, struct md_req *mdr) 1649 { 1650 struct md_s *sc; 1651 unsigned sectsize; 1652 int error, i; 1653 1654 sx_assert(&md_sx, SA_XLOCKED); 1655 1656 switch (mdr->md_type) { 1657 case MD_MALLOC: 1658 case MD_PRELOAD: 1659 case MD_VNODE: 1660 case MD_SWAP: 1661 case MD_NULL: 1662 break; 1663 default: 1664 return (EINVAL); 1665 } 1666 if (mdr->md_sectorsize == 0) 1667 sectsize = DEV_BSIZE; 1668 else 1669 sectsize = mdr->md_sectorsize; 1670 if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize) 1671 return (EINVAL); 1672 if (mdr->md_options & MD_AUTOUNIT) 1673 sc = mdnew(-1, &error, mdr->md_type); 1674 else { 1675 if (mdr->md_unit > INT_MAX) 1676 return (EINVAL); 1677 sc = mdnew(mdr->md_unit, &error, mdr->md_type); 1678 } 1679 if (sc == NULL) 1680 return (error); 1681 if (mdr->md_label != NULL) 1682 error = copyinstr(mdr->md_label, sc->label, 1683 sizeof(sc->label), NULL); 1684 if (error != 0) 1685 goto err_after_new; 1686 if (mdr->md_options & MD_AUTOUNIT) 1687 mdr->md_unit = sc->unit; 1688 sc->mediasize = mdr->md_mediasize; 1689 sc->sectorsize = sectsize; 1690 error = EDOOFUS; 1691 switch (sc->type) { 1692 case MD_MALLOC: 1693 sc->start = mdstart_malloc; 1694 error = mdcreate_malloc(sc, mdr); 1695 break; 1696 case MD_PRELOAD: 1697 /* 1698 * We disallow attaching preloaded memory disks via 1699 * ioctl. Preloaded memory disks are automatically 1700 * attached in g_md_init(). 1701 */ 1702 error = EOPNOTSUPP; 1703 break; 1704 case MD_VNODE: 1705 sc->start = mdstart_vnode; 1706 error = mdcreate_vnode(sc, mdr, td); 1707 break; 1708 case MD_SWAP: 1709 sc->start = mdstart_swap; 1710 error = mdcreate_swap(sc, mdr, td); 1711 break; 1712 case MD_NULL: 1713 sc->start = mdstart_null; 1714 error = mdcreate_null(sc, mdr, td); 1715 break; 1716 } 1717 err_after_new: 1718 if (error != 0) { 1719 mddestroy(sc, td); 1720 return (error); 1721 } 1722 1723 /* Prune off any residual fractional sector */ 1724 i = sc->mediasize % sc->sectorsize; 1725 sc->mediasize -= i; 1726 1727 mdinit(sc); 1728 return (0); 1729 } 1730 1731 static int 1732 kern_mdattach(struct thread *td, struct md_req *mdr) 1733 { 1734 int error; 1735 1736 sx_xlock(&md_sx); 1737 error = kern_mdattach_locked(td, mdr); 1738 sx_xunlock(&md_sx); 1739 return (error); 1740 } 1741 1742 static int 1743 kern_mddetach_locked(struct thread *td, struct md_req *mdr) 1744 { 1745 struct md_s *sc; 1746 1747 sx_assert(&md_sx, SA_XLOCKED); 1748 1749 if (mdr->md_mediasize != 0 || 1750 (mdr->md_options & ~MD_FORCE) != 0) 1751 return (EINVAL); 1752 1753 sc = mdfind(mdr->md_unit); 1754 if (sc == NULL) 1755 return (ENOENT); 1756 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && 1757 !(mdr->md_options & MD_FORCE)) 1758 return (EBUSY); 1759 return (mddestroy(sc, td)); 1760 } 1761 1762 static int 1763 kern_mddetach(struct thread *td, struct md_req *mdr) 1764 { 1765 int error; 1766 1767 sx_xlock(&md_sx); 1768 error = kern_mddetach_locked(td, mdr); 1769 sx_xunlock(&md_sx); 1770 return (error); 1771 } 1772 1773 static int 1774 kern_mdresize_locked(struct md_req *mdr) 1775 { 1776 struct md_s *sc; 1777 1778 sx_assert(&md_sx, SA_XLOCKED); 1779 1780 if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0) 1781 return (EINVAL); 1782 1783 sc = mdfind(mdr->md_unit); 1784 if (sc == NULL) 1785 return (ENOENT); 1786 if (mdr->md_mediasize < sc->sectorsize) 1787 return (EINVAL); 1788 if (mdr->md_mediasize < 
sc->mediasize && 1789 !(sc->flags & MD_FORCE) && 1790 !(mdr->md_options & MD_FORCE)) 1791 return (EBUSY); 1792 return (mdresize(sc, mdr)); 1793 } 1794 1795 static int 1796 kern_mdresize(struct md_req *mdr) 1797 { 1798 int error; 1799 1800 sx_xlock(&md_sx); 1801 error = kern_mdresize_locked(mdr); 1802 sx_xunlock(&md_sx); 1803 return (error); 1804 } 1805 1806 static int 1807 kern_mdquery_locked(struct md_req *mdr) 1808 { 1809 struct md_s *sc; 1810 int error; 1811 1812 sx_assert(&md_sx, SA_XLOCKED); 1813 1814 sc = mdfind(mdr->md_unit); 1815 if (sc == NULL) 1816 return (ENOENT); 1817 mdr->md_type = sc->type; 1818 mdr->md_options = sc->flags; 1819 mdr->md_mediasize = sc->mediasize; 1820 mdr->md_sectorsize = sc->sectorsize; 1821 error = 0; 1822 if (mdr->md_label != NULL) { 1823 error = copyout(sc->label, mdr->md_label, 1824 strlen(sc->label) + 1); 1825 if (error != 0) 1826 return (error); 1827 } 1828 if (sc->type == MD_VNODE || 1829 (sc->type == MD_PRELOAD && mdr->md_file != NULL)) 1830 error = copyout(sc->file, mdr->md_file, 1831 strlen(sc->file) + 1); 1832 return (error); 1833 } 1834 1835 static int 1836 kern_mdquery(struct md_req *mdr) 1837 { 1838 int error; 1839 1840 sx_xlock(&md_sx); 1841 error = kern_mdquery_locked(mdr); 1842 sx_xunlock(&md_sx); 1843 return (error); 1844 } 1845 1846 static int 1847 kern_mdlist_locked(struct md_req *mdr) 1848 { 1849 struct md_s *sc; 1850 int i; 1851 1852 sx_assert(&md_sx, SA_XLOCKED); 1853 1854 /* 1855 * Write the number of md devices to mdr->md_units[0]. 1856 * Write the unit number of the first (mdr->md_units_nitems - 2) 1857 * units to mdr->md_units[1::(mdr->md_units - 2)] and terminate the 1858 * list with -1. 1859 * 1860 * XXX: There is currently no mechanism to retrieve unit 1861 * numbers for more than (MDNPAD - 2) units. 1862 * 1863 * XXX: Due to the use of LIST_INSERT_HEAD in mdnew(), the 1864 * list of visible unit numbers not stable. 1865 */ 1866 i = 1; 1867 LIST_FOREACH(sc, &md_softc_list, list) { 1868 if (i < mdr->md_units_nitems - 1) 1869 mdr->md_units[i] = sc->unit; 1870 i++; 1871 } 1872 mdr->md_units[MIN(i, mdr->md_units_nitems - 1)] = -1; 1873 mdr->md_units[0] = i - 1; 1874 return (0); 1875 } 1876 1877 static int 1878 kern_mdlist(struct md_req *mdr) 1879 { 1880 int error; 1881 1882 sx_xlock(&md_sx); 1883 error = kern_mdlist_locked(mdr); 1884 sx_xunlock(&md_sx); 1885 return (error); 1886 } 1887 1888 /* Copy members that are not userspace pointers. 
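 * md_file and md_label are user (or 32-bit compat) pointers and are
 * translated separately in mdctlioctl(), which also records where
 * md_file points via md_file_seg.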
*/ 1889 #define MD_IOCTL2REQ(mdio, mdr) do { \ 1890 (mdr)->md_unit = (mdio)->md_unit; \ 1891 (mdr)->md_type = (mdio)->md_type; \ 1892 (mdr)->md_mediasize = (mdio)->md_mediasize; \ 1893 (mdr)->md_sectorsize = (mdio)->md_sectorsize; \ 1894 (mdr)->md_options = (mdio)->md_options; \ 1895 (mdr)->md_fwheads = (mdio)->md_fwheads; \ 1896 (mdr)->md_fwsectors = (mdio)->md_fwsectors; \ 1897 (mdr)->md_units = &(mdio)->md_pad[0]; \ 1898 (mdr)->md_units_nitems = nitems((mdio)->md_pad); \ 1899 } while(0) 1900 1901 /* Copy members that might have been updated */ 1902 #define MD_REQ2IOCTL(mdr, mdio) do { \ 1903 (mdio)->md_unit = (mdr)->md_unit; \ 1904 (mdio)->md_type = (mdr)->md_type; \ 1905 (mdio)->md_mediasize = (mdr)->md_mediasize; \ 1906 (mdio)->md_sectorsize = (mdr)->md_sectorsize; \ 1907 (mdio)->md_options = (mdr)->md_options; \ 1908 (mdio)->md_fwheads = (mdr)->md_fwheads; \ 1909 (mdio)->md_fwsectors = (mdr)->md_fwsectors; \ 1910 } while(0) 1911 1912 static int 1913 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1914 struct thread *td) 1915 { 1916 struct md_req mdr; 1917 int error; 1918 1919 if (md_debug) 1920 printf("mdctlioctl(%s %lx %p %x %p)\n", 1921 devtoname(dev), cmd, addr, flags, td); 1922 1923 bzero(&mdr, sizeof(mdr)); 1924 switch (cmd) { 1925 case MDIOCATTACH: 1926 case MDIOCDETACH: 1927 case MDIOCRESIZE: 1928 case MDIOCQUERY: 1929 case MDIOCLIST: { 1930 struct md_ioctl *mdio = (struct md_ioctl *)addr; 1931 if (mdio->md_version != MDIOVERSION) 1932 return (EINVAL); 1933 MD_IOCTL2REQ(mdio, &mdr); 1934 mdr.md_file = mdio->md_file; 1935 mdr.md_file_seg = UIO_USERSPACE; 1936 /* If the file is adjacent to the md_ioctl it's in kernel. */ 1937 if ((void *)mdio->md_file == (void *)(mdio + 1)) 1938 mdr.md_file_seg = UIO_SYSSPACE; 1939 mdr.md_label = mdio->md_label; 1940 break; 1941 } 1942 #ifdef COMPAT_FREEBSD32 1943 case MDIOCATTACH_32: 1944 case MDIOCDETACH_32: 1945 case MDIOCRESIZE_32: 1946 case MDIOCQUERY_32: 1947 case MDIOCLIST_32: { 1948 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 1949 if (mdio->md_version != MDIOVERSION) 1950 return (EINVAL); 1951 MD_IOCTL2REQ(mdio, &mdr); 1952 mdr.md_file = (void *)(uintptr_t)mdio->md_file; 1953 mdr.md_file_seg = UIO_USERSPACE; 1954 mdr.md_label = (void *)(uintptr_t)mdio->md_label; 1955 break; 1956 } 1957 #endif 1958 default: 1959 /* Fall through to handler switch. 
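 * Any other command needs no translation here; unrecognized
 * requests fall out of the second switch below as ENOIOCTL.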
*/ 1960 break; 1961 } 1962 1963 error = 0; 1964 switch (cmd) { 1965 case MDIOCATTACH: 1966 #ifdef COMPAT_FREEBSD32 1967 case MDIOCATTACH_32: 1968 #endif 1969 error = kern_mdattach(td, &mdr); 1970 break; 1971 case MDIOCDETACH: 1972 #ifdef COMPAT_FREEBSD32 1973 case MDIOCDETACH_32: 1974 #endif 1975 error = kern_mddetach(td, &mdr); 1976 break; 1977 case MDIOCRESIZE: 1978 #ifdef COMPAT_FREEBSD32 1979 case MDIOCRESIZE_32: 1980 #endif 1981 error = kern_mdresize(&mdr); 1982 break; 1983 case MDIOCQUERY: 1984 #ifdef COMPAT_FREEBSD32 1985 case MDIOCQUERY_32: 1986 #endif 1987 error = kern_mdquery(&mdr); 1988 break; 1989 case MDIOCLIST: 1990 #ifdef COMPAT_FREEBSD32 1991 case MDIOCLIST_32: 1992 #endif 1993 error = kern_mdlist(&mdr); 1994 break; 1995 default: 1996 error = ENOIOCTL; 1997 } 1998 1999 switch (cmd) { 2000 case MDIOCATTACH: 2001 case MDIOCQUERY: { 2002 struct md_ioctl *mdio = (struct md_ioctl *)addr; 2003 MD_REQ2IOCTL(&mdr, mdio); 2004 break; 2005 } 2006 #ifdef COMPAT_FREEBSD32 2007 case MDIOCATTACH_32: 2008 case MDIOCQUERY_32: { 2009 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 2010 MD_REQ2IOCTL(&mdr, mdio); 2011 break; 2012 } 2013 #endif 2014 default: 2015 /* Other commands to not alter mdr. */ 2016 break; 2017 } 2018 2019 return (error); 2020 } 2021 2022 static void 2023 md_preloaded(u_char *image, size_t length, const char *name) 2024 { 2025 struct md_s *sc; 2026 int error; 2027 2028 sc = mdnew(-1, &error, MD_PRELOAD); 2029 if (sc == NULL) 2030 return; 2031 sc->mediasize = length; 2032 sc->sectorsize = DEV_BSIZE; 2033 sc->pl_ptr = image; 2034 sc->pl_len = length; 2035 sc->start = mdstart_preload; 2036 if (name != NULL) 2037 strlcpy(sc->file, name, sizeof(sc->file)); 2038 #ifdef MD_ROOT 2039 if (sc->unit == 0) { 2040 #ifndef ROOTDEVNAME 2041 rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; 2042 #endif 2043 #ifdef MD_ROOT_READONLY 2044 sc->flags |= MD_READONLY; 2045 #endif 2046 } 2047 #endif 2048 mdinit(sc); 2049 if (name != NULL) { 2050 printf("%s%d: Preloaded image <%s> %zd bytes at %p\n", 2051 MD_NAME, sc->unit, name, length, image); 2052 } else { 2053 printf("%s%d: Embedded image %zd bytes at %p\n", 2054 MD_NAME, sc->unit, length, image); 2055 } 2056 } 2057 2058 static void 2059 g_md_init(struct g_class *mp __unused) 2060 { 2061 caddr_t mod; 2062 u_char *ptr, *name, *type; 2063 unsigned len; 2064 int i; 2065 2066 /* figure out log2(NINDIR) */ 2067 for (i = NINDIR, nshift = -1; i; nshift++) 2068 i >>= 1; 2069 2070 mod = NULL; 2071 sx_init(&md_sx, "MD config lock"); 2072 g_topology_unlock(); 2073 md_uh = new_unrhdr(0, INT_MAX, NULL); 2074 #ifdef MD_ROOT 2075 if (mfs_root_size != 0) { 2076 sx_xlock(&md_sx); 2077 md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size, 2078 NULL); 2079 sx_xunlock(&md_sx); 2080 } 2081 #endif 2082 /* XXX: are preload_* static or do they need Giant ? 
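 * The loop below walks the loader-supplied preload metadata and
 * creates a preload-backed unit for every module of type "md_image"
 * or "mfs_root" that has both an address and a size recorded.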
*/ 2083 while ((mod = preload_search_next_name(mod)) != NULL) { 2084 name = (char *)preload_search_info(mod, MODINFO_NAME); 2085 if (name == NULL) 2086 continue; 2087 type = (char *)preload_search_info(mod, MODINFO_TYPE); 2088 if (type == NULL) 2089 continue; 2090 if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) 2091 continue; 2092 ptr = preload_fetch_addr(mod); 2093 len = preload_fetch_size(mod); 2094 if (ptr != NULL && len != 0) { 2095 sx_xlock(&md_sx); 2096 md_preloaded(ptr, len, name); 2097 sx_xunlock(&md_sx); 2098 } 2099 } 2100 md_vnode_pbuf_freecnt = nswbuf / 10; 2101 status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 2102 0600, MDCTL_NAME); 2103 g_topology_lock(); 2104 } 2105 2106 static void 2107 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2108 struct g_consumer *cp __unused, struct g_provider *pp) 2109 { 2110 struct md_s *mp; 2111 char *type; 2112 2113 mp = gp->softc; 2114 if (mp == NULL) 2115 return; 2116 2117 switch (mp->type) { 2118 case MD_MALLOC: 2119 type = "malloc"; 2120 break; 2121 case MD_PRELOAD: 2122 type = "preload"; 2123 break; 2124 case MD_VNODE: 2125 type = "vnode"; 2126 break; 2127 case MD_SWAP: 2128 type = "swap"; 2129 break; 2130 case MD_NULL: 2131 type = "null"; 2132 break; 2133 default: 2134 type = "unknown"; 2135 break; 2136 } 2137 2138 if (pp != NULL) { 2139 if (indent == NULL) { 2140 sbuf_printf(sb, " u %d", mp->unit); 2141 sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); 2142 sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); 2143 sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); 2144 sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); 2145 sbuf_printf(sb, " t %s", type); 2146 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2147 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) 2148 sbuf_printf(sb, " file %s", mp->file); 2149 sbuf_printf(sb, " label %s", mp->label); 2150 } else { 2151 sbuf_printf(sb, "%s<unit>%d</unit>\n", indent, 2152 mp->unit); 2153 sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n", 2154 indent, (uintmax_t) mp->sectorsize); 2155 sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n", 2156 indent, (uintmax_t) mp->fwheads); 2157 sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n", 2158 indent, (uintmax_t) mp->fwsectors); 2159 if (mp->ident[0] != '\0') { 2160 sbuf_printf(sb, "%s<ident>", indent); 2161 g_conf_printf_escaped(sb, "%s", mp->ident); 2162 sbuf_printf(sb, "</ident>\n"); 2163 } 2164 sbuf_printf(sb, "%s<length>%ju</length>\n", 2165 indent, (uintmax_t) mp->mediasize); 2166 sbuf_printf(sb, "%s<compression>%s</compression>\n", indent, 2167 (mp->flags & MD_COMPRESS) == 0 ? "off": "on"); 2168 sbuf_printf(sb, "%s<access>%s</access>\n", indent, 2169 (mp->flags & MD_READONLY) == 0 ? "read-write": 2170 "read-only"); 2171 sbuf_printf(sb, "%s<type>%s</type>\n", indent, 2172 type); 2173 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2174 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) { 2175 sbuf_printf(sb, "%s<file>", indent); 2176 g_conf_printf_escaped(sb, "%s", mp->file); 2177 sbuf_printf(sb, "</file>\n"); 2178 } 2179 sbuf_printf(sb, "%s<label>", indent); 2180 g_conf_printf_escaped(sb, "%s", mp->label); 2181 sbuf_printf(sb, "</label>\n"); 2182 } 2183 } 2184 } 2185 2186 static void 2187 g_md_fini(struct g_class *mp __unused) 2188 { 2189 2190 sx_destroy(&md_sx); 2191 if (status_dev != NULL) 2192 destroy_dev(status_dev); 2193 delete_unrhdr(md_uh); 2194 } 2195
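/*
 * Illustrative only, not part of the driver: a minimal userspace sketch,
 * in the spirit of mdconfig(8), of how the MDIOCATTACH ioctl handled by
 * mdctlioctl() above is driven.  Option parsing and cleanup are omitted,
 * and the 64 MB size is an arbitrary example.
 *
 *	#include <sys/types.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/mdioctl.h>
 *
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct md_ioctl mdio;
 *		int fd;
 *
 *		fd = open("/dev/" MDCTL_NAME, O_RDWR, 0);
 *		if (fd < 0)
 *			err(1, "/dev/%s", MDCTL_NAME);
 *		memset(&mdio, 0, sizeof(mdio));
 *		mdio.md_version = MDIOVERSION;
 *		mdio.md_type = MD_SWAP;
 *		mdio.md_options = MD_AUTOUNIT;	       /* kernel picks the unit */
 *		mdio.md_mediasize = 64 * 1024 * 1024;  /* bytes, multiple of PAGE_SIZE */
 *		mdio.md_sectorsize = 0;		       /* 0 selects DEV_BSIZE */
 *		if (ioctl(fd, MDIOCATTACH, &mdio) == -1)
 *			err(1, "MDIOCATTACH");
 *		printf("md%d attached\n", mdio.md_unit);
 *		close(fd);
 *		return (0);
 *	}
 */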