1 /*- 2 * SPDX-License-Identifier: (Beerware AND BSD-3-Clause) 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * $FreeBSD$ 12 * 13 */ 14 15 /*- 16 * The following functions are based in the vn(4) driver: mdstart_swap(), 17 * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(), 18 * and as such under the following copyright: 19 * 20 * Copyright (c) 1988 University of Utah. 21 * Copyright (c) 1990, 1993 22 * The Regents of the University of California. All rights reserved. 23 * Copyright (c) 2013 The FreeBSD Foundation 24 * All rights reserved. 25 * 26 * This code is derived from software contributed to Berkeley by 27 * the Systems Programming Group of the University of Utah Computer 28 * Science Department. 29 * 30 * Portions of this software were developed by Konstantin Belousov 31 * under sponsorship from the FreeBSD Foundation. 32 * 33 * Redistribution and use in source and binary forms, with or without 34 * modification, are permitted provided that the following conditions 35 * are met: 36 * 1. Redistributions of source code must retain the above copyright 37 * notice, this list of conditions and the following disclaimer. 38 * 2. Redistributions in binary form must reproduce the above copyright 39 * notice, this list of conditions and the following disclaimer in the 40 * documentation and/or other materials provided with the distribution. 41 * 3. Neither the name of the University nor the names of its contributors 42 * may be used to endorse or promote products derived from this software 43 * without specific prior written permission. 44 * 45 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 48 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 55 * SUCH DAMAGE. 56 * 57 * from: Utah Hdr: vn.c 1.13 94/04/02 58 * 59 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 60 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 61 */ 62 63 #include "opt_compat.h" 64 #include "opt_rootdevname.h" 65 #include "opt_geom.h" 66 #include "opt_md.h" 67 68 #include <sys/param.h> 69 #include <sys/systm.h> 70 #include <sys/bio.h> 71 #include <sys/buf.h> 72 #include <sys/conf.h> 73 #include <sys/devicestat.h> 74 #include <sys/fcntl.h> 75 #include <sys/kernel.h> 76 #include <sys/kthread.h> 77 #include <sys/limits.h> 78 #include <sys/linker.h> 79 #include <sys/lock.h> 80 #include <sys/malloc.h> 81 #include <sys/mdioctl.h> 82 #include <sys/mount.h> 83 #include <sys/mutex.h> 84 #include <sys/sx.h> 85 #include <sys/namei.h> 86 #include <sys/proc.h> 87 #include <sys/queue.h> 88 #include <sys/rwlock.h> 89 #include <sys/sbuf.h> 90 #include <sys/sched.h> 91 #include <sys/sf_buf.h> 92 #include <sys/sysctl.h> 93 #include <sys/uio.h> 94 #include <sys/vnode.h> 95 #include <sys/disk.h> 96 97 #include <geom/geom.h> 98 #include <geom/geom_int.h> 99 100 #include <vm/vm.h> 101 #include <vm/vm_param.h> 102 #include <vm/vm_object.h> 103 #include <vm/vm_page.h> 104 #include <vm/vm_pager.h> 105 #include <vm/swap_pager.h> 106 #include <vm/uma.h> 107 108 #include <machine/bus.h> 109 110 #define MD_MODVER 1 111 112 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ 113 #define MD_EXITING 0x20000 /* Worker thread is exiting. */ 114 115 #ifndef MD_NSECT 116 #define MD_NSECT (10000 * 2) 117 #endif 118 119 struct md_req { 120 unsigned md_unit; /* unit number */ 121 enum md_types md_type; /* type of disk */ 122 off_t md_mediasize; /* size of disk in bytes */ 123 unsigned md_sectorsize; /* sectorsize */ 124 unsigned md_options; /* options */ 125 int md_fwheads; /* firmware heads */ 126 int md_fwsectors; /* firmware sectors */ 127 char *md_file; /* pathname of file to mount */ 128 enum uio_seg md_file_seg; /* location of md_file */ 129 char *md_label; /* label of the device (userspace) */ 130 int *md_units; /* pointer to units array (kernel) */ 131 size_t md_units_nitems; /* items in md_units array */ 132 }; 133 134 #ifdef COMPAT_FREEBSD32 135 struct md_ioctl32 { 136 unsigned md_version; 137 unsigned md_unit; 138 enum md_types md_type; 139 uint32_t md_file; 140 off_t md_mediasize; 141 unsigned md_sectorsize; 142 unsigned md_options; 143 uint64_t md_base; 144 int md_fwheads; 145 int md_fwsectors; 146 uint32_t md_label; 147 int md_pad[MDNPAD]; 148 } __attribute__((__packed__)); 149 CTASSERT((sizeof(struct md_ioctl32)) == 436); 150 151 #define MDIOCATTACH_32 _IOC_NEWTYPE(MDIOCATTACH, struct md_ioctl32) 152 #define MDIOCDETACH_32 _IOC_NEWTYPE(MDIOCDETACH, struct md_ioctl32) 153 #define MDIOCQUERY_32 _IOC_NEWTYPE(MDIOCQUERY, struct md_ioctl32) 154 #define MDIOCLIST_32 _IOC_NEWTYPE(MDIOCLIST, struct md_ioctl32) 155 #define MDIOCRESIZE_32 _IOC_NEWTYPE(MDIOCRESIZE, struct md_ioctl32) 156 #endif /* COMPAT_FREEBSD32 */ 157 158 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); 159 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); 160 161 static int md_debug; 162 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, 163 "Enable md(4) debug messages"); 164 static int md_malloc_wait; 165 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, 166 "Allow malloc to wait for memory allocations"); 167 168 #if defined(MD_ROOT) && !defined(MD_ROOT_FSTYPE) 169 #define MD_ROOT_FSTYPE "ufs" 170 #endif 171 172 #if defined(MD_ROOT) 173 /* 174 * Preloaded image gets put here. 175 */ 176 #if defined(MD_ROOT_SIZE) 177 /* 178 * We put the mfs_root symbol into the oldmfs section of the kernel object file. 179 * Applications that patch the object with the image can determine 180 * the size looking at the oldmfs section size within the kernel. 181 */ 182 u_char mfs_root[MD_ROOT_SIZE*1024] __attribute__ ((section ("oldmfs"))); 183 const int mfs_root_size = sizeof(mfs_root); 184 #else 185 extern volatile u_char __weak_symbol mfs_root; 186 extern volatile u_char __weak_symbol mfs_root_end; 187 __GLOBL(mfs_root); 188 __GLOBL(mfs_root_end); 189 #define mfs_root_size ((uintptr_t)(&mfs_root_end - &mfs_root)) 190 #endif 191 #endif 192 193 static g_init_t g_md_init; 194 static g_fini_t g_md_fini; 195 static g_start_t g_md_start; 196 static g_access_t g_md_access; 197 static void g_md_dumpconf(struct sbuf *sb, const char *indent, 198 struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); 199 200 static struct cdev *status_dev = NULL; 201 static struct sx md_sx; 202 static struct unrhdr *md_uh; 203 204 static d_ioctl_t mdctlioctl; 205 206 static struct cdevsw mdctl_cdevsw = { 207 .d_version = D_VERSION, 208 .d_ioctl = mdctlioctl, 209 .d_name = MD_NAME, 210 }; 211 212 struct g_class g_md_class = { 213 .name = "MD", 214 .version = G_VERSION, 215 .init = g_md_init, 216 .fini = g_md_fini, 217 .start = g_md_start, 218 .access = g_md_access, 219 .dumpconf = g_md_dumpconf, 220 }; 221 222 DECLARE_GEOM_CLASS(g_md_class, g_md); 223 224 225 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list); 226 227 #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) 228 #define NMASK (NINDIR-1) 229 static int nshift; 230 231 static int md_vnode_pbuf_freecnt; 232 233 struct indir { 234 uintptr_t *array; 235 u_int total; 236 u_int used; 237 u_int shift; 238 }; 239 240 struct md_s { 241 int unit; 242 LIST_ENTRY(md_s) list; 243 struct bio_queue_head bio_queue; 244 struct mtx queue_mtx; 245 struct mtx stat_mtx; 246 struct cdev *dev; 247 enum md_types type; 248 off_t mediasize; 249 unsigned sectorsize; 250 unsigned opencount; 251 unsigned fwheads; 252 unsigned fwsectors; 253 char ident[32]; 254 unsigned flags; 255 char name[20]; 256 struct proc *procp; 257 struct g_geom *gp; 258 struct g_provider *pp; 259 int (*start)(struct md_s *sc, struct bio *bp); 260 struct devstat *devstat; 261 262 /* MD_MALLOC related fields */ 263 struct indir *indir; 264 uma_zone_t uma; 265 266 /* MD_PRELOAD related fields */ 267 u_char *pl_ptr; 268 size_t pl_len; 269 270 /* MD_VNODE related fields */ 271 struct vnode *vnode; 272 char file[PATH_MAX]; 273 char label[PATH_MAX]; 274 struct ucred *cred; 275 276 /* MD_SWAP related fields */ 277 vm_object_t object; 278 }; 279 280 static struct indir * 281 new_indir(u_int shift) 282 { 283 struct indir *ip; 284 285 ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT) 286 | M_ZERO); 287 if (ip == NULL) 288 return (NULL); 289 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 290 M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO); 291 if (ip->array == NULL) { 292 free(ip, M_MD); 293 return (NULL); 294 } 295 ip->total = NINDIR; 296 ip->shift = shift; 297 return (ip); 298 } 299 300 static void 301 del_indir(struct indir *ip) 302 { 303 304 free(ip->array, M_MDSECT); 305 free(ip, M_MD); 306 } 307 308 static void 309 destroy_indir(struct md_s *sc, struct indir *ip) 310 { 311 int i; 312 313 for (i = 0; i < NINDIR; i++) { 314 if (!ip->array[i]) 315 continue; 316 if (ip->shift) 317 destroy_indir(sc, (struct indir*)(ip->array[i])); 318 else if (ip->array[i] > 255) 319 uma_zfree(sc->uma, (void *)(ip->array[i])); 320 } 321 del_indir(ip); 322 } 323 324 /* 325 * This function does the math and allocates the top level "indir" structure 326 * for a device of "size" sectors. 327 */ 328 329 static struct indir * 330 dimension(off_t size) 331 { 332 off_t rcnt; 333 struct indir *ip; 334 int layer; 335 336 rcnt = size; 337 layer = 0; 338 while (rcnt > NINDIR) { 339 rcnt /= NINDIR; 340 layer++; 341 } 342 343 /* 344 * XXX: the top layer is probably not fully populated, so we allocate 345 * too much space for ip->array in here. 346 */ 347 ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); 348 ip->array = malloc(sizeof(uintptr_t) * NINDIR, 349 M_MDSECT, M_WAITOK | M_ZERO); 350 ip->total = NINDIR; 351 ip->shift = layer * nshift; 352 return (ip); 353 } 354 355 /* 356 * Read a given sector 357 */ 358 359 static uintptr_t 360 s_read(struct indir *ip, off_t offset) 361 { 362 struct indir *cip; 363 int idx; 364 uintptr_t up; 365 366 if (md_debug > 1) 367 printf("s_read(%jd)\n", (intmax_t)offset); 368 up = 0; 369 for (cip = ip; cip != NULL;) { 370 if (cip->shift) { 371 idx = (offset >> cip->shift) & NMASK; 372 up = cip->array[idx]; 373 cip = (struct indir *)up; 374 continue; 375 } 376 idx = offset & NMASK; 377 return (cip->array[idx]); 378 } 379 return (0); 380 } 381 382 /* 383 * Write a given sector, prune the tree if the value is 0 384 */ 385 386 static int 387 s_write(struct indir *ip, off_t offset, uintptr_t ptr) 388 { 389 struct indir *cip, *lip[10]; 390 int idx, li; 391 uintptr_t up; 392 393 if (md_debug > 1) 394 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); 395 up = 0; 396 li = 0; 397 cip = ip; 398 for (;;) { 399 lip[li++] = cip; 400 if (cip->shift) { 401 idx = (offset >> cip->shift) & NMASK; 402 up = cip->array[idx]; 403 if (up != 0) { 404 cip = (struct indir *)up; 405 continue; 406 } 407 /* Allocate branch */ 408 cip->array[idx] = 409 (uintptr_t)new_indir(cip->shift - nshift); 410 if (cip->array[idx] == 0) 411 return (ENOSPC); 412 cip->used++; 413 up = cip->array[idx]; 414 cip = (struct indir *)up; 415 continue; 416 } 417 /* leafnode */ 418 idx = offset & NMASK; 419 up = cip->array[idx]; 420 if (up != 0) 421 cip->used--; 422 cip->array[idx] = ptr; 423 if (ptr != 0) 424 cip->used++; 425 break; 426 } 427 if (cip->used != 0 || li == 1) 428 return (0); 429 li--; 430 while (cip->used == 0 && cip != ip) { 431 li--; 432 idx = (offset >> lip[li]->shift) & NMASK; 433 up = lip[li]->array[idx]; 434 KASSERT(up == (uintptr_t)cip, ("md screwed up")); 435 del_indir(cip); 436 lip[li]->array[idx] = 0; 437 lip[li]->used--; 438 cip = lip[li]; 439 } 440 return (0); 441 } 442 443 444 static int 445 g_md_access(struct g_provider *pp, int r, int w, int e) 446 { 447 struct md_s *sc; 448 449 sc = pp->geom->softc; 450 if (sc == NULL) { 451 if (r <= 0 && w <= 0 && e <= 0) 452 return (0); 453 return (ENXIO); 454 } 455 r += pp->acr; 456 w += pp->acw; 457 e += pp->ace; 458 if ((sc->flags & MD_READONLY) != 0 && w > 0) 459 return (EROFS); 460 if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { 461 sc->opencount = 1; 462 } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { 463 sc->opencount = 0; 464 } 465 return (0); 466 } 467 468 static void 469 g_md_start(struct bio *bp) 470 { 471 struct md_s *sc; 472 473 sc = bp->bio_to->geom->softc; 474 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { 475 mtx_lock(&sc->stat_mtx); 476 devstat_start_transaction_bio(sc->devstat, bp); 477 mtx_unlock(&sc->stat_mtx); 478 } 479 mtx_lock(&sc->queue_mtx); 480 bioq_disksort(&sc->bio_queue, bp); 481 mtx_unlock(&sc->queue_mtx); 482 wakeup(sc); 483 } 484 485 #define MD_MALLOC_MOVE_ZERO 1 486 #define MD_MALLOC_MOVE_FILL 2 487 #define MD_MALLOC_MOVE_READ 3 488 #define MD_MALLOC_MOVE_WRITE 4 489 #define MD_MALLOC_MOVE_CMP 5 490 491 static int 492 md_malloc_move_ma(vm_page_t **mp, int *ma_offs, unsigned sectorsize, 493 void *ptr, u_char fill, int op) 494 { 495 struct sf_buf *sf; 496 vm_page_t m, *mp1; 497 char *p, first; 498 off_t *uc; 499 unsigned n; 500 int error, i, ma_offs1, sz, first_read; 501 502 m = NULL; 503 error = 0; 504 sf = NULL; 505 /* if (op == MD_MALLOC_MOVE_CMP) { gcc */ 506 first = 0; 507 first_read = 0; 508 uc = ptr; 509 mp1 = *mp; 510 ma_offs1 = *ma_offs; 511 /* } */ 512 sched_pin(); 513 for (n = sectorsize; n != 0; n -= sz) { 514 sz = imin(PAGE_SIZE - *ma_offs, n); 515 if (m != **mp) { 516 if (sf != NULL) 517 sf_buf_free(sf); 518 m = **mp; 519 sf = sf_buf_alloc(m, SFB_CPUPRIVATE | 520 (md_malloc_wait ? 0 : SFB_NOWAIT)); 521 if (sf == NULL) { 522 error = ENOMEM; 523 break; 524 } 525 } 526 p = (char *)sf_buf_kva(sf) + *ma_offs; 527 switch (op) { 528 case MD_MALLOC_MOVE_ZERO: 529 bzero(p, sz); 530 break; 531 case MD_MALLOC_MOVE_FILL: 532 memset(p, fill, sz); 533 break; 534 case MD_MALLOC_MOVE_READ: 535 bcopy(ptr, p, sz); 536 cpu_flush_dcache(p, sz); 537 break; 538 case MD_MALLOC_MOVE_WRITE: 539 bcopy(p, ptr, sz); 540 break; 541 case MD_MALLOC_MOVE_CMP: 542 for (i = 0; i < sz; i++, p++) { 543 if (!first_read) { 544 *uc = (u_char)*p; 545 first = *p; 546 first_read = 1; 547 } else if (*p != first) { 548 error = EDOOFUS; 549 break; 550 } 551 } 552 break; 553 default: 554 KASSERT(0, ("md_malloc_move_ma unknown op %d\n", op)); 555 break; 556 } 557 if (error != 0) 558 break; 559 *ma_offs += sz; 560 *ma_offs %= PAGE_SIZE; 561 if (*ma_offs == 0) 562 (*mp)++; 563 ptr = (char *)ptr + sz; 564 } 565 566 if (sf != NULL) 567 sf_buf_free(sf); 568 sched_unpin(); 569 if (op == MD_MALLOC_MOVE_CMP && error != 0) { 570 *mp = mp1; 571 *ma_offs = ma_offs1; 572 } 573 return (error); 574 } 575 576 static int 577 md_malloc_move_vlist(bus_dma_segment_t **pvlist, int *pma_offs, 578 unsigned len, void *ptr, u_char fill, int op) 579 { 580 bus_dma_segment_t *vlist; 581 uint8_t *p, *end, first; 582 off_t *uc; 583 int ma_offs, seg_len; 584 585 vlist = *pvlist; 586 ma_offs = *pma_offs; 587 uc = ptr; 588 589 for (; len != 0; len -= seg_len) { 590 seg_len = imin(vlist->ds_len - ma_offs, len); 591 p = (uint8_t *)(uintptr_t)vlist->ds_addr + ma_offs; 592 switch (op) { 593 case MD_MALLOC_MOVE_ZERO: 594 bzero(p, seg_len); 595 break; 596 case MD_MALLOC_MOVE_FILL: 597 memset(p, fill, seg_len); 598 break; 599 case MD_MALLOC_MOVE_READ: 600 bcopy(ptr, p, seg_len); 601 cpu_flush_dcache(p, seg_len); 602 break; 603 case MD_MALLOC_MOVE_WRITE: 604 bcopy(p, ptr, seg_len); 605 break; 606 case MD_MALLOC_MOVE_CMP: 607 end = p + seg_len; 608 first = *uc = *p; 609 /* Confirm all following bytes match the first */ 610 while (++p < end) { 611 if (*p != first) 612 return (EDOOFUS); 613 } 614 break; 615 default: 616 KASSERT(0, ("md_malloc_move_vlist unknown op %d\n", op)); 617 break; 618 } 619 620 ma_offs += seg_len; 621 if (ma_offs == vlist->ds_len) { 622 ma_offs = 0; 623 vlist++; 624 } 625 ptr = (uint8_t *)ptr + seg_len; 626 } 627 *pvlist = vlist; 628 *pma_offs = ma_offs; 629 630 return (0); 631 } 632 633 static int 634 mdstart_malloc(struct md_s *sc, struct bio *bp) 635 { 636 u_char *dst; 637 vm_page_t *m; 638 bus_dma_segment_t *vlist; 639 int i, error, error1, ma_offs, notmapped; 640 off_t secno, nsec, uc; 641 uintptr_t sp, osp; 642 643 switch (bp->bio_cmd) { 644 case BIO_READ: 645 case BIO_WRITE: 646 case BIO_DELETE: 647 break; 648 default: 649 return (EOPNOTSUPP); 650 } 651 652 notmapped = (bp->bio_flags & BIO_UNMAPPED) != 0; 653 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 654 (bus_dma_segment_t *)bp->bio_data : NULL; 655 if (notmapped) { 656 m = bp->bio_ma; 657 ma_offs = bp->bio_ma_offset; 658 dst = NULL; 659 KASSERT(vlist == NULL, ("vlists cannot be unmapped")); 660 } else if (vlist != NULL) { 661 ma_offs = bp->bio_ma_offset; 662 dst = NULL; 663 } else { 664 dst = bp->bio_data; 665 } 666 667 nsec = bp->bio_length / sc->sectorsize; 668 secno = bp->bio_offset / sc->sectorsize; 669 error = 0; 670 while (nsec--) { 671 osp = s_read(sc->indir, secno); 672 if (bp->bio_cmd == BIO_DELETE) { 673 if (osp != 0) 674 error = s_write(sc->indir, secno, 0); 675 } else if (bp->bio_cmd == BIO_READ) { 676 if (osp == 0) { 677 if (notmapped) { 678 error = md_malloc_move_ma(&m, &ma_offs, 679 sc->sectorsize, NULL, 0, 680 MD_MALLOC_MOVE_ZERO); 681 } else if (vlist != NULL) { 682 error = md_malloc_move_vlist(&vlist, 683 &ma_offs, sc->sectorsize, NULL, 0, 684 MD_MALLOC_MOVE_ZERO); 685 } else 686 bzero(dst, sc->sectorsize); 687 } else if (osp <= 255) { 688 if (notmapped) { 689 error = md_malloc_move_ma(&m, &ma_offs, 690 sc->sectorsize, NULL, osp, 691 MD_MALLOC_MOVE_FILL); 692 } else if (vlist != NULL) { 693 error = md_malloc_move_vlist(&vlist, 694 &ma_offs, sc->sectorsize, NULL, osp, 695 MD_MALLOC_MOVE_FILL); 696 } else 697 memset(dst, osp, sc->sectorsize); 698 } else { 699 if (notmapped) { 700 error = md_malloc_move_ma(&m, &ma_offs, 701 sc->sectorsize, (void *)osp, 0, 702 MD_MALLOC_MOVE_READ); 703 } else if (vlist != NULL) { 704 error = md_malloc_move_vlist(&vlist, 705 &ma_offs, sc->sectorsize, 706 (void *)osp, 0, 707 MD_MALLOC_MOVE_READ); 708 } else { 709 bcopy((void *)osp, dst, sc->sectorsize); 710 cpu_flush_dcache(dst, sc->sectorsize); 711 } 712 } 713 osp = 0; 714 } else if (bp->bio_cmd == BIO_WRITE) { 715 if (sc->flags & MD_COMPRESS) { 716 if (notmapped) { 717 error1 = md_malloc_move_ma(&m, &ma_offs, 718 sc->sectorsize, &uc, 0, 719 MD_MALLOC_MOVE_CMP); 720 i = error1 == 0 ? sc->sectorsize : 0; 721 } else if (vlist != NULL) { 722 error1 = md_malloc_move_vlist(&vlist, 723 &ma_offs, sc->sectorsize, &uc, 0, 724 MD_MALLOC_MOVE_CMP); 725 i = error1 == 0 ? sc->sectorsize : 0; 726 } else { 727 uc = dst[0]; 728 for (i = 1; i < sc->sectorsize; i++) { 729 if (dst[i] != uc) 730 break; 731 } 732 } 733 } else { 734 i = 0; 735 uc = 0; 736 } 737 if (i == sc->sectorsize) { 738 if (osp != uc) 739 error = s_write(sc->indir, secno, uc); 740 } else { 741 if (osp <= 255) { 742 sp = (uintptr_t)uma_zalloc(sc->uma, 743 md_malloc_wait ? M_WAITOK : 744 M_NOWAIT); 745 if (sp == 0) { 746 error = ENOSPC; 747 break; 748 } 749 if (notmapped) { 750 error = md_malloc_move_ma(&m, 751 &ma_offs, sc->sectorsize, 752 (void *)sp, 0, 753 MD_MALLOC_MOVE_WRITE); 754 } else if (vlist != NULL) { 755 error = md_malloc_move_vlist( 756 &vlist, &ma_offs, 757 sc->sectorsize, (void *)sp, 758 0, MD_MALLOC_MOVE_WRITE); 759 } else { 760 bcopy(dst, (void *)sp, 761 sc->sectorsize); 762 } 763 error = s_write(sc->indir, secno, sp); 764 } else { 765 if (notmapped) { 766 error = md_malloc_move_ma(&m, 767 &ma_offs, sc->sectorsize, 768 (void *)osp, 0, 769 MD_MALLOC_MOVE_WRITE); 770 } else if (vlist != NULL) { 771 error = md_malloc_move_vlist( 772 &vlist, &ma_offs, 773 sc->sectorsize, (void *)osp, 774 0, MD_MALLOC_MOVE_WRITE); 775 } else { 776 bcopy(dst, (void *)osp, 777 sc->sectorsize); 778 } 779 osp = 0; 780 } 781 } 782 } else { 783 error = EOPNOTSUPP; 784 } 785 if (osp > 255) 786 uma_zfree(sc->uma, (void*)osp); 787 if (error != 0) 788 break; 789 secno++; 790 if (!notmapped && vlist == NULL) 791 dst += sc->sectorsize; 792 } 793 bp->bio_resid = 0; 794 return (error); 795 } 796 797 static void 798 mdcopyto_vlist(void *src, bus_dma_segment_t *vlist, off_t offset, off_t len) 799 { 800 off_t seg_len; 801 802 while (offset >= vlist->ds_len) { 803 offset -= vlist->ds_len; 804 vlist++; 805 } 806 807 while (len != 0) { 808 seg_len = omin(len, vlist->ds_len - offset); 809 bcopy(src, (void *)(uintptr_t)(vlist->ds_addr + offset), 810 seg_len); 811 offset = 0; 812 src = (uint8_t *)src + seg_len; 813 len -= seg_len; 814 vlist++; 815 } 816 } 817 818 static void 819 mdcopyfrom_vlist(bus_dma_segment_t *vlist, off_t offset, void *dst, off_t len) 820 { 821 off_t seg_len; 822 823 while (offset >= vlist->ds_len) { 824 offset -= vlist->ds_len; 825 vlist++; 826 } 827 828 while (len != 0) { 829 seg_len = omin(len, vlist->ds_len - offset); 830 bcopy((void *)(uintptr_t)(vlist->ds_addr + offset), dst, 831 seg_len); 832 offset = 0; 833 dst = (uint8_t *)dst + seg_len; 834 len -= seg_len; 835 vlist++; 836 } 837 } 838 839 static int 840 mdstart_preload(struct md_s *sc, struct bio *bp) 841 { 842 uint8_t *p; 843 844 p = sc->pl_ptr + bp->bio_offset; 845 switch (bp->bio_cmd) { 846 case BIO_READ: 847 if ((bp->bio_flags & BIO_VLIST) != 0) { 848 mdcopyto_vlist(p, (bus_dma_segment_t *)bp->bio_data, 849 bp->bio_ma_offset, bp->bio_length); 850 } else { 851 bcopy(p, bp->bio_data, bp->bio_length); 852 } 853 cpu_flush_dcache(bp->bio_data, bp->bio_length); 854 break; 855 case BIO_WRITE: 856 if ((bp->bio_flags & BIO_VLIST) != 0) { 857 mdcopyfrom_vlist((bus_dma_segment_t *)bp->bio_data, 858 bp->bio_ma_offset, p, bp->bio_length); 859 } else { 860 bcopy(bp->bio_data, p, bp->bio_length); 861 } 862 break; 863 } 864 bp->bio_resid = 0; 865 return (0); 866 } 867 868 static int 869 mdstart_vnode(struct md_s *sc, struct bio *bp) 870 { 871 int error; 872 struct uio auio; 873 struct iovec aiov; 874 struct iovec *piov; 875 struct mount *mp; 876 struct vnode *vp; 877 struct buf *pb; 878 bus_dma_segment_t *vlist; 879 struct thread *td; 880 off_t iolen, len, zerosize; 881 int ma_offs, npages; 882 883 switch (bp->bio_cmd) { 884 case BIO_READ: 885 auio.uio_rw = UIO_READ; 886 break; 887 case BIO_WRITE: 888 case BIO_DELETE: 889 auio.uio_rw = UIO_WRITE; 890 break; 891 case BIO_FLUSH: 892 break; 893 default: 894 return (EOPNOTSUPP); 895 } 896 897 td = curthread; 898 vp = sc->vnode; 899 pb = NULL; 900 piov = NULL; 901 ma_offs = bp->bio_ma_offset; 902 len = bp->bio_length; 903 904 /* 905 * VNODE I/O 906 * 907 * If an error occurs, we set BIO_ERROR but we do not set 908 * B_INVAL because (for a write anyway), the buffer is 909 * still valid. 910 */ 911 912 if (bp->bio_cmd == BIO_FLUSH) { 913 (void) vn_start_write(vp, &mp, V_WAIT); 914 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 915 error = VOP_FSYNC(vp, MNT_WAIT, td); 916 VOP_UNLOCK(vp, 0); 917 vn_finished_write(mp); 918 return (error); 919 } 920 921 auio.uio_offset = (vm_ooffset_t)bp->bio_offset; 922 auio.uio_resid = bp->bio_length; 923 auio.uio_segflg = UIO_SYSSPACE; 924 auio.uio_td = td; 925 926 if (bp->bio_cmd == BIO_DELETE) { 927 /* 928 * Emulate BIO_DELETE by writing zeros. 929 */ 930 zerosize = ZERO_REGION_SIZE - 931 (ZERO_REGION_SIZE % sc->sectorsize); 932 auio.uio_iovcnt = howmany(bp->bio_length, zerosize); 933 piov = malloc(sizeof(*piov) * auio.uio_iovcnt, M_MD, M_WAITOK); 934 auio.uio_iov = piov; 935 while (len > 0) { 936 piov->iov_base = __DECONST(void *, zero_region); 937 piov->iov_len = len; 938 if (len > zerosize) 939 piov->iov_len = zerosize; 940 len -= piov->iov_len; 941 piov++; 942 } 943 piov = auio.uio_iov; 944 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 945 piov = malloc(sizeof(*piov) * bp->bio_ma_n, M_MD, M_WAITOK); 946 auio.uio_iov = piov; 947 vlist = (bus_dma_segment_t *)bp->bio_data; 948 while (len > 0) { 949 piov->iov_base = (void *)(uintptr_t)(vlist->ds_addr + 950 ma_offs); 951 piov->iov_len = vlist->ds_len - ma_offs; 952 if (piov->iov_len > len) 953 piov->iov_len = len; 954 len -= piov->iov_len; 955 ma_offs = 0; 956 vlist++; 957 piov++; 958 } 959 auio.uio_iovcnt = piov - auio.uio_iov; 960 piov = auio.uio_iov; 961 } else if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 962 pb = getpbuf(&md_vnode_pbuf_freecnt); 963 bp->bio_resid = len; 964 unmapped_step: 965 npages = atop(min(MAXPHYS, round_page(len + (ma_offs & 966 PAGE_MASK)))); 967 iolen = min(ptoa(npages) - (ma_offs & PAGE_MASK), len); 968 KASSERT(iolen > 0, ("zero iolen")); 969 pmap_qenter((vm_offset_t)pb->b_data, 970 &bp->bio_ma[atop(ma_offs)], npages); 971 aiov.iov_base = (void *)((vm_offset_t)pb->b_data + 972 (ma_offs & PAGE_MASK)); 973 aiov.iov_len = iolen; 974 auio.uio_iov = &aiov; 975 auio.uio_iovcnt = 1; 976 auio.uio_resid = iolen; 977 } else { 978 aiov.iov_base = bp->bio_data; 979 aiov.iov_len = bp->bio_length; 980 auio.uio_iov = &aiov; 981 auio.uio_iovcnt = 1; 982 } 983 /* 984 * When reading set IO_DIRECT to try to avoid double-caching 985 * the data. When writing IO_DIRECT is not optimal. 986 */ 987 if (auio.uio_rw == UIO_READ) { 988 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 989 error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred); 990 VOP_UNLOCK(vp, 0); 991 } else { 992 (void) vn_start_write(vp, &mp, V_WAIT); 993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 994 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC, 995 sc->cred); 996 VOP_UNLOCK(vp, 0); 997 vn_finished_write(mp); 998 if (error == 0) 999 sc->flags &= ~MD_VERIFY; 1000 } 1001 1002 if (pb != NULL) { 1003 pmap_qremove((vm_offset_t)pb->b_data, npages); 1004 if (error == 0) { 1005 len -= iolen; 1006 bp->bio_resid -= iolen; 1007 ma_offs += iolen; 1008 if (len > 0) 1009 goto unmapped_step; 1010 } 1011 relpbuf(pb, &md_vnode_pbuf_freecnt); 1012 } 1013 1014 free(piov, M_MD); 1015 if (pb == NULL) 1016 bp->bio_resid = auio.uio_resid; 1017 return (error); 1018 } 1019 1020 static void 1021 md_swap_page_free(vm_page_t m) 1022 { 1023 1024 vm_page_xunbusy(m); 1025 vm_page_lock(m); 1026 vm_page_free(m); 1027 vm_page_unlock(m); 1028 } 1029 1030 static int 1031 mdstart_swap(struct md_s *sc, struct bio *bp) 1032 { 1033 vm_page_t m; 1034 u_char *p; 1035 vm_pindex_t i, lastp; 1036 bus_dma_segment_t *vlist; 1037 int rv, ma_offs, offs, len, lastend; 1038 1039 switch (bp->bio_cmd) { 1040 case BIO_READ: 1041 case BIO_WRITE: 1042 case BIO_DELETE: 1043 break; 1044 default: 1045 return (EOPNOTSUPP); 1046 } 1047 1048 p = bp->bio_data; 1049 ma_offs = (bp->bio_flags & (BIO_UNMAPPED|BIO_VLIST)) != 0 ? 1050 bp->bio_ma_offset : 0; 1051 vlist = (bp->bio_flags & BIO_VLIST) != 0 ? 1052 (bus_dma_segment_t *)bp->bio_data : NULL; 1053 1054 /* 1055 * offs is the offset at which to start operating on the 1056 * next (ie, first) page. lastp is the last page on 1057 * which we're going to operate. lastend is the ending 1058 * position within that last page (ie, PAGE_SIZE if 1059 * we're operating on complete aligned pages). 1060 */ 1061 offs = bp->bio_offset % PAGE_SIZE; 1062 lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; 1063 lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; 1064 1065 rv = VM_PAGER_OK; 1066 VM_OBJECT_WLOCK(sc->object); 1067 vm_object_pip_add(sc->object, 1); 1068 for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { 1069 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; 1070 m = vm_page_grab(sc->object, i, VM_ALLOC_SYSTEM); 1071 if (bp->bio_cmd == BIO_READ) { 1072 if (m->valid == VM_PAGE_BITS_ALL) 1073 rv = VM_PAGER_OK; 1074 else 1075 rv = vm_pager_get_pages(sc->object, &m, 1, 1076 NULL, NULL); 1077 if (rv == VM_PAGER_ERROR) { 1078 md_swap_page_free(m); 1079 break; 1080 } else if (rv == VM_PAGER_FAIL) { 1081 /* 1082 * Pager does not have the page. Zero 1083 * the allocated page, and mark it as 1084 * valid. Do not set dirty, the page 1085 * can be recreated if thrown out. 1086 */ 1087 pmap_zero_page(m); 1088 m->valid = VM_PAGE_BITS_ALL; 1089 } 1090 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1091 pmap_copy_pages(&m, offs, bp->bio_ma, 1092 ma_offs, len); 1093 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1094 physcopyout_vlist(VM_PAGE_TO_PHYS(m) + offs, 1095 vlist, ma_offs, len); 1096 cpu_flush_dcache(p, len); 1097 } else { 1098 physcopyout(VM_PAGE_TO_PHYS(m) + offs, p, len); 1099 cpu_flush_dcache(p, len); 1100 } 1101 } else if (bp->bio_cmd == BIO_WRITE) { 1102 if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) 1103 rv = VM_PAGER_OK; 1104 else 1105 rv = vm_pager_get_pages(sc->object, &m, 1, 1106 NULL, NULL); 1107 if (rv == VM_PAGER_ERROR) { 1108 md_swap_page_free(m); 1109 break; 1110 } else if (rv == VM_PAGER_FAIL) 1111 pmap_zero_page(m); 1112 1113 if ((bp->bio_flags & BIO_UNMAPPED) != 0) { 1114 pmap_copy_pages(bp->bio_ma, ma_offs, &m, 1115 offs, len); 1116 } else if ((bp->bio_flags & BIO_VLIST) != 0) { 1117 physcopyin_vlist(vlist, ma_offs, 1118 VM_PAGE_TO_PHYS(m) + offs, len); 1119 } else { 1120 physcopyin(p, VM_PAGE_TO_PHYS(m) + offs, len); 1121 } 1122 1123 m->valid = VM_PAGE_BITS_ALL; 1124 if (m->dirty != VM_PAGE_BITS_ALL) { 1125 vm_page_dirty(m); 1126 vm_pager_page_unswapped(m); 1127 } 1128 } else if (bp->bio_cmd == BIO_DELETE) { 1129 if (len == PAGE_SIZE || m->valid == VM_PAGE_BITS_ALL) 1130 rv = VM_PAGER_OK; 1131 else 1132 rv = vm_pager_get_pages(sc->object, &m, 1, 1133 NULL, NULL); 1134 if (rv == VM_PAGER_ERROR) { 1135 md_swap_page_free(m); 1136 break; 1137 } else if (rv == VM_PAGER_FAIL) { 1138 md_swap_page_free(m); 1139 m = NULL; 1140 } else { 1141 /* Page is valid. */ 1142 if (len != PAGE_SIZE) { 1143 pmap_zero_page_area(m, offs, len); 1144 if (m->dirty != VM_PAGE_BITS_ALL) { 1145 vm_page_dirty(m); 1146 vm_pager_page_unswapped(m); 1147 } 1148 } else { 1149 vm_pager_page_unswapped(m); 1150 md_swap_page_free(m); 1151 m = NULL; 1152 } 1153 } 1154 } 1155 if (m != NULL) { 1156 vm_page_xunbusy(m); 1157 vm_page_lock(m); 1158 if (vm_page_active(m)) 1159 vm_page_reference(m); 1160 else 1161 vm_page_activate(m); 1162 vm_page_unlock(m); 1163 } 1164 1165 /* Actions on further pages start at offset 0 */ 1166 p += PAGE_SIZE - offs; 1167 offs = 0; 1168 ma_offs += len; 1169 } 1170 vm_object_pip_wakeup(sc->object); 1171 VM_OBJECT_WUNLOCK(sc->object); 1172 return (rv != VM_PAGER_ERROR ? 0 : ENOSPC); 1173 } 1174 1175 static int 1176 mdstart_null(struct md_s *sc, struct bio *bp) 1177 { 1178 1179 switch (bp->bio_cmd) { 1180 case BIO_READ: 1181 bzero(bp->bio_data, bp->bio_length); 1182 cpu_flush_dcache(bp->bio_data, bp->bio_length); 1183 break; 1184 case BIO_WRITE: 1185 break; 1186 } 1187 bp->bio_resid = 0; 1188 return (0); 1189 } 1190 1191 static void 1192 md_kthread(void *arg) 1193 { 1194 struct md_s *sc; 1195 struct bio *bp; 1196 int error; 1197 1198 sc = arg; 1199 thread_lock(curthread); 1200 sched_prio(curthread, PRIBIO); 1201 thread_unlock(curthread); 1202 if (sc->type == MD_VNODE) 1203 curthread->td_pflags |= TDP_NORUNNINGBUF; 1204 1205 for (;;) { 1206 mtx_lock(&sc->queue_mtx); 1207 if (sc->flags & MD_SHUTDOWN) { 1208 sc->flags |= MD_EXITING; 1209 mtx_unlock(&sc->queue_mtx); 1210 kproc_exit(0); 1211 } 1212 bp = bioq_takefirst(&sc->bio_queue); 1213 if (!bp) { 1214 msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); 1215 continue; 1216 } 1217 mtx_unlock(&sc->queue_mtx); 1218 if (bp->bio_cmd == BIO_GETATTR) { 1219 int isv = ((sc->flags & MD_VERIFY) != 0); 1220 1221 if ((sc->fwsectors && sc->fwheads && 1222 (g_handleattr_int(bp, "GEOM::fwsectors", 1223 sc->fwsectors) || 1224 g_handleattr_int(bp, "GEOM::fwheads", 1225 sc->fwheads))) || 1226 g_handleattr_int(bp, "GEOM::candelete", 1)) 1227 error = -1; 1228 else if (sc->ident[0] != '\0' && 1229 g_handleattr_str(bp, "GEOM::ident", sc->ident)) 1230 error = -1; 1231 else if (g_handleattr_int(bp, "MNT::verified", isv)) 1232 error = -1; 1233 else 1234 error = EOPNOTSUPP; 1235 } else { 1236 error = sc->start(sc, bp); 1237 } 1238 1239 if (error != -1) { 1240 bp->bio_completed = bp->bio_length; 1241 if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) 1242 devstat_end_transaction_bio(sc->devstat, bp); 1243 g_io_deliver(bp, error); 1244 } 1245 } 1246 } 1247 1248 static struct md_s * 1249 mdfind(int unit) 1250 { 1251 struct md_s *sc; 1252 1253 LIST_FOREACH(sc, &md_softc_list, list) { 1254 if (sc->unit == unit) 1255 break; 1256 } 1257 return (sc); 1258 } 1259 1260 static struct md_s * 1261 mdnew(int unit, int *errp, enum md_types type) 1262 { 1263 struct md_s *sc; 1264 int error; 1265 1266 *errp = 0; 1267 if (unit == -1) 1268 unit = alloc_unr(md_uh); 1269 else 1270 unit = alloc_unr_specific(md_uh, unit); 1271 1272 if (unit == -1) { 1273 *errp = EBUSY; 1274 return (NULL); 1275 } 1276 1277 sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); 1278 sc->type = type; 1279 bioq_init(&sc->bio_queue); 1280 mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); 1281 mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF); 1282 sc->unit = unit; 1283 sprintf(sc->name, "md%d", unit); 1284 LIST_INSERT_HEAD(&md_softc_list, sc, list); 1285 error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); 1286 if (error == 0) 1287 return (sc); 1288 LIST_REMOVE(sc, list); 1289 mtx_destroy(&sc->stat_mtx); 1290 mtx_destroy(&sc->queue_mtx); 1291 free_unr(md_uh, sc->unit); 1292 free(sc, M_MD); 1293 *errp = error; 1294 return (NULL); 1295 } 1296 1297 static void 1298 mdinit(struct md_s *sc) 1299 { 1300 struct g_geom *gp; 1301 struct g_provider *pp; 1302 1303 g_topology_lock(); 1304 gp = g_new_geomf(&g_md_class, "md%d", sc->unit); 1305 gp->softc = sc; 1306 pp = g_new_providerf(gp, "md%d", sc->unit); 1307 pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; 1308 pp->mediasize = sc->mediasize; 1309 pp->sectorsize = sc->sectorsize; 1310 switch (sc->type) { 1311 case MD_MALLOC: 1312 case MD_VNODE: 1313 case MD_SWAP: 1314 pp->flags |= G_PF_ACCEPT_UNMAPPED; 1315 break; 1316 case MD_PRELOAD: 1317 case MD_NULL: 1318 break; 1319 } 1320 sc->gp = gp; 1321 sc->pp = pp; 1322 g_error_provider(pp, 0); 1323 g_topology_unlock(); 1324 sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, 1325 DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); 1326 } 1327 1328 static int 1329 mdcreate_malloc(struct md_s *sc, struct md_req *mdr) 1330 { 1331 uintptr_t sp; 1332 int error; 1333 off_t u; 1334 1335 error = 0; 1336 if (mdr->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) 1337 return (EINVAL); 1338 if (mdr->md_sectorsize != 0 && !powerof2(mdr->md_sectorsize)) 1339 return (EINVAL); 1340 /* Compression doesn't make sense if we have reserved space */ 1341 if (mdr->md_options & MD_RESERVE) 1342 mdr->md_options &= ~MD_COMPRESS; 1343 if (mdr->md_fwsectors != 0) 1344 sc->fwsectors = mdr->md_fwsectors; 1345 if (mdr->md_fwheads != 0) 1346 sc->fwheads = mdr->md_fwheads; 1347 sc->flags = mdr->md_options & (MD_COMPRESS | MD_FORCE); 1348 sc->indir = dimension(sc->mediasize / sc->sectorsize); 1349 sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 1350 0x1ff, 0); 1351 if (mdr->md_options & MD_RESERVE) { 1352 off_t nsectors; 1353 1354 nsectors = sc->mediasize / sc->sectorsize; 1355 for (u = 0; u < nsectors; u++) { 1356 sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ? 1357 M_WAITOK : M_NOWAIT) | M_ZERO); 1358 if (sp != 0) 1359 error = s_write(sc->indir, u, sp); 1360 else 1361 error = ENOMEM; 1362 if (error != 0) 1363 break; 1364 } 1365 } 1366 return (error); 1367 } 1368 1369 1370 static int 1371 mdsetcred(struct md_s *sc, struct ucred *cred) 1372 { 1373 char *tmpbuf; 1374 int error = 0; 1375 1376 /* 1377 * Set credits in our softc 1378 */ 1379 1380 if (sc->cred) 1381 crfree(sc->cred); 1382 sc->cred = crhold(cred); 1383 1384 /* 1385 * Horrible kludge to establish credentials for NFS XXX. 1386 */ 1387 1388 if (sc->vnode) { 1389 struct uio auio; 1390 struct iovec aiov; 1391 1392 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); 1393 bzero(&auio, sizeof(auio)); 1394 1395 aiov.iov_base = tmpbuf; 1396 aiov.iov_len = sc->sectorsize; 1397 auio.uio_iov = &aiov; 1398 auio.uio_iovcnt = 1; 1399 auio.uio_offset = 0; 1400 auio.uio_rw = UIO_READ; 1401 auio.uio_segflg = UIO_SYSSPACE; 1402 auio.uio_resid = aiov.iov_len; 1403 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1404 error = VOP_READ(sc->vnode, &auio, 0, sc->cred); 1405 VOP_UNLOCK(sc->vnode, 0); 1406 free(tmpbuf, M_TEMP); 1407 } 1408 return (error); 1409 } 1410 1411 static int 1412 mdcreate_vnode(struct md_s *sc, struct md_req *mdr, struct thread *td) 1413 { 1414 struct vattr vattr; 1415 struct nameidata nd; 1416 char *fname; 1417 int error, flags; 1418 1419 fname = mdr->md_file; 1420 if (mdr->md_file_seg == UIO_USERSPACE) { 1421 error = copyinstr(fname, sc->file, sizeof(sc->file), NULL); 1422 if (error != 0) 1423 return (error); 1424 } else if (mdr->md_file_seg == UIO_SYSSPACE) 1425 strlcpy(sc->file, fname, sizeof(sc->file)); 1426 else 1427 return (EDOOFUS); 1428 1429 /* 1430 * If the user specified that this is a read only device, don't 1431 * set the FWRITE mask before trying to open the backing store. 1432 */ 1433 flags = FREAD | ((mdr->md_options & MD_READONLY) ? 0 : FWRITE) \ 1434 | ((mdr->md_options & MD_VERIFY) ? O_VERIFY : 0); 1435 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, sc->file, td); 1436 error = vn_open(&nd, &flags, 0, NULL); 1437 if (error != 0) 1438 return (error); 1439 NDFREE(&nd, NDF_ONLY_PNBUF); 1440 if (nd.ni_vp->v_type != VREG) { 1441 error = EINVAL; 1442 goto bad; 1443 } 1444 error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); 1445 if (error != 0) 1446 goto bad; 1447 if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { 1448 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); 1449 if (nd.ni_vp->v_iflag & VI_DOOMED) { 1450 /* Forced unmount. */ 1451 error = EBADF; 1452 goto bad; 1453 } 1454 } 1455 nd.ni_vp->v_vflag |= VV_MD; 1456 VOP_UNLOCK(nd.ni_vp, 0); 1457 1458 if (mdr->md_fwsectors != 0) 1459 sc->fwsectors = mdr->md_fwsectors; 1460 if (mdr->md_fwheads != 0) 1461 sc->fwheads = mdr->md_fwheads; 1462 snprintf(sc->ident, sizeof(sc->ident), "MD-DEV%ju-INO%ju", 1463 (uintmax_t)vattr.va_fsid, (uintmax_t)vattr.va_fileid); 1464 sc->flags = mdr->md_options & (MD_FORCE | MD_ASYNC | MD_VERIFY); 1465 if (!(flags & FWRITE)) 1466 sc->flags |= MD_READONLY; 1467 sc->vnode = nd.ni_vp; 1468 1469 error = mdsetcred(sc, td->td_ucred); 1470 if (error != 0) { 1471 sc->vnode = NULL; 1472 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); 1473 nd.ni_vp->v_vflag &= ~VV_MD; 1474 goto bad; 1475 } 1476 return (0); 1477 bad: 1478 VOP_UNLOCK(nd.ni_vp, 0); 1479 (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); 1480 return (error); 1481 } 1482 1483 static int 1484 mddestroy(struct md_s *sc, struct thread *td) 1485 { 1486 1487 if (sc->gp) { 1488 sc->gp->softc = NULL; 1489 g_topology_lock(); 1490 g_wither_geom(sc->gp, ENXIO); 1491 g_topology_unlock(); 1492 sc->gp = NULL; 1493 sc->pp = NULL; 1494 } 1495 if (sc->devstat) { 1496 devstat_remove_entry(sc->devstat); 1497 sc->devstat = NULL; 1498 } 1499 mtx_lock(&sc->queue_mtx); 1500 sc->flags |= MD_SHUTDOWN; 1501 wakeup(sc); 1502 while (!(sc->flags & MD_EXITING)) 1503 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); 1504 mtx_unlock(&sc->queue_mtx); 1505 mtx_destroy(&sc->stat_mtx); 1506 mtx_destroy(&sc->queue_mtx); 1507 if (sc->vnode != NULL) { 1508 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); 1509 sc->vnode->v_vflag &= ~VV_MD; 1510 VOP_UNLOCK(sc->vnode, 0); 1511 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? 1512 FREAD : (FREAD|FWRITE), sc->cred, td); 1513 } 1514 if (sc->cred != NULL) 1515 crfree(sc->cred); 1516 if (sc->object != NULL) 1517 vm_object_deallocate(sc->object); 1518 if (sc->indir) 1519 destroy_indir(sc, sc->indir); 1520 if (sc->uma) 1521 uma_zdestroy(sc->uma); 1522 1523 LIST_REMOVE(sc, list); 1524 free_unr(md_uh, sc->unit); 1525 free(sc, M_MD); 1526 return (0); 1527 } 1528 1529 static int 1530 mdresize(struct md_s *sc, struct md_req *mdr) 1531 { 1532 int error, res; 1533 vm_pindex_t oldpages, newpages; 1534 1535 switch (sc->type) { 1536 case MD_VNODE: 1537 case MD_NULL: 1538 break; 1539 case MD_SWAP: 1540 if (mdr->md_mediasize <= 0 || 1541 (mdr->md_mediasize % PAGE_SIZE) != 0) 1542 return (EDOM); 1543 oldpages = OFF_TO_IDX(round_page(sc->mediasize)); 1544 newpages = OFF_TO_IDX(round_page(mdr->md_mediasize)); 1545 if (newpages < oldpages) { 1546 VM_OBJECT_WLOCK(sc->object); 1547 vm_object_page_remove(sc->object, newpages, 0, 0); 1548 swap_pager_freespace(sc->object, newpages, 1549 oldpages - newpages); 1550 swap_release_by_cred(IDX_TO_OFF(oldpages - 1551 newpages), sc->cred); 1552 sc->object->charge = IDX_TO_OFF(newpages); 1553 sc->object->size = newpages; 1554 VM_OBJECT_WUNLOCK(sc->object); 1555 } else if (newpages > oldpages) { 1556 res = swap_reserve_by_cred(IDX_TO_OFF(newpages - 1557 oldpages), sc->cred); 1558 if (!res) 1559 return (ENOMEM); 1560 if ((mdr->md_options & MD_RESERVE) || 1561 (sc->flags & MD_RESERVE)) { 1562 error = swap_pager_reserve(sc->object, 1563 oldpages, newpages - oldpages); 1564 if (error < 0) { 1565 swap_release_by_cred( 1566 IDX_TO_OFF(newpages - oldpages), 1567 sc->cred); 1568 return (EDOM); 1569 } 1570 } 1571 VM_OBJECT_WLOCK(sc->object); 1572 sc->object->charge = IDX_TO_OFF(newpages); 1573 sc->object->size = newpages; 1574 VM_OBJECT_WUNLOCK(sc->object); 1575 } 1576 break; 1577 default: 1578 return (EOPNOTSUPP); 1579 } 1580 1581 sc->mediasize = mdr->md_mediasize; 1582 g_topology_lock(); 1583 g_resize_provider(sc->pp, sc->mediasize); 1584 g_topology_unlock(); 1585 return (0); 1586 } 1587 1588 static int 1589 mdcreate_swap(struct md_s *sc, struct md_req *mdr, struct thread *td) 1590 { 1591 vm_ooffset_t npage; 1592 int error; 1593 1594 /* 1595 * Range check. Disallow negative sizes and sizes not being 1596 * multiple of page size. 1597 */ 1598 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1599 return (EDOM); 1600 1601 /* 1602 * Allocate an OBJT_SWAP object. 1603 * 1604 * Note the truncation. 1605 */ 1606 1607 if ((mdr->md_options & MD_VERIFY) != 0) 1608 return (EINVAL); 1609 npage = mdr->md_mediasize / PAGE_SIZE; 1610 if (mdr->md_fwsectors != 0) 1611 sc->fwsectors = mdr->md_fwsectors; 1612 if (mdr->md_fwheads != 0) 1613 sc->fwheads = mdr->md_fwheads; 1614 sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, 1615 VM_PROT_DEFAULT, 0, td->td_ucred); 1616 if (sc->object == NULL) 1617 return (ENOMEM); 1618 sc->flags = mdr->md_options & (MD_FORCE | MD_RESERVE); 1619 if (mdr->md_options & MD_RESERVE) { 1620 if (swap_pager_reserve(sc->object, 0, npage) < 0) { 1621 error = EDOM; 1622 goto finish; 1623 } 1624 } 1625 error = mdsetcred(sc, td->td_ucred); 1626 finish: 1627 if (error != 0) { 1628 vm_object_deallocate(sc->object); 1629 sc->object = NULL; 1630 } 1631 return (error); 1632 } 1633 1634 static int 1635 mdcreate_null(struct md_s *sc, struct md_req *mdr, struct thread *td) 1636 { 1637 1638 /* 1639 * Range check. Disallow negative sizes and sizes not being 1640 * multiple of page size. 1641 */ 1642 if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0) 1643 return (EDOM); 1644 1645 return (0); 1646 } 1647 1648 static int 1649 kern_mdattach_locked(struct thread *td, struct md_req *mdr) 1650 { 1651 struct md_s *sc; 1652 unsigned sectsize; 1653 int error, i; 1654 1655 sx_assert(&md_sx, SA_XLOCKED); 1656 1657 switch (mdr->md_type) { 1658 case MD_MALLOC: 1659 case MD_PRELOAD: 1660 case MD_VNODE: 1661 case MD_SWAP: 1662 case MD_NULL: 1663 break; 1664 default: 1665 return (EINVAL); 1666 } 1667 if (mdr->md_sectorsize == 0) 1668 sectsize = DEV_BSIZE; 1669 else 1670 sectsize = mdr->md_sectorsize; 1671 if (sectsize > MAXPHYS || mdr->md_mediasize < sectsize) 1672 return (EINVAL); 1673 if (mdr->md_options & MD_AUTOUNIT) 1674 sc = mdnew(-1, &error, mdr->md_type); 1675 else { 1676 if (mdr->md_unit > INT_MAX) 1677 return (EINVAL); 1678 sc = mdnew(mdr->md_unit, &error, mdr->md_type); 1679 } 1680 if (sc == NULL) 1681 return (error); 1682 if (mdr->md_label != NULL) 1683 error = copyinstr(mdr->md_label, sc->label, 1684 sizeof(sc->label), NULL); 1685 if (error != 0) 1686 goto err_after_new; 1687 if (mdr->md_options & MD_AUTOUNIT) 1688 mdr->md_unit = sc->unit; 1689 sc->mediasize = mdr->md_mediasize; 1690 sc->sectorsize = sectsize; 1691 error = EDOOFUS; 1692 switch (sc->type) { 1693 case MD_MALLOC: 1694 sc->start = mdstart_malloc; 1695 error = mdcreate_malloc(sc, mdr); 1696 break; 1697 case MD_PRELOAD: 1698 /* 1699 * We disallow attaching preloaded memory disks via 1700 * ioctl. Preloaded memory disks are automatically 1701 * attached in g_md_init(). 1702 */ 1703 error = EOPNOTSUPP; 1704 break; 1705 case MD_VNODE: 1706 sc->start = mdstart_vnode; 1707 error = mdcreate_vnode(sc, mdr, td); 1708 break; 1709 case MD_SWAP: 1710 sc->start = mdstart_swap; 1711 error = mdcreate_swap(sc, mdr, td); 1712 break; 1713 case MD_NULL: 1714 sc->start = mdstart_null; 1715 error = mdcreate_null(sc, mdr, td); 1716 break; 1717 } 1718 err_after_new: 1719 if (error != 0) { 1720 mddestroy(sc, td); 1721 return (error); 1722 } 1723 1724 /* Prune off any residual fractional sector */ 1725 i = sc->mediasize % sc->sectorsize; 1726 sc->mediasize -= i; 1727 1728 mdinit(sc); 1729 return (0); 1730 } 1731 1732 static int 1733 kern_mdattach(struct thread *td, struct md_req *mdr) 1734 { 1735 int error; 1736 1737 sx_xlock(&md_sx); 1738 error = kern_mdattach_locked(td, mdr); 1739 sx_xunlock(&md_sx); 1740 return (error); 1741 } 1742 1743 static int 1744 kern_mddetach_locked(struct thread *td, struct md_req *mdr) 1745 { 1746 struct md_s *sc; 1747 1748 sx_assert(&md_sx, SA_XLOCKED); 1749 1750 if (mdr->md_mediasize != 0 || 1751 (mdr->md_options & ~MD_FORCE) != 0) 1752 return (EINVAL); 1753 1754 sc = mdfind(mdr->md_unit); 1755 if (sc == NULL) 1756 return (ENOENT); 1757 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && 1758 !(mdr->md_options & MD_FORCE)) 1759 return (EBUSY); 1760 return (mddestroy(sc, td)); 1761 } 1762 1763 static int 1764 kern_mddetach(struct thread *td, struct md_req *mdr) 1765 { 1766 int error; 1767 1768 sx_xlock(&md_sx); 1769 error = kern_mddetach_locked(td, mdr); 1770 sx_xunlock(&md_sx); 1771 return (error); 1772 } 1773 1774 static int 1775 kern_mdresize_locked(struct md_req *mdr) 1776 { 1777 struct md_s *sc; 1778 1779 sx_assert(&md_sx, SA_XLOCKED); 1780 1781 if ((mdr->md_options & ~(MD_FORCE | MD_RESERVE)) != 0) 1782 return (EINVAL); 1783 1784 sc = mdfind(mdr->md_unit); 1785 if (sc == NULL) 1786 return (ENOENT); 1787 if (mdr->md_mediasize < sc->sectorsize) 1788 return (EINVAL); 1789 if (mdr->md_mediasize < sc->mediasize && 1790 !(sc->flags & MD_FORCE) && 1791 !(mdr->md_options & MD_FORCE)) 1792 return (EBUSY); 1793 return (mdresize(sc, mdr)); 1794 } 1795 1796 static int 1797 kern_mdresize(struct md_req *mdr) 1798 { 1799 int error; 1800 1801 sx_xlock(&md_sx); 1802 error = kern_mdresize_locked(mdr); 1803 sx_xunlock(&md_sx); 1804 return (error); 1805 } 1806 1807 static int 1808 kern_mdquery_locked(struct md_req *mdr) 1809 { 1810 struct md_s *sc; 1811 int error; 1812 1813 sx_assert(&md_sx, SA_XLOCKED); 1814 1815 sc = mdfind(mdr->md_unit); 1816 if (sc == NULL) 1817 return (ENOENT); 1818 mdr->md_type = sc->type; 1819 mdr->md_options = sc->flags; 1820 mdr->md_mediasize = sc->mediasize; 1821 mdr->md_sectorsize = sc->sectorsize; 1822 error = 0; 1823 if (mdr->md_label != NULL) { 1824 error = copyout(sc->label, mdr->md_label, 1825 strlen(sc->label) + 1); 1826 if (error != 0) 1827 return (error); 1828 } 1829 if (sc->type == MD_VNODE || 1830 (sc->type == MD_PRELOAD && mdr->md_file != NULL)) 1831 error = copyout(sc->file, mdr->md_file, 1832 strlen(sc->file) + 1); 1833 return (error); 1834 } 1835 1836 static int 1837 kern_mdquery(struct md_req *mdr) 1838 { 1839 int error; 1840 1841 sx_xlock(&md_sx); 1842 error = kern_mdquery_locked(mdr); 1843 sx_xunlock(&md_sx); 1844 return (error); 1845 } 1846 1847 static int 1848 kern_mdlist_locked(struct md_req *mdr) 1849 { 1850 struct md_s *sc; 1851 int i; 1852 1853 sx_assert(&md_sx, SA_XLOCKED); 1854 1855 /* 1856 * Write the number of md devices to mdr->md_units[0]. 1857 * Write the unit number of the first (mdr->md_units_nitems - 2) 1858 * units to mdr->md_units[1::(mdr->md_units - 2)] and terminate the 1859 * list with -1. 1860 * 1861 * XXX: There is currently no mechanism to retrieve unit 1862 * numbers for more than (MDNPAD - 2) units. 1863 * 1864 * XXX: Due to the use of LIST_INSERT_HEAD in mdnew(), the 1865 * list of visible unit numbers not stable. 1866 */ 1867 i = 1; 1868 LIST_FOREACH(sc, &md_softc_list, list) { 1869 if (i < mdr->md_units_nitems - 1) 1870 mdr->md_units[i] = sc->unit; 1871 i++; 1872 } 1873 mdr->md_units[MIN(i, mdr->md_units_nitems - 1)] = -1; 1874 mdr->md_units[0] = i - 1; 1875 return (0); 1876 } 1877 1878 static int 1879 kern_mdlist(struct md_req *mdr) 1880 { 1881 int error; 1882 1883 sx_xlock(&md_sx); 1884 error = kern_mdlist_locked(mdr); 1885 sx_xunlock(&md_sx); 1886 return (error); 1887 } 1888 1889 /* Copy members that are not userspace pointers. */ 1890 #define MD_IOCTL2REQ(mdio, mdr) do { \ 1891 (mdr)->md_unit = (mdio)->md_unit; \ 1892 (mdr)->md_type = (mdio)->md_type; \ 1893 (mdr)->md_mediasize = (mdio)->md_mediasize; \ 1894 (mdr)->md_sectorsize = (mdio)->md_sectorsize; \ 1895 (mdr)->md_options = (mdio)->md_options; \ 1896 (mdr)->md_fwheads = (mdio)->md_fwheads; \ 1897 (mdr)->md_fwsectors = (mdio)->md_fwsectors; \ 1898 (mdr)->md_units = &(mdio)->md_pad[0]; \ 1899 (mdr)->md_units_nitems = nitems((mdio)->md_pad); \ 1900 } while(0) 1901 1902 /* Copy members that might have been updated */ 1903 #define MD_REQ2IOCTL(mdr, mdio) do { \ 1904 (mdio)->md_unit = (mdr)->md_unit; \ 1905 (mdio)->md_type = (mdr)->md_type; \ 1906 (mdio)->md_mediasize = (mdr)->md_mediasize; \ 1907 (mdio)->md_sectorsize = (mdr)->md_sectorsize; \ 1908 (mdio)->md_options = (mdr)->md_options; \ 1909 (mdio)->md_fwheads = (mdr)->md_fwheads; \ 1910 (mdio)->md_fwsectors = (mdr)->md_fwsectors; \ 1911 } while(0) 1912 1913 static int 1914 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, 1915 struct thread *td) 1916 { 1917 struct md_req mdr; 1918 int error; 1919 1920 if (md_debug) 1921 printf("mdctlioctl(%s %lx %p %x %p)\n", 1922 devtoname(dev), cmd, addr, flags, td); 1923 1924 bzero(&mdr, sizeof(mdr)); 1925 switch (cmd) { 1926 case MDIOCATTACH: 1927 case MDIOCDETACH: 1928 case MDIOCRESIZE: 1929 case MDIOCQUERY: 1930 case MDIOCLIST: { 1931 struct md_ioctl *mdio = (struct md_ioctl *)addr; 1932 if (mdio->md_version != MDIOVERSION) 1933 return (EINVAL); 1934 MD_IOCTL2REQ(mdio, &mdr); 1935 mdr.md_file = mdio->md_file; 1936 mdr.md_file_seg = UIO_USERSPACE; 1937 /* If the file is adjacent to the md_ioctl it's in kernel. */ 1938 if ((void *)mdio->md_file == (void *)(mdio + 1)) 1939 mdr.md_file_seg = UIO_SYSSPACE; 1940 mdr.md_label = mdio->md_label; 1941 break; 1942 } 1943 #ifdef COMPAT_FREEBSD32 1944 case MDIOCATTACH_32: 1945 case MDIOCDETACH_32: 1946 case MDIOCRESIZE_32: 1947 case MDIOCQUERY_32: 1948 case MDIOCLIST_32: { 1949 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 1950 if (mdio->md_version != MDIOVERSION) 1951 return (EINVAL); 1952 MD_IOCTL2REQ(mdio, &mdr); 1953 mdr.md_file = (void *)(uintptr_t)mdio->md_file; 1954 mdr.md_file_seg = UIO_USERSPACE; 1955 mdr.md_label = (void *)(uintptr_t)mdio->md_label; 1956 break; 1957 } 1958 #endif 1959 default: 1960 /* Fall through to handler switch. */ 1961 break; 1962 } 1963 1964 error = 0; 1965 switch (cmd) { 1966 case MDIOCATTACH: 1967 #ifdef COMPAT_FREEBSD32 1968 case MDIOCATTACH_32: 1969 #endif 1970 error = kern_mdattach(td, &mdr); 1971 break; 1972 case MDIOCDETACH: 1973 #ifdef COMPAT_FREEBSD32 1974 case MDIOCDETACH_32: 1975 #endif 1976 error = kern_mddetach(td, &mdr); 1977 break; 1978 case MDIOCRESIZE: 1979 #ifdef COMPAT_FREEBSD32 1980 case MDIOCRESIZE_32: 1981 #endif 1982 error = kern_mdresize(&mdr); 1983 break; 1984 case MDIOCQUERY: 1985 #ifdef COMPAT_FREEBSD32 1986 case MDIOCQUERY_32: 1987 #endif 1988 error = kern_mdquery(&mdr); 1989 break; 1990 case MDIOCLIST: 1991 #ifdef COMPAT_FREEBSD32 1992 case MDIOCLIST_32: 1993 #endif 1994 error = kern_mdlist(&mdr); 1995 break; 1996 default: 1997 error = ENOIOCTL; 1998 } 1999 2000 switch (cmd) { 2001 case MDIOCATTACH: 2002 case MDIOCQUERY: { 2003 struct md_ioctl *mdio = (struct md_ioctl *)addr; 2004 MD_REQ2IOCTL(&mdr, mdio); 2005 break; 2006 } 2007 #ifdef COMPAT_FREEBSD32 2008 case MDIOCATTACH_32: 2009 case MDIOCQUERY_32: { 2010 struct md_ioctl32 *mdio = (struct md_ioctl32 *)addr; 2011 MD_REQ2IOCTL(&mdr, mdio); 2012 break; 2013 } 2014 #endif 2015 default: 2016 /* Other commands to not alter mdr. */ 2017 break; 2018 } 2019 2020 return (error); 2021 } 2022 2023 static void 2024 md_preloaded(u_char *image, size_t length, const char *name) 2025 { 2026 struct md_s *sc; 2027 int error; 2028 2029 sc = mdnew(-1, &error, MD_PRELOAD); 2030 if (sc == NULL) 2031 return; 2032 sc->mediasize = length; 2033 sc->sectorsize = DEV_BSIZE; 2034 sc->pl_ptr = image; 2035 sc->pl_len = length; 2036 sc->start = mdstart_preload; 2037 if (name != NULL) 2038 strlcpy(sc->file, name, sizeof(sc->file)); 2039 #ifdef MD_ROOT 2040 if (sc->unit == 0) { 2041 #ifndef ROOTDEVNAME 2042 rootdevnames[0] = MD_ROOT_FSTYPE ":/dev/md0"; 2043 #endif 2044 #ifdef MD_ROOT_READONLY 2045 sc->flags |= MD_READONLY; 2046 #endif 2047 } 2048 #endif 2049 mdinit(sc); 2050 if (name != NULL) { 2051 printf("%s%d: Preloaded image <%s> %zd bytes at %p\n", 2052 MD_NAME, sc->unit, name, length, image); 2053 } else { 2054 printf("%s%d: Embedded image %zd bytes at %p\n", 2055 MD_NAME, sc->unit, length, image); 2056 } 2057 } 2058 2059 static void 2060 g_md_init(struct g_class *mp __unused) 2061 { 2062 caddr_t mod; 2063 u_char *ptr, *name, *type; 2064 unsigned len; 2065 int i; 2066 2067 /* figure out log2(NINDIR) */ 2068 for (i = NINDIR, nshift = -1; i; nshift++) 2069 i >>= 1; 2070 2071 mod = NULL; 2072 sx_init(&md_sx, "MD config lock"); 2073 g_topology_unlock(); 2074 md_uh = new_unrhdr(0, INT_MAX, NULL); 2075 #ifdef MD_ROOT 2076 if (mfs_root_size != 0) { 2077 sx_xlock(&md_sx); 2078 md_preloaded(__DEVOLATILE(u_char *, &mfs_root), mfs_root_size, 2079 NULL); 2080 sx_xunlock(&md_sx); 2081 } 2082 #endif 2083 /* XXX: are preload_* static or do they need Giant ? */ 2084 while ((mod = preload_search_next_name(mod)) != NULL) { 2085 name = (char *)preload_search_info(mod, MODINFO_NAME); 2086 if (name == NULL) 2087 continue; 2088 type = (char *)preload_search_info(mod, MODINFO_TYPE); 2089 if (type == NULL) 2090 continue; 2091 if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) 2092 continue; 2093 ptr = preload_fetch_addr(mod); 2094 len = preload_fetch_size(mod); 2095 if (ptr != NULL && len != 0) { 2096 sx_xlock(&md_sx); 2097 md_preloaded(ptr, len, name); 2098 sx_xunlock(&md_sx); 2099 } 2100 } 2101 md_vnode_pbuf_freecnt = nswbuf / 10; 2102 status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 2103 0600, MDCTL_NAME); 2104 g_topology_lock(); 2105 } 2106 2107 static void 2108 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, 2109 struct g_consumer *cp __unused, struct g_provider *pp) 2110 { 2111 struct md_s *mp; 2112 char *type; 2113 2114 mp = gp->softc; 2115 if (mp == NULL) 2116 return; 2117 2118 switch (mp->type) { 2119 case MD_MALLOC: 2120 type = "malloc"; 2121 break; 2122 case MD_PRELOAD: 2123 type = "preload"; 2124 break; 2125 case MD_VNODE: 2126 type = "vnode"; 2127 break; 2128 case MD_SWAP: 2129 type = "swap"; 2130 break; 2131 case MD_NULL: 2132 type = "null"; 2133 break; 2134 default: 2135 type = "unknown"; 2136 break; 2137 } 2138 2139 if (pp != NULL) { 2140 if (indent == NULL) { 2141 sbuf_printf(sb, " u %d", mp->unit); 2142 sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); 2143 sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); 2144 sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); 2145 sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); 2146 sbuf_printf(sb, " t %s", type); 2147 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2148 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) 2149 sbuf_printf(sb, " file %s", mp->file); 2150 sbuf_printf(sb, " label %s", mp->label); 2151 } else { 2152 sbuf_printf(sb, "%s<unit>%d</unit>\n", indent, 2153 mp->unit); 2154 sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n", 2155 indent, (uintmax_t) mp->sectorsize); 2156 sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n", 2157 indent, (uintmax_t) mp->fwheads); 2158 sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n", 2159 indent, (uintmax_t) mp->fwsectors); 2160 if (mp->ident[0] != '\0') { 2161 sbuf_printf(sb, "%s<ident>", indent); 2162 g_conf_printf_escaped(sb, "%s", mp->ident); 2163 sbuf_printf(sb, "</ident>\n"); 2164 } 2165 sbuf_printf(sb, "%s<length>%ju</length>\n", 2166 indent, (uintmax_t) mp->mediasize); 2167 sbuf_printf(sb, "%s<compression>%s</compression>\n", indent, 2168 (mp->flags & MD_COMPRESS) == 0 ? "off": "on"); 2169 sbuf_printf(sb, "%s<access>%s</access>\n", indent, 2170 (mp->flags & MD_READONLY) == 0 ? "read-write": 2171 "read-only"); 2172 sbuf_printf(sb, "%s<type>%s</type>\n", indent, 2173 type); 2174 if ((mp->type == MD_VNODE && mp->vnode != NULL) || 2175 (mp->type == MD_PRELOAD && mp->file[0] != '\0')) { 2176 sbuf_printf(sb, "%s<file>", indent); 2177 g_conf_printf_escaped(sb, "%s", mp->file); 2178 sbuf_printf(sb, "</file>\n"); 2179 } 2180 sbuf_printf(sb, "%s<label>", indent); 2181 g_conf_printf_escaped(sb, "%s", mp->label); 2182 sbuf_printf(sb, "</label>\n"); 2183 } 2184 } 2185 } 2186 2187 static void 2188 g_md_fini(struct g_class *mp __unused) 2189 { 2190 2191 sx_destroy(&md_sx); 2192 if (status_dev != NULL) 2193 destroy_dev(status_dev); 2194 delete_unrhdr(md_uh); 2195 } 2196