1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/file.h> 29 #include <sys/stat.h> 30 #include <sys/atomic.h> 31 #include <sys/mntio.h> 32 #include <sys/mnttab.h> 33 #include <sys/mount.h> 34 #include <sys/sunddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/fs/mntdata.h> 40 #include <fs/fs_subr.h> 41 #include <sys/vmsystm.h> 42 #include <vm/seg_vn.h> 43 44 #define MNTROOTINO 2 45 46 static mntnode_t *mntgetnode(vnode_t *); 47 48 vnodeops_t *mntvnodeops; 49 50 /* 51 * Design of kernel mnttab accounting. 52 * 53 * To support whitespace in mount names, we implement an ioctl 54 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in 55 * /etc/mnttab. The libc functions getmntent() and getextmntent() are built 56 * atop this interface. 57 * 58 * To minimize the amount of memory used in the kernel, we keep all the 59 * necessary information in the user's address space. Large server 60 * configurations can have /etc/mnttab files in excess of 64k. 61 * 62 * To support both vanilla read() calls as well as ioctl() calls, we have two 63 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl. 64 * These snapshots include the base location in user memory, the number of 65 * mounts in the snapshot, and any metadata associated with it. The metadata is 66 * used only to support the ioctl() interface, and is a series of extmnttab 67 * structures. When the user issues an ioctl(), we simply copyout a pointer to 68 * that structure, and the rest is handled in userland. 69 */ 70 71 /* 72 * NOTE: The following variable enables the generation of the "dev=xxx" 73 * in the option string for a mounted file system. Really this should 74 * be gotten rid of altogether, but for the sake of backwards compatibility 75 * we had to leave it in. It is defined as a 32-bit device number. This 76 * means that when 64-bit device numbers are in use, if either the major or 77 * minor part of the device number will not fit in a 16 bit quantity, the 78 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 79 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 80 * device number handles this check and assigns the proper value. 81 */ 82 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 83 84 static int 85 mntfs_devsize(struct vfs *vfsp) 86 { 87 dev32_t odev; 88 89 (void) cmpldev(&odev, vfsp->vfs_dev); 90 return (snprintf(NULL, 0, "dev=%x", odev)); 91 } 92 93 static int 94 mntfs_devprint(struct vfs *vfsp, char *buf) 95 { 96 dev32_t odev; 97 98 (void) cmpldev(&odev, vfsp->vfs_dev); 99 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 100 } 101 102 static int 103 mntfs_optsize(struct vfs *vfsp) 104 { 105 int i, size = 0; 106 mntopt_t *mop; 107 108 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 109 mop = &vfsp->vfs_mntopts.mo_list[i]; 110 if (mop->mo_flags & MO_NODISPLAY) 111 continue; 112 if (mop->mo_flags & MO_SET) { 113 if (size) 114 size++; /* space for comma */ 115 size += strlen(mop->mo_name); 116 /* 117 * count option value if there is one 118 */ 119 if (mop->mo_arg != NULL) { 120 size += strlen(mop->mo_arg) + 1; 121 } 122 } 123 } 124 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 125 /* 126 * Add space for "zone=<zone_name>" if required. 127 */ 128 if (size) 129 size++; /* space for comma */ 130 size += sizeof ("zone=") - 1; 131 size += strlen(vfsp->vfs_zone->zone_name); 132 } 133 if (mntfs_enabledev) { 134 if (size != 0) 135 size++; /* space for comma */ 136 size += mntfs_devsize(vfsp); 137 } 138 if (size == 0) 139 size = strlen("-"); 140 return (size); 141 } 142 143 static int 144 mntfs_optprint(struct vfs *vfsp, char *buf) 145 { 146 int i, optinbuf = 0; 147 mntopt_t *mop; 148 char *origbuf = buf; 149 150 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 151 mop = &vfsp->vfs_mntopts.mo_list[i]; 152 if (mop->mo_flags & MO_NODISPLAY) 153 continue; 154 if (mop->mo_flags & MO_SET) { 155 if (optinbuf) 156 *buf++ = ','; 157 else 158 optinbuf = 1; 159 buf += snprintf(buf, MAX_MNTOPT_STR, 160 "%s", mop->mo_name); 161 /* 162 * print option value if there is one 163 */ 164 if (mop->mo_arg != NULL) { 165 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 166 mop->mo_arg); 167 } 168 } 169 } 170 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 171 if (optinbuf) 172 *buf++ = ','; 173 else 174 optinbuf = 1; 175 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 176 vfsp->vfs_zone->zone_name); 177 } 178 if (mntfs_enabledev) { 179 if (optinbuf++) 180 *buf++ = ','; 181 buf += mntfs_devprint(vfsp, buf); 182 } 183 if (!optinbuf) { 184 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 185 } 186 return (buf - origbuf); 187 } 188 189 static size_t 190 mntfs_vfs_len(vfs_t *vfsp, zone_t *zone) 191 { 192 size_t size = 0; 193 const char *resource, *mntpt; 194 195 mntpt = refstr_value(vfsp->vfs_mntpt); 196 if (mntpt != NULL && mntpt[0] != '\0') { 197 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 198 } else { 199 size += strlen("-") + 1; 200 } 201 202 resource = refstr_value(vfsp->vfs_resource); 203 if (resource != NULL && resource[0] != '\0') { 204 if (resource[0] != '/') { 205 size += strlen(resource) + 1; 206 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 207 /* 208 * Same as the zone's view of the mount point. 209 */ 210 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 211 } else { 212 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 213 } 214 } else { 215 size += strlen("-") + 1; 216 } 217 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 218 size += mntfs_optsize(vfsp); 219 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 220 return (size); 221 } 222 223 static void 224 mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp) 225 { 226 /* 227 * Basically copy over the real vfs_t on which the root vnode is 228 * located, changing its mountpoint and resource to match those of 229 * the zone's rootpath. 230 */ 231 *rootvfsp = *zone->zone_rootvp->v_vfsp; 232 rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath); 233 rootvfsp->vfs_resource = rootvfsp->vfs_mntpt; 234 } 235 236 static size_t 237 mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden) 238 { 239 struct vfs *zonelist; 240 struct vfs *vfsp; 241 size_t size = 0; 242 uint_t cnt = 0; 243 244 ASSERT(zone->zone_rootpath != NULL); 245 246 /* 247 * If the zone has a root entry, it will be the first in the list. If 248 * it doesn't, we conjure one up. 249 */ 250 vfsp = zonelist = zone->zone_vfslist; 251 if (zonelist == NULL || 252 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 253 vfs_t tvfs; 254 /* 255 * The root of the zone is not a mount point. The vfs we want 256 * to report is that of the zone's root vnode. 257 */ 258 ASSERT(zone != global_zone); 259 mntfs_zonerootvfs(zone, &tvfs); 260 size += mntfs_vfs_len(&tvfs, zone); 261 refstr_rele(tvfs.vfs_mntpt); 262 cnt++; 263 } 264 if (zonelist == NULL) 265 goto out; 266 do { 267 /* 268 * Skip mounts that should not show up in mnttab 269 */ 270 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 271 vfsp = vfsp->vfs_zone_next; 272 continue; 273 } 274 cnt++; 275 size += mntfs_vfs_len(vfsp, zone); 276 vfsp = vfsp->vfs_zone_next; 277 } while (vfsp != zonelist); 278 out: 279 *nent_ptr = cnt; 280 return (size); 281 } 282 283 static size_t 284 mntfs_global_len(uint_t *nent_ptr, int showhidden) 285 { 286 struct vfs *vfsp; 287 size_t size = 0; 288 uint_t cnt = 0; 289 290 vfsp = rootvfs; 291 do { 292 /* 293 * Skip mounts that should not show up in mnttab 294 */ 295 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 296 vfsp = vfsp->vfs_next; 297 continue; 298 } 299 cnt++; 300 size += mntfs_vfs_len(vfsp, global_zone); 301 vfsp = vfsp->vfs_next; 302 } while (vfsp != rootvfs); 303 *nent_ptr = cnt; 304 return (size); 305 } 306 307 static void 308 mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab, 309 char **basep, int forread) 310 { 311 const char *resource, *mntpt; 312 char *cp = *basep; 313 314 mntpt = refstr_value(vfsp->vfs_mntpt); 315 resource = refstr_value(vfsp->vfs_resource); 316 317 if (tab) 318 tab->mnt_special = cp; 319 if (resource != NULL && resource[0] != '\0') { 320 if (resource[0] != '/') { 321 cp += snprintf(cp, MAXPATHLEN, "%s", resource); 322 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 323 /* 324 * Use the mount point as the resource. 325 */ 326 cp += snprintf(cp, MAXPATHLEN, "%s", 327 ZONE_PATH_TRANSLATE(mntpt, zone)); 328 } else { 329 cp += snprintf(cp, MAXPATHLEN, "%s", 330 ZONE_PATH_TRANSLATE(resource, zone)); 331 } 332 } else { 333 cp += snprintf(cp, MAXPATHLEN, "-"); 334 } 335 *cp++ = forread ? '\t' : '\0'; 336 337 if (tab) 338 tab->mnt_mountp = cp; 339 if (mntpt != NULL && mntpt[0] != '\0') { 340 /* 341 * We know the mount point is visible from within the zone, 342 * otherwise it wouldn't be on the zone's vfs list. 343 */ 344 cp += snprintf(cp, MAXPATHLEN, "%s", 345 ZONE_PATH_TRANSLATE(mntpt, zone)); 346 } else { 347 cp += snprintf(cp, MAXPATHLEN, "-"); 348 } 349 *cp++ = forread ? '\t' : '\0'; 350 351 if (tab) 352 tab->mnt_fstype = cp; 353 cp += snprintf(cp, MAXPATHLEN, "%s", 354 vfssw[vfsp->vfs_fstype].vsw_name); 355 *cp++ = forread ? '\t' : '\0'; 356 357 if (tab) 358 tab->mnt_mntopts = cp; 359 cp += mntfs_optprint(vfsp, cp); 360 *cp++ = forread ? '\t' : '\0'; 361 362 if (tab) 363 tab->mnt_time = cp; 364 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 365 *cp++ = forread ? '\n' : '\0'; 366 367 if (tab) { 368 tab->mnt_major = getmajor(vfsp->vfs_dev); 369 tab->mnt_minor = getminor(vfsp->vfs_dev); 370 } 371 372 *basep = cp; 373 } 374 375 static void 376 mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab, 377 char *basep, int forread) 378 { 379 vfs_t *zonelist; 380 vfs_t *vfsp; 381 char *cp = basep; 382 383 /* 384 * If the zone has a root entry, it will be the first in the list. If 385 * it doesn't, we conjure one up. 386 */ 387 vfsp = zonelist = zone->zone_vfslist; 388 if (zonelist == NULL || 389 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 390 vfs_t tvfs; 391 /* 392 * The root of the zone is not a mount point. The vfs we want 393 * to report is that of the zone's root vnode. 394 */ 395 ASSERT(zone != global_zone); 396 mntfs_zonerootvfs(zone, &tvfs); 397 mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread); 398 refstr_rele(tvfs.vfs_mntpt); 399 if (tab) 400 tab++; 401 } 402 if (zonelist == NULL) 403 return; 404 do { 405 /* 406 * Skip mounts that should not show up in mnttab 407 */ 408 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 409 vfsp = vfsp->vfs_zone_next; 410 continue; 411 } 412 mntfs_vfs_generate(vfsp, zone, tab, &cp, forread); 413 if (tab) 414 tab++; 415 vfsp = vfsp->vfs_zone_next; 416 } while (vfsp != zonelist); 417 } 418 419 static void 420 mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep, 421 int forread) 422 { 423 vfs_t *vfsp; 424 char *cp = basep; 425 426 vfsp = rootvfs; 427 do { 428 /* 429 * Skip mounts that should not show up in mnttab 430 */ 431 if (!showhidden && vfsp->vfs_flag & VFS_NOMNTTAB) { 432 vfsp = vfsp->vfs_next; 433 continue; 434 } 435 mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread); 436 if (tab) 437 tab++; 438 vfsp = vfsp->vfs_next; 439 } while (vfsp != rootvfs); 440 } 441 442 static char * 443 mntfs_mapin(char *base, size_t size) 444 { 445 size_t rlen = roundup(size, PAGESIZE); 446 struct as *as = curproc->p_as; 447 char *addr; 448 449 as_rangelock(as); 450 map_addr(&addr, rlen, 0, 1, 0); 451 if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) { 452 as_rangeunlock(as); 453 return (NULL); 454 } 455 as_rangeunlock(as); 456 if (copyout(base, addr, size)) { 457 (void) as_unmap(as, addr, rlen); 458 return (NULL); 459 } 460 return (addr); 461 } 462 463 static void 464 mntfs_freesnap(mntsnap_t *snap) 465 { 466 if (snap->mnts_text != NULL) 467 (void) as_unmap(curproc->p_as, snap->mnts_text, 468 roundup(snap->mnts_textsize, PAGESIZE)); 469 snap->mnts_textsize = snap->mnts_count = 0; 470 if (snap->mnts_metadata != NULL) 471 (void) as_unmap(curproc->p_as, snap->mnts_metadata, 472 roundup(snap->mnts_metasize, PAGESIZE)); 473 snap->mnts_metasize = 0; 474 } 475 476 #ifdef _SYSCALL32_IMPL 477 478 typedef struct extmnttab32 { 479 uint32_t mnt_special; 480 uint32_t mnt_mountp; 481 uint32_t mnt_fstype; 482 uint32_t mnt_mntopts; 483 uint32_t mnt_time; 484 uint_t mnt_major; 485 uint_t mnt_minor; 486 } extmnttab32_t; 487 488 #endif 489 490 /* 491 * Snapshot the latest version of the kernel mounted resource information 492 * 493 * There are two types of snapshots: one destined for reading, and one destined 494 * for ioctl(). The difference is that the ioctl() interface is delimited by 495 * NULLs, while the read() interface is delimited by tabs and newlines. 496 */ 497 /* ARGSUSED */ 498 static int 499 mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) 500 { 501 size_t size; 502 timespec_t lastmodt; 503 mntdata_t *mntdata = MTOD(mnp); 504 zone_t *zone = mntdata->mnt_zone; 505 boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone); 506 boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0); 507 struct extmnttab *metadata_baseaddr; 508 char *text_baseaddr; 509 int i; 510 mntsnap_t *snap; 511 512 if (forread) 513 snap = &mnp->mnt_read; 514 else 515 snap = &mnp->mnt_ioctl; 516 517 vfs_list_read_lock(); 518 /* 519 * Check if the mnttab info has changed since the last snapshot 520 */ 521 vfs_mnttab_modtime(&lastmodt); 522 if (snap->mnts_count && 523 lastmodt.tv_sec == snap->mnts_time.tv_sec && 524 lastmodt.tv_nsec == snap->mnts_time.tv_nsec) { 525 vfs_list_unlock(); 526 return (0); 527 } 528 529 530 if (snap->mnts_count != 0) 531 mntfs_freesnap(snap); 532 if (global_view) 533 size = mntfs_global_len(&snap->mnts_count, showhidden); 534 else 535 size = mntfs_zone_len(&snap->mnts_count, zone, showhidden); 536 ASSERT(size != 0); 537 538 if (!forread) 539 metadata_baseaddr = kmem_alloc( 540 snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP); 541 else 542 metadata_baseaddr = NULL; 543 544 text_baseaddr = kmem_alloc(size, KM_SLEEP); 545 546 if (global_view) 547 mntfs_global_generate(showhidden, metadata_baseaddr, 548 text_baseaddr, forread); 549 else 550 mntfs_zone_generate(zone, showhidden, 551 metadata_baseaddr, text_baseaddr, forread); 552 553 vfs_mnttab_modtime(&snap->mnts_time); 554 vfs_list_unlock(); 555 556 snap->mnts_text = mntfs_mapin(text_baseaddr, size); 557 snap->mnts_textsize = size; 558 kmem_free(text_baseaddr, size); 559 560 /* 561 * The pointers in the metadata refer to addreesses in the range 562 * [base_addr, base_addr + size]. Now that we have mapped the text into 563 * the user's address space, we have to convert these addresses into the 564 * new (user) range. We also handle the conversion for 32-bit and 565 * 32-bit applications here. 566 */ 567 if (!forread) { 568 struct extmnttab *tab; 569 #ifdef _SYSCALL32_IMPL 570 struct extmnttab32 *tab32; 571 572 if (datamodel == DATAMODEL_ILP32) { 573 tab = (struct extmnttab *)metadata_baseaddr; 574 tab32 = (struct extmnttab32 *)metadata_baseaddr; 575 576 for (i = 0; i < snap->mnts_count; i++) { 577 tab32[i].mnt_special = 578 (uintptr_t)snap->mnts_text + 579 (tab[i].mnt_special - text_baseaddr); 580 tab32[i].mnt_mountp = 581 (uintptr_t)snap->mnts_text + 582 (tab[i].mnt_mountp - text_baseaddr); 583 tab32[i].mnt_fstype = 584 (uintptr_t)snap->mnts_text + 585 (tab[i].mnt_fstype - text_baseaddr); 586 tab32[i].mnt_mntopts = 587 (uintptr_t)snap->mnts_text + 588 (tab[i].mnt_mntopts - text_baseaddr); 589 tab32[i].mnt_time = (uintptr_t)snap->mnts_text + 590 (tab[i].mnt_time - text_baseaddr); 591 tab32[i].mnt_major = tab[i].mnt_major; 592 tab32[i].mnt_minor = tab[i].mnt_minor; 593 } 594 595 snap->mnts_metasize = 596 snap->mnts_count * sizeof (struct extmnttab32); 597 snap->mnts_metadata = mntfs_mapin( 598 (char *)metadata_baseaddr, 599 snap->mnts_metasize); 600 601 } else { 602 #endif 603 tab = (struct extmnttab *)metadata_baseaddr; 604 for (i = 0; i < snap->mnts_count; i++) { 605 tab[i].mnt_special = snap->mnts_text + 606 (tab[i].mnt_special - text_baseaddr); 607 tab[i].mnt_mountp = snap->mnts_text + 608 (tab[i].mnt_mountp - text_baseaddr); 609 tab[i].mnt_fstype = snap->mnts_text + 610 (tab[i].mnt_fstype - text_baseaddr); 611 tab[i].mnt_mntopts = snap->mnts_text + 612 (tab[i].mnt_mntopts - text_baseaddr); 613 tab[i].mnt_time = snap->mnts_text + 614 (tab[i].mnt_time - text_baseaddr); 615 } 616 617 snap->mnts_metasize = 618 snap->mnts_count * sizeof (struct extmnttab); 619 snap->mnts_metadata = mntfs_mapin( 620 (char *)metadata_baseaddr, snap->mnts_metasize); 621 #ifdef _SYSCALL32_IMPL 622 } 623 #endif 624 625 kmem_free(metadata_baseaddr, 626 snap->mnts_count * sizeof (struct extmnttab)); 627 } 628 629 mntdata->mnt_size = size; 630 631 if (snap->mnts_text == NULL || 632 (!forread && snap->mnts_metadata == NULL)) { 633 mntfs_freesnap(snap); 634 return (ENOMEM); 635 } 636 637 return (0); 638 } 639 640 /* 641 * Public function to convert vfs_mntopts into a string. 642 * A buffer of sufficient size is allocated, which is returned via bufp, 643 * and whose length is returned via lenp. 644 */ 645 void 646 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 647 { 648 size_t len; 649 char *buf; 650 651 vfs_list_read_lock(); 652 653 len = mntfs_optsize(vfsp) + 1; 654 buf = kmem_alloc(len, KM_NOSLEEP); 655 if (buf == NULL) { 656 *bufp = NULL; 657 vfs_list_unlock(); 658 return; 659 } 660 buf[len - 1] = '\0'; 661 (void) mntfs_optprint(vfsp, buf); 662 ASSERT(buf[len - 1] == '\0'); 663 664 vfs_list_unlock(); 665 *bufp = buf; 666 *lenp = len; 667 } 668 669 670 /* ARGSUSED */ 671 static int 672 mntopen(vnode_t **vpp, int flag, cred_t *cr) 673 { 674 vnode_t *vp = *vpp; 675 mntnode_t *nmnp; 676 677 /* 678 * Not allowed to open for writing, return error. 679 */ 680 if (flag & FWRITE) 681 return (EPERM); 682 /* 683 * Create a new mnt/vnode for each open, this will give us a handle to 684 * hang the snapshot on. 685 */ 686 nmnp = mntgetnode(vp); 687 688 *vpp = MTOV(nmnp); 689 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 690 VN_RELE(vp); 691 return (0); 692 } 693 694 /* ARGSUSED */ 695 static int 696 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) 697 { 698 mntnode_t *mnp = VTOM(vp); 699 700 /* Clean up any locks or shares held by the current process */ 701 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 702 cleanshares(vp, ttoproc(curthread)->p_pid); 703 704 if (count > 1) 705 return (0); 706 if (vp->v_count == 1) { 707 mntfs_freesnap(&mnp->mnt_read); 708 mntfs_freesnap(&mnp->mnt_ioctl); 709 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 710 } 711 return (0); 712 } 713 714 /* ARGSUSED */ 715 static int 716 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 717 { 718 int error = 0; 719 off_t off = uio->uio_offset; 720 size_t len = uio->uio_resid; 721 mntnode_t *mnp = VTOM(vp); 722 char *buf; 723 mntsnap_t *snap = &mnp->mnt_read; 724 int datamodel; 725 726 if (off == (off_t)0 || snap->mnts_count == 0) { 727 /* 728 * It is assumed that any kernel callers wishing 729 * to read mnttab will be using extmnttab entries 730 * and not extmnttab32 entries, whether or not 731 * the kernel is LP64 or ILP32. Thus, force the 732 * datamodel that mntfs_snapshot uses to be 733 * DATAMODEL_LP64. 734 */ 735 if (uio->uio_segflg == UIO_SYSSPACE) 736 datamodel = DATAMODEL_LP64; 737 else 738 datamodel = get_udatamodel(); 739 if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0) 740 return (error); 741 } 742 if ((size_t)(off + len) > snap->mnts_textsize) 743 len = snap->mnts_textsize - off; 744 745 if (off < 0 || len > snap->mnts_textsize) 746 return (EFAULT); 747 748 if (len == 0) 749 return (0); 750 751 /* 752 * The mnttab image is stored in the user's address space, 753 * so we have to copy it into the kernel from userland, 754 * then copy it back out to the specified address. 755 */ 756 buf = kmem_alloc(len, KM_SLEEP); 757 if (copyin(snap->mnts_text + off, buf, len)) 758 error = EFAULT; 759 else { 760 error = uiomove(buf, len, UIO_READ, uio); 761 } 762 kmem_free(buf, len); 763 764 return (error); 765 } 766 767 768 static int 769 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 770 { 771 mntnode_t *mnp = VTOM(vp); 772 int error; 773 vnode_t *rvp; 774 extern timespec_t vfs_mnttab_ctime; 775 mntdata_t *mntdata = MTOD(VTOM(vp)); 776 mntsnap_t *snap = mnp->mnt_read.mnts_count ? 777 &mnp->mnt_read : &mnp->mnt_ioctl; 778 779 /* 780 * Return all the attributes. Should be refined 781 * so that it returns only those asked for. 782 * Most of this is complete fakery anyway. 783 */ 784 rvp = mnp->mnt_mountvp; 785 /* 786 * Attributes are same as underlying file with modifications 787 */ 788 if (error = VOP_GETATTR(rvp, vap, flags, cr)) 789 return (error); 790 791 /* 792 * We always look like a regular file 793 */ 794 vap->va_type = VREG; 795 /* 796 * mode should basically be read only 797 */ 798 vap->va_mode &= 07444; 799 vap->va_fsid = vp->v_vfsp->vfs_dev; 800 vap->va_blksize = DEV_BSIZE; 801 vap->va_rdev = 0; 802 vap->va_seq = 0; 803 /* 804 * Set nlink to the number of open vnodes for mnttab info 805 * plus one for existing. 806 */ 807 vap->va_nlink = mntdata->mnt_nopen + 1; 808 /* 809 * If we haven't taken a snapshot yet, set the 810 * size to the size of the latest snapshot. 811 */ 812 vap->va_size = snap->mnts_textsize ? snap->mnts_textsize : 813 mntdata->mnt_size; 814 /* 815 * Fetch mtime from the vfs mnttab timestamp 816 */ 817 vap->va_ctime = vfs_mnttab_ctime; 818 vfs_list_read_lock(); 819 vfs_mnttab_modtime(&vap->va_mtime); 820 vap->va_atime = vap->va_mtime; 821 vfs_list_unlock(); 822 /* 823 * Nodeid is always ROOTINO; 824 */ 825 vap->va_nodeid = (ino64_t)MNTROOTINO; 826 vap->va_nblocks = btod(vap->va_size); 827 return (0); 828 } 829 830 831 static int 832 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr) 833 { 834 mntnode_t *mnp = VTOM(vp); 835 836 if (mode & (VWRITE|VEXEC)) 837 return (EROFS); 838 839 /* 840 * Do access check on the underlying directory vnode. 841 */ 842 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr)); 843 } 844 845 846 /* 847 * New /mntfs vnode required; allocate it and fill in most of the fields. 848 */ 849 static mntnode_t * 850 mntgetnode(vnode_t *dp) 851 { 852 mntnode_t *mnp; 853 vnode_t *vp; 854 855 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 856 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 857 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 858 vp = MTOV(mnp); 859 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 860 vn_setops(vp, mntvnodeops); 861 vp->v_vfsp = dp->v_vfsp; 862 vp->v_type = VREG; 863 vp->v_data = (caddr_t)mnp; 864 865 return (mnp); 866 } 867 868 /* 869 * Free the storage obtained from mntgetnode(). 870 */ 871 static void 872 mntfreenode(mntnode_t *mnp) 873 { 874 vnode_t *vp = MTOV(mnp); 875 876 vn_invalid(vp); 877 vn_free(vp); 878 kmem_free(mnp, sizeof (*mnp)); 879 } 880 881 882 /* ARGSUSED */ 883 static int 884 mntfsync(vnode_t *vp, int syncflag, cred_t *cr) 885 { 886 return (0); 887 } 888 889 /* ARGSUSED */ 890 static void 891 mntinactive(vnode_t *vp, cred_t *cr) 892 { 893 mntnode_t *mnp = VTOM(vp); 894 895 mntfreenode(mnp); 896 } 897 898 /* ARGSUSED */ 899 static int 900 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp) 901 { 902 if (*noffp == 0) 903 VTOM(vp)->mnt_offset = 0; 904 905 return (0); 906 } 907 908 /* 909 * Return the answer requested to poll(). 910 * POLLRDBAND will return when the mtime of the mnttab 911 * information is newer than the latest one read for this open. 912 */ 913 /* ARGSUSED */ 914 static int 915 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp) 916 { 917 mntnode_t *mnp = VTOM(vp); 918 mntsnap_t *snap = &mnp->mnt_read; 919 920 if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec || 921 (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec && 922 mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec)) 923 snap = &mnp->mnt_ioctl; 924 925 *revp = 0; 926 *phpp = (pollhead_t *)NULL; 927 if (ev & POLLIN) 928 *revp |= POLLIN; 929 930 if (ev & POLLRDNORM) 931 *revp |= POLLRDNORM; 932 933 if (ev & POLLRDBAND) { 934 vfs_mnttab_poll(&snap->mnts_time, phpp); 935 if (*phpp == (pollhead_t *)NULL) 936 *revp |= POLLRDBAND; 937 } 938 if (*revp || *phpp != NULL || any) { 939 return (0); 940 } 941 /* 942 * If someone is polling an unsupported poll events (e.g. 943 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 944 * That way we will ensure that we don't return a 0 945 * revents with a NULL pollhead pointer. 946 */ 947 *revp = POLLERR; 948 return (0); 949 } 950 /* ARGSUSED */ 951 static int 952 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 953 cred_t *cr, int *rvalp) 954 { 955 uint_t *up = (uint_t *)arg; 956 mntnode_t *mnp = VTOM(vp); 957 mntsnap_t *snap = &mnp->mnt_ioctl; 958 int error; 959 960 error = 0; 961 switch (cmd) { 962 963 case MNTIOC_NMNTS: { /* get no. of mounted resources */ 964 if (snap->mnts_count == 0) { 965 if ((error = 966 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 967 return (error); 968 } 969 if (suword32(up, snap->mnts_count) != 0) 970 error = EFAULT; 971 break; 972 } 973 974 case MNTIOC_GETDEVLIST: { /* get mounted device major/minor nos */ 975 uint_t *devlist; 976 int i; 977 size_t len; 978 979 if (snap->mnts_count == 0) { 980 if ((error = 981 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 982 return (error); 983 } 984 985 len = 2 * snap->mnts_count * sizeof (uint_t); 986 devlist = kmem_alloc(len, KM_SLEEP); 987 for (i = 0; i < snap->mnts_count; i++) { 988 989 #ifdef _SYSCALL32_IMPL 990 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 991 struct extmnttab32 tab; 992 993 if ((error = xcopyin(snap->mnts_text + 994 i * sizeof (struct extmnttab32), &tab, 995 sizeof (tab))) != 0) 996 break; 997 998 devlist[i*2] = tab.mnt_major; 999 devlist[i*2+1] = tab.mnt_minor; 1000 } else { 1001 #endif 1002 struct extmnttab tab; 1003 1004 if ((error = xcopyin(snap->mnts_text + 1005 i * sizeof (struct extmnttab), &tab, 1006 sizeof (tab))) != 0) 1007 break; 1008 1009 devlist[i*2] = tab.mnt_major; 1010 devlist[i*2+1] = tab.mnt_minor; 1011 #ifdef _SYSCALL32_IMPL 1012 } 1013 #endif 1014 } 1015 1016 if (error == 0) 1017 error = xcopyout(devlist, up, len); 1018 kmem_free(devlist, len); 1019 break; 1020 } 1021 1022 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1023 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1024 { 1025 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1026 STRUCT_DECL(mnttagdesc, tagdesc); 1027 char *cptr; 1028 uint32_t major, minor; 1029 char tagbuf[MAX_MNTOPT_TAG]; 1030 char *pbuf; 1031 size_t len; 1032 uint_t start = 0; 1033 mntdata_t *mntdata = MTOD(mnp); 1034 zone_t *zone = mntdata->mnt_zone; 1035 1036 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1037 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1038 error = EFAULT; 1039 break; 1040 } 1041 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1042 if (zone != global_zone) { 1043 (void) strcpy(pbuf, zone->zone_rootpath); 1044 /* truncate "/" and nul */ 1045 start = zone->zone_rootpathlen - 2; 1046 ASSERT(pbuf[start] == '/'); 1047 } 1048 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1049 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1050 if (error) { 1051 kmem_free(pbuf, MAXPATHLEN); 1052 break; 1053 } 1054 if (start != 0 && pbuf[start] != '/') { 1055 kmem_free(pbuf, MAXPATHLEN); 1056 error = EINVAL; 1057 break; 1058 } 1059 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1060 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1061 kmem_free(pbuf, MAXPATHLEN); 1062 break; 1063 } 1064 major = STRUCT_FGET(tagdesc, mtd_major); 1065 minor = STRUCT_FGET(tagdesc, mtd_minor); 1066 if (cmd == MNTIOC_SETTAG) 1067 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1068 else 1069 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1070 kmem_free(pbuf, MAXPATHLEN); 1071 break; 1072 } 1073 1074 case MNTIOC_SHOWHIDDEN: 1075 { 1076 mutex_enter(&vp->v_lock); 1077 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1078 mutex_exit(&vp->v_lock); 1079 break; 1080 } 1081 1082 case MNTIOC_GETMNTENT: 1083 { 1084 size_t idx; 1085 uintptr_t addr; 1086 1087 idx = mnp->mnt_offset; 1088 if (snap->mnts_count == 0 || idx == 0) { 1089 if ((error = 1090 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1091 return (error); 1092 } 1093 /* 1094 * If the next index is beyond the end of the current mnttab, 1095 * return EOF 1096 */ 1097 if (idx >= snap->mnts_count) { 1098 *rvalp = 1; 1099 return (0); 1100 } 1101 1102 #ifdef _SYSCALL32_IMPL 1103 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1104 addr = (uintptr_t)(snap->mnts_metadata + idx * 1105 sizeof (struct extmnttab32)); 1106 error = suword32((void *)arg, addr); 1107 } else { 1108 #endif 1109 addr = (uintptr_t)(snap->mnts_metadata + idx * 1110 sizeof (struct extmnttab)); 1111 error = sulword((void *)arg, addr); 1112 #ifdef _SYSCALL32_IMPL 1113 } 1114 #endif 1115 1116 if (error != 0) 1117 return (error); 1118 1119 mnp->mnt_offset++; 1120 break; 1121 } 1122 1123 default: 1124 error = EINVAL; 1125 break; 1126 } 1127 1128 return (error); 1129 } 1130 1131 1132 /* 1133 * /mntfs vnode operations vector 1134 */ 1135 const fs_operation_def_t mnt_vnodeops_template[] = { 1136 VOPNAME_OPEN, { .vop_open = mntopen }, 1137 VOPNAME_CLOSE, { .vop_close = mntclose }, 1138 VOPNAME_READ, { .vop_read = mntread }, 1139 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1140 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1141 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1142 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1143 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1144 VOPNAME_SEEK, { .vop_seek = mntseek }, 1145 VOPNAME_POLL, { .vop_poll = mntpoll }, 1146 VOPNAME_DISPOSE, { .error = fs_error }, 1147 VOPNAME_SHRLOCK, { .error = fs_error }, 1148 NULL, NULL 1149 }; 1150