1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/file.h> 27 #include <sys/stat.h> 28 #include <sys/atomic.h> 29 #include <sys/mntio.h> 30 #include <sys/mnttab.h> 31 #include <sys/mount.h> 32 #include <sys/sunddi.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/fs/mntdata.h> 38 #include <fs/fs_subr.h> 39 #include <sys/vmsystm.h> 40 #include <vm/seg_vn.h> 41 42 #define MNTROOTINO 2 43 44 static mntnode_t *mntgetnode(vnode_t *); 45 46 vnodeops_t *mntvnodeops; 47 extern void vfs_mnttab_readop(void); 48 49 /* 50 * Design of kernel mnttab accounting. 51 * 52 * To support whitespace in mount names, we implement an ioctl 53 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in 54 * /etc/mnttab. The libc functions getmntent() and getextmntent() are built 55 * atop this interface. 56 * 57 * To minimize the amount of memory used in the kernel, we keep all the 58 * necessary information in the user's address space. Large server 59 * configurations can have /etc/mnttab files in excess of 64k. 60 * 61 * To support both vanilla read() calls as well as ioctl() calls, we have two 62 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl. 63 * These snapshots include the base location in user memory, the number of 64 * mounts in the snapshot, and any metadata associated with it. The metadata is 65 * used only to support the ioctl() interface, and is a series of extmnttab 66 * structures. When the user issues an ioctl(), we simply copyout a pointer to 67 * that structure, and the rest is handled in userland. 68 */ 69 70 /* 71 * NOTE: The following variable enables the generation of the "dev=xxx" 72 * in the option string for a mounted file system. Really this should 73 * be gotten rid of altogether, but for the sake of backwards compatibility 74 * we had to leave it in. It is defined as a 32-bit device number. This 75 * means that when 64-bit device numbers are in use, if either the major or 76 * minor part of the device number will not fit in a 16 bit quantity, the 77 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 78 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 79 * device number handles this check and assigns the proper value. 80 */ 81 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 82 83 static int 84 mntfs_devsize(struct vfs *vfsp) 85 { 86 dev32_t odev; 87 88 (void) cmpldev(&odev, vfsp->vfs_dev); 89 return (snprintf(NULL, 0, "dev=%x", odev)); 90 } 91 92 static int 93 mntfs_devprint(struct vfs *vfsp, char *buf) 94 { 95 dev32_t odev; 96 97 (void) cmpldev(&odev, vfsp->vfs_dev); 98 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 99 } 100 101 static int 102 mntfs_optsize(struct vfs *vfsp) 103 { 104 int i, size = 0; 105 mntopt_t *mop; 106 107 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 108 mop = &vfsp->vfs_mntopts.mo_list[i]; 109 if (mop->mo_flags & MO_NODISPLAY) 110 continue; 111 if (mop->mo_flags & MO_SET) { 112 if (size) 113 size++; /* space for comma */ 114 size += strlen(mop->mo_name); 115 /* 116 * count option value if there is one 117 */ 118 if (mop->mo_arg != NULL) { 119 size += strlen(mop->mo_arg) + 1; 120 } 121 } 122 } 123 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 124 /* 125 * Add space for "zone=<zone_name>" if required. 126 */ 127 if (size) 128 size++; /* space for comma */ 129 size += sizeof ("zone=") - 1; 130 size += strlen(vfsp->vfs_zone->zone_name); 131 } 132 if (mntfs_enabledev) { 133 if (size != 0) 134 size++; /* space for comma */ 135 size += mntfs_devsize(vfsp); 136 } 137 if (size == 0) 138 size = strlen("-"); 139 return (size); 140 } 141 142 static int 143 mntfs_optprint(struct vfs *vfsp, char *buf) 144 { 145 int i, optinbuf = 0; 146 mntopt_t *mop; 147 char *origbuf = buf; 148 149 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 150 mop = &vfsp->vfs_mntopts.mo_list[i]; 151 if (mop->mo_flags & MO_NODISPLAY) 152 continue; 153 if (mop->mo_flags & MO_SET) { 154 if (optinbuf) 155 *buf++ = ','; 156 else 157 optinbuf = 1; 158 buf += snprintf(buf, MAX_MNTOPT_STR, 159 "%s", mop->mo_name); 160 /* 161 * print option value if there is one 162 */ 163 if (mop->mo_arg != NULL) { 164 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 165 mop->mo_arg); 166 } 167 } 168 } 169 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 170 if (optinbuf) 171 *buf++ = ','; 172 else 173 optinbuf = 1; 174 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 175 vfsp->vfs_zone->zone_name); 176 } 177 if (mntfs_enabledev) { 178 if (optinbuf++) 179 *buf++ = ','; 180 buf += mntfs_devprint(vfsp, buf); 181 } 182 if (!optinbuf) { 183 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 184 } 185 return (buf - origbuf); 186 } 187 188 static size_t 189 mntfs_vfs_len(vfs_t *vfsp, zone_t *zone) 190 { 191 size_t size = 0; 192 const char *resource, *mntpt; 193 194 mntpt = refstr_value(vfsp->vfs_mntpt); 195 if (mntpt != NULL && mntpt[0] != '\0') { 196 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 197 } else { 198 size += strlen("-") + 1; 199 } 200 201 resource = refstr_value(vfsp->vfs_resource); 202 if (resource != NULL && resource[0] != '\0') { 203 if (resource[0] != '/') { 204 size += strlen(resource) + 1; 205 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 206 /* 207 * Same as the zone's view of the mount point. 208 */ 209 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 210 } else { 211 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 212 } 213 } else { 214 size += strlen("-") + 1; 215 } 216 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 217 size += mntfs_optsize(vfsp); 218 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 219 return (size); 220 } 221 222 static void 223 mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp) 224 { 225 /* 226 * Basically copy over the real vfs_t on which the root vnode is 227 * located, changing its mountpoint and resource to match those of 228 * the zone's rootpath. 229 */ 230 *rootvfsp = *zone->zone_rootvp->v_vfsp; 231 rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath); 232 rootvfsp->vfs_resource = rootvfsp->vfs_mntpt; 233 } 234 235 static size_t 236 mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden) 237 { 238 struct vfs *zonelist; 239 struct vfs *vfsp; 240 size_t size = 0; 241 uint_t cnt = 0; 242 243 ASSERT(zone->zone_rootpath != NULL); 244 245 /* 246 * If the zone has a root entry, it will be the first in the list. If 247 * it doesn't, we conjure one up. 248 */ 249 vfsp = zonelist = zone->zone_vfslist; 250 if (zonelist == NULL || 251 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 252 vfs_t tvfs; 253 /* 254 * The root of the zone is not a mount point. The vfs we want 255 * to report is that of the zone's root vnode. 256 */ 257 ASSERT(zone != global_zone); 258 mntfs_zonerootvfs(zone, &tvfs); 259 size += mntfs_vfs_len(&tvfs, zone); 260 refstr_rele(tvfs.vfs_mntpt); 261 cnt++; 262 } 263 if (zonelist == NULL) 264 goto out; 265 do { 266 /* 267 * Skip mounts that should not show up in mnttab 268 */ 269 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 270 vfsp = vfsp->vfs_zone_next; 271 continue; 272 } 273 cnt++; 274 size += mntfs_vfs_len(vfsp, zone); 275 vfsp = vfsp->vfs_zone_next; 276 } while (vfsp != zonelist); 277 out: 278 *nent_ptr = cnt; 279 return (size); 280 } 281 282 static size_t 283 mntfs_global_len(uint_t *nent_ptr, int showhidden) 284 { 285 struct vfs *vfsp; 286 size_t size = 0; 287 uint_t cnt = 0; 288 289 vfsp = rootvfs; 290 do { 291 /* 292 * Skip mounts that should not show up in mnttab 293 */ 294 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 295 vfsp = vfsp->vfs_next; 296 continue; 297 } 298 cnt++; 299 size += mntfs_vfs_len(vfsp, global_zone); 300 vfsp = vfsp->vfs_next; 301 } while (vfsp != rootvfs); 302 *nent_ptr = cnt; 303 return (size); 304 } 305 306 static void 307 mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab, 308 char **basep, int forread) 309 { 310 const char *resource, *mntpt; 311 char *cp = *basep; 312 313 mntpt = refstr_value(vfsp->vfs_mntpt); 314 resource = refstr_value(vfsp->vfs_resource); 315 316 if (tab) 317 tab->mnt_special = cp; 318 if (resource != NULL && resource[0] != '\0') { 319 if (resource[0] != '/') { 320 cp += snprintf(cp, MAXPATHLEN, "%s", resource); 321 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 322 /* 323 * Use the mount point as the resource. 324 */ 325 cp += snprintf(cp, MAXPATHLEN, "%s", 326 ZONE_PATH_TRANSLATE(mntpt, zone)); 327 } else { 328 cp += snprintf(cp, MAXPATHLEN, "%s", 329 ZONE_PATH_TRANSLATE(resource, zone)); 330 } 331 } else { 332 cp += snprintf(cp, MAXPATHLEN, "-"); 333 } 334 *cp++ = forread ? '\t' : '\0'; 335 336 if (tab) 337 tab->mnt_mountp = cp; 338 if (mntpt != NULL && mntpt[0] != '\0') { 339 /* 340 * We know the mount point is visible from within the zone, 341 * otherwise it wouldn't be on the zone's vfs list. 342 */ 343 cp += snprintf(cp, MAXPATHLEN, "%s", 344 ZONE_PATH_TRANSLATE(mntpt, zone)); 345 } else { 346 cp += snprintf(cp, MAXPATHLEN, "-"); 347 } 348 *cp++ = forread ? '\t' : '\0'; 349 350 if (tab) 351 tab->mnt_fstype = cp; 352 cp += snprintf(cp, MAXPATHLEN, "%s", 353 vfssw[vfsp->vfs_fstype].vsw_name); 354 *cp++ = forread ? '\t' : '\0'; 355 356 if (tab) 357 tab->mnt_mntopts = cp; 358 cp += mntfs_optprint(vfsp, cp); 359 *cp++ = forread ? '\t' : '\0'; 360 361 if (tab) 362 tab->mnt_time = cp; 363 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 364 *cp++ = forread ? '\n' : '\0'; 365 366 if (tab) { 367 tab->mnt_major = getmajor(vfsp->vfs_dev); 368 tab->mnt_minor = getminor(vfsp->vfs_dev); 369 } 370 371 *basep = cp; 372 } 373 374 static void 375 mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab, 376 char *basep, int forread) 377 { 378 vfs_t *zonelist; 379 vfs_t *vfsp; 380 char *cp = basep; 381 382 /* 383 * If the zone has a root entry, it will be the first in the list. If 384 * it doesn't, we conjure one up. 385 */ 386 vfsp = zonelist = zone->zone_vfslist; 387 if (zonelist == NULL || 388 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 389 vfs_t tvfs; 390 /* 391 * The root of the zone is not a mount point. The vfs we want 392 * to report is that of the zone's root vnode. 393 */ 394 ASSERT(zone != global_zone); 395 mntfs_zonerootvfs(zone, &tvfs); 396 mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread); 397 refstr_rele(tvfs.vfs_mntpt); 398 if (tab) 399 tab++; 400 } 401 if (zonelist == NULL) 402 return; 403 do { 404 /* 405 * Skip mounts that should not show up in mnttab 406 */ 407 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 408 vfsp = vfsp->vfs_zone_next; 409 continue; 410 } 411 mntfs_vfs_generate(vfsp, zone, tab, &cp, forread); 412 if (tab) 413 tab++; 414 vfsp = vfsp->vfs_zone_next; 415 } while (vfsp != zonelist); 416 } 417 418 static void 419 mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep, 420 int forread) 421 { 422 vfs_t *vfsp; 423 char *cp = basep; 424 425 vfsp = rootvfs; 426 do { 427 /* 428 * Skip mounts that should not show up in mnttab 429 */ 430 if (!showhidden && vfsp->vfs_flag & VFS_NOMNTTAB) { 431 vfsp = vfsp->vfs_next; 432 continue; 433 } 434 mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread); 435 if (tab) 436 tab++; 437 vfsp = vfsp->vfs_next; 438 } while (vfsp != rootvfs); 439 } 440 441 static char * 442 mntfs_mapin(char *base, size_t size) 443 { 444 size_t rlen = roundup(size, PAGESIZE); 445 struct as *as = curproc->p_as; 446 char *addr = NULL; 447 448 as_rangelock(as); 449 map_addr(&addr, rlen, 0, 1, 0); 450 if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) { 451 as_rangeunlock(as); 452 return (NULL); 453 } 454 as_rangeunlock(as); 455 if (copyout(base, addr, size)) { 456 (void) as_unmap(as, addr, rlen); 457 return (NULL); 458 } 459 return (addr); 460 } 461 462 static void 463 mntfs_freesnap(mntsnap_t *snap) 464 { 465 if (snap->mnts_text != NULL) 466 (void) as_unmap(curproc->p_as, snap->mnts_text, 467 roundup(snap->mnts_textsize, PAGESIZE)); 468 snap->mnts_textsize = snap->mnts_count = 0; 469 if (snap->mnts_metadata != NULL) 470 (void) as_unmap(curproc->p_as, snap->mnts_metadata, 471 roundup(snap->mnts_metasize, PAGESIZE)); 472 snap->mnts_metasize = 0; 473 } 474 475 #ifdef _SYSCALL32_IMPL 476 477 typedef struct extmnttab32 { 478 uint32_t mnt_special; 479 uint32_t mnt_mountp; 480 uint32_t mnt_fstype; 481 uint32_t mnt_mntopts; 482 uint32_t mnt_time; 483 uint_t mnt_major; 484 uint_t mnt_minor; 485 } extmnttab32_t; 486 487 #endif 488 489 /* 490 * Snapshot the latest version of the kernel mounted resource information 491 * 492 * There are two types of snapshots: one destined for reading, and one destined 493 * for ioctl(). The difference is that the ioctl() interface is delimited by 494 * NULLs, while the read() interface is delimited by tabs and newlines. 495 */ 496 /* ARGSUSED */ 497 static int 498 mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) 499 { 500 size_t size; 501 timespec_t lastmodt; 502 mntdata_t *mntdata = MTOD(mnp); 503 zone_t *zone = mntdata->mnt_zone; 504 boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone); 505 boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0); 506 struct extmnttab *metadata_baseaddr; 507 char *text_baseaddr; 508 int i; 509 mntsnap_t *snap; 510 511 if (forread) 512 snap = &mnp->mnt_read; 513 else 514 snap = &mnp->mnt_ioctl; 515 516 vfs_list_read_lock(); 517 /* 518 * Check if the mnttab info has changed since the last snapshot 519 */ 520 vfs_mnttab_modtime(&lastmodt); 521 if (snap->mnts_count && 522 lastmodt.tv_sec == snap->mnts_time.tv_sec && 523 lastmodt.tv_nsec == snap->mnts_time.tv_nsec) { 524 vfs_list_unlock(); 525 return (0); 526 } 527 528 529 if (snap->mnts_count != 0) 530 mntfs_freesnap(snap); 531 if (global_view) 532 size = mntfs_global_len(&snap->mnts_count, showhidden); 533 else 534 size = mntfs_zone_len(&snap->mnts_count, zone, showhidden); 535 ASSERT(size != 0); 536 537 if (!forread) 538 metadata_baseaddr = kmem_alloc( 539 snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP); 540 else 541 metadata_baseaddr = NULL; 542 543 text_baseaddr = kmem_alloc(size, KM_SLEEP); 544 545 if (global_view) 546 mntfs_global_generate(showhidden, metadata_baseaddr, 547 text_baseaddr, forread); 548 else 549 mntfs_zone_generate(zone, showhidden, 550 metadata_baseaddr, text_baseaddr, forread); 551 552 vfs_mnttab_modtime(&snap->mnts_time); 553 vfs_list_unlock(); 554 555 snap->mnts_text = mntfs_mapin(text_baseaddr, size); 556 snap->mnts_textsize = size; 557 kmem_free(text_baseaddr, size); 558 559 /* 560 * The pointers in the metadata refer to addreesses in the range 561 * [base_addr, base_addr + size]. Now that we have mapped the text into 562 * the user's address space, we have to convert these addresses into the 563 * new (user) range. We also handle the conversion for 32-bit and 564 * 32-bit applications here. 565 */ 566 if (!forread) { 567 struct extmnttab *tab; 568 #ifdef _SYSCALL32_IMPL 569 struct extmnttab32 *tab32; 570 571 if (datamodel == DATAMODEL_ILP32) { 572 tab = (struct extmnttab *)metadata_baseaddr; 573 tab32 = (struct extmnttab32 *)metadata_baseaddr; 574 575 for (i = 0; i < snap->mnts_count; i++) { 576 tab32[i].mnt_special = 577 (uintptr_t)snap->mnts_text + 578 (tab[i].mnt_special - text_baseaddr); 579 tab32[i].mnt_mountp = 580 (uintptr_t)snap->mnts_text + 581 (tab[i].mnt_mountp - text_baseaddr); 582 tab32[i].mnt_fstype = 583 (uintptr_t)snap->mnts_text + 584 (tab[i].mnt_fstype - text_baseaddr); 585 tab32[i].mnt_mntopts = 586 (uintptr_t)snap->mnts_text + 587 (tab[i].mnt_mntopts - text_baseaddr); 588 tab32[i].mnt_time = (uintptr_t)snap->mnts_text + 589 (tab[i].mnt_time - text_baseaddr); 590 tab32[i].mnt_major = tab[i].mnt_major; 591 tab32[i].mnt_minor = tab[i].mnt_minor; 592 } 593 594 snap->mnts_metasize = 595 snap->mnts_count * sizeof (struct extmnttab32); 596 snap->mnts_metadata = mntfs_mapin( 597 (char *)metadata_baseaddr, 598 snap->mnts_metasize); 599 600 } else { 601 #endif 602 tab = (struct extmnttab *)metadata_baseaddr; 603 for (i = 0; i < snap->mnts_count; i++) { 604 tab[i].mnt_special = snap->mnts_text + 605 (tab[i].mnt_special - text_baseaddr); 606 tab[i].mnt_mountp = snap->mnts_text + 607 (tab[i].mnt_mountp - text_baseaddr); 608 tab[i].mnt_fstype = snap->mnts_text + 609 (tab[i].mnt_fstype - text_baseaddr); 610 tab[i].mnt_mntopts = snap->mnts_text + 611 (tab[i].mnt_mntopts - text_baseaddr); 612 tab[i].mnt_time = snap->mnts_text + 613 (tab[i].mnt_time - text_baseaddr); 614 } 615 616 snap->mnts_metasize = 617 snap->mnts_count * sizeof (struct extmnttab); 618 snap->mnts_metadata = mntfs_mapin( 619 (char *)metadata_baseaddr, snap->mnts_metasize); 620 #ifdef _SYSCALL32_IMPL 621 } 622 #endif 623 624 kmem_free(metadata_baseaddr, 625 snap->mnts_count * sizeof (struct extmnttab)); 626 } 627 628 mntdata->mnt_size = size; 629 630 if (snap->mnts_text == NULL || 631 (!forread && snap->mnts_metadata == NULL)) { 632 mntfs_freesnap(snap); 633 return (ENOMEM); 634 } 635 vfs_mnttab_readop(); 636 return (0); 637 } 638 639 /* 640 * Public function to convert vfs_mntopts into a string. 641 * A buffer of sufficient size is allocated, which is returned via bufp, 642 * and whose length is returned via lenp. 643 */ 644 void 645 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 646 { 647 size_t len; 648 char *buf; 649 650 vfs_list_read_lock(); 651 652 len = mntfs_optsize(vfsp) + 1; 653 buf = kmem_alloc(len, KM_NOSLEEP); 654 if (buf == NULL) { 655 *bufp = NULL; 656 vfs_list_unlock(); 657 return; 658 } 659 buf[len - 1] = '\0'; 660 (void) mntfs_optprint(vfsp, buf); 661 ASSERT(buf[len - 1] == '\0'); 662 663 vfs_list_unlock(); 664 *bufp = buf; 665 *lenp = len; 666 } 667 668 669 /* ARGSUSED */ 670 static int 671 mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 672 { 673 vnode_t *vp = *vpp; 674 mntnode_t *nmnp; 675 676 /* 677 * Not allowed to open for writing, return error. 678 */ 679 if (flag & FWRITE) 680 return (EPERM); 681 /* 682 * Create a new mnt/vnode for each open, this will give us a handle to 683 * hang the snapshot on. 684 */ 685 nmnp = mntgetnode(vp); 686 687 *vpp = MTOV(nmnp); 688 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 689 VN_RELE(vp); 690 return (0); 691 } 692 693 /* ARGSUSED */ 694 static int 695 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 696 caller_context_t *ct) 697 { 698 mntnode_t *mnp = VTOM(vp); 699 700 /* Clean up any locks or shares held by the current process */ 701 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 702 cleanshares(vp, ttoproc(curthread)->p_pid); 703 704 if (count > 1) 705 return (0); 706 if (vp->v_count == 1) { 707 mntfs_freesnap(&mnp->mnt_read); 708 mntfs_freesnap(&mnp->mnt_ioctl); 709 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 710 } 711 return (0); 712 } 713 714 /* ARGSUSED */ 715 static int 716 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 717 { 718 int error = 0; 719 off_t off = uio->uio_offset; 720 size_t len = uio->uio_resid; 721 mntnode_t *mnp = VTOM(vp); 722 char *buf; 723 mntsnap_t *snap; 724 int datamodel; 725 726 rw_enter(&mnp->mnt_contents, RW_READER); 727 snap = &mnp->mnt_read; 728 if (off == (off_t)0 || snap->mnts_count == 0) { 729 /* 730 * It is assumed that any kernel callers wishing 731 * to read mnttab will be using extmnttab entries 732 * and not extmnttab32 entries, whether or not 733 * the kernel is LP64 or ILP32. Thus, force the 734 * datamodel that mntfs_snapshot uses to be 735 * DATAMODEL_LP64. 736 */ 737 if (uio->uio_segflg == UIO_SYSSPACE) 738 datamodel = DATAMODEL_LP64; 739 else 740 datamodel = get_udatamodel(); 741 if (!rw_tryupgrade(&mnp->mnt_contents)) { 742 rw_exit(&mnp->mnt_contents); 743 rw_enter(&mnp->mnt_contents, RW_WRITER); 744 } 745 if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0) { 746 rw_exit(&mnp->mnt_contents); 747 return (error); 748 } 749 rw_downgrade(&mnp->mnt_contents); 750 } 751 if ((size_t)(off + len) > snap->mnts_textsize) 752 len = snap->mnts_textsize - off; 753 754 if (off < 0 || len > snap->mnts_textsize) { 755 rw_exit(&mnp->mnt_contents); 756 return (EFAULT); 757 } 758 759 if (len == 0) { 760 rw_exit(&mnp->mnt_contents); 761 return (0); 762 } 763 764 /* 765 * The mnttab image is stored in the user's address space, 766 * so we have to copy it into the kernel from userland, 767 * then copy it back out to the specified address. 768 */ 769 buf = kmem_alloc(len, KM_SLEEP); 770 if (copyin(snap->mnts_text + off, buf, len)) 771 error = EFAULT; 772 else { 773 error = uiomove(buf, len, UIO_READ, uio); 774 } 775 kmem_free(buf, len); 776 vfs_mnttab_readop(); 777 rw_exit(&mnp->mnt_contents); 778 return (error); 779 } 780 781 782 static int 783 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 784 caller_context_t *ct) 785 { 786 mntnode_t *mnp = VTOM(vp); 787 int error; 788 vnode_t *rvp; 789 extern timespec_t vfs_mnttab_ctime; 790 mntdata_t *mntdata = MTOD(VTOM(vp)); 791 mntsnap_t *snap; 792 793 rw_enter(&mnp->mnt_contents, RW_READER); 794 snap = mnp->mnt_read.mnts_count ? &mnp->mnt_read : &mnp->mnt_ioctl; 795 /* 796 * Return all the attributes. Should be refined 797 * so that it returns only those asked for. 798 * Most of this is complete fakery anyway. 799 */ 800 rvp = mnp->mnt_mountvp; 801 /* 802 * Attributes are same as underlying file with modifications 803 */ 804 if (error = VOP_GETATTR(rvp, vap, flags, cr, ct)) 805 return (error); 806 807 /* 808 * We always look like a regular file 809 */ 810 vap->va_type = VREG; 811 /* 812 * mode should basically be read only 813 */ 814 vap->va_mode &= 07444; 815 vap->va_fsid = vp->v_vfsp->vfs_dev; 816 vap->va_blksize = DEV_BSIZE; 817 vap->va_rdev = 0; 818 vap->va_seq = 0; 819 /* 820 * Set nlink to the number of open vnodes for mnttab info 821 * plus one for existing. 822 */ 823 vap->va_nlink = mntdata->mnt_nopen + 1; 824 /* 825 * If we haven't taken a snapshot yet, set the 826 * size to the size of the latest snapshot. 827 */ 828 vap->va_size = snap->mnts_textsize ? snap->mnts_textsize : 829 mntdata->mnt_size; 830 rw_exit(&mnp->mnt_contents); 831 /* 832 * Fetch mtime from the vfs mnttab timestamp 833 */ 834 vap->va_ctime = vfs_mnttab_ctime; 835 vfs_list_read_lock(); 836 vfs_mnttab_modtime(&vap->va_mtime); 837 vap->va_atime = vap->va_mtime; 838 vfs_list_unlock(); 839 /* 840 * Nodeid is always ROOTINO; 841 */ 842 vap->va_nodeid = (ino64_t)MNTROOTINO; 843 vap->va_nblocks = btod(vap->va_size); 844 return (0); 845 } 846 847 848 static int 849 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, 850 caller_context_t *ct) 851 { 852 mntnode_t *mnp = VTOM(vp); 853 854 if (mode & (VWRITE|VEXEC)) 855 return (EROFS); 856 857 /* 858 * Do access check on the underlying directory vnode. 859 */ 860 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); 861 } 862 863 864 /* 865 * New /mntfs vnode required; allocate it and fill in most of the fields. 866 */ 867 static mntnode_t * 868 mntgetnode(vnode_t *dp) 869 { 870 mntnode_t *mnp; 871 vnode_t *vp; 872 873 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 874 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 875 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 876 rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL); 877 vp = MTOV(mnp); 878 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 879 vn_setops(vp, mntvnodeops); 880 vp->v_vfsp = dp->v_vfsp; 881 vp->v_type = VREG; 882 vp->v_data = (caddr_t)mnp; 883 884 return (mnp); 885 } 886 887 /* 888 * Free the storage obtained from mntgetnode(). 889 */ 890 static void 891 mntfreenode(mntnode_t *mnp) 892 { 893 vnode_t *vp = MTOV(mnp); 894 895 rw_destroy(&mnp->mnt_contents); 896 vn_invalid(vp); 897 vn_free(vp); 898 kmem_free(mnp, sizeof (*mnp)); 899 } 900 901 902 /* ARGSUSED */ 903 static int 904 mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 905 { 906 return (0); 907 } 908 909 /* ARGSUSED */ 910 static void 911 mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 912 { 913 mntnode_t *mnp = VTOM(vp); 914 915 mntfreenode(mnp); 916 } 917 918 /* ARGSUSED */ 919 static int 920 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, 921 caller_context_t *ct) 922 { 923 mntnode_t *mnp = VTOM(vp); 924 925 if (*noffp == 0) { 926 rw_enter(&mnp->mnt_contents, RW_WRITER); 927 VTOM(vp)->mnt_offset = 0; 928 rw_exit(&mnp->mnt_contents); 929 } 930 931 return (0); 932 } 933 934 /* 935 * Return the answer requested to poll(). 936 * POLLRDBAND will return when the mtime of the mnttab 937 * information is newer than the latest one read for this open. 938 */ 939 /* ARGSUSED */ 940 static int 941 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp, 942 caller_context_t *ct) 943 { 944 mntnode_t *mnp = VTOM(vp); 945 mntsnap_t *snap; 946 947 rw_enter(&mnp->mnt_contents, RW_READER); 948 snap = &mnp->mnt_read; 949 if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec || 950 (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec && 951 mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec)) 952 snap = &mnp->mnt_ioctl; 953 954 *revp = 0; 955 *phpp = (pollhead_t *)NULL; 956 if (ev & POLLIN) 957 *revp |= POLLIN; 958 959 if (ev & POLLRDNORM) 960 *revp |= POLLRDNORM; 961 962 if (ev & POLLRDBAND) { 963 vfs_mnttab_poll(&snap->mnts_time, phpp); 964 if (*phpp == (pollhead_t *)NULL) 965 *revp |= POLLRDBAND; 966 } 967 rw_exit(&mnp->mnt_contents); 968 969 if (*revp || *phpp != NULL || any) { 970 return (0); 971 } 972 /* 973 * If someone is polling an unsupported poll events (e.g. 974 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 975 * That way we will ensure that we don't return a 0 976 * revents with a NULL pollhead pointer. 977 */ 978 *revp = POLLERR; 979 return (0); 980 } 981 /* ARGSUSED */ 982 static int 983 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 984 cred_t *cr, int *rvalp, caller_context_t *ct) 985 { 986 uint_t *up = (uint_t *)arg; 987 mntnode_t *mnp = VTOM(vp); 988 mntsnap_t *snap; 989 int error; 990 991 error = 0; 992 rw_enter(&mnp->mnt_contents, RW_READER); 993 snap = &mnp->mnt_ioctl; 994 switch (cmd) { 995 996 case MNTIOC_NMNTS: { /* get no. of mounted resources */ 997 if (snap->mnts_count == 0) { 998 if (!rw_tryupgrade(&mnp->mnt_contents)) { 999 rw_exit(&mnp->mnt_contents); 1000 rw_enter(&mnp->mnt_contents, RW_WRITER); 1001 } 1002 if ((error = 1003 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) 1004 != 0) { 1005 rw_exit(&mnp->mnt_contents); 1006 return (error); 1007 } 1008 rw_downgrade(&mnp->mnt_contents); 1009 } 1010 if (suword32(up, snap->mnts_count) != 0) 1011 error = EFAULT; 1012 break; 1013 } 1014 1015 case MNTIOC_GETDEVLIST: { /* get mounted device major/minor nos */ 1016 uint_t *devlist; 1017 int i; 1018 size_t len; 1019 1020 if (snap->mnts_count == 0) { 1021 if (!rw_tryupgrade(&mnp->mnt_contents)) { 1022 rw_exit(&mnp->mnt_contents); 1023 rw_enter(&mnp->mnt_contents, RW_WRITER); 1024 } 1025 if ((error = 1026 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) 1027 != 0) { 1028 rw_exit(&mnp->mnt_contents); 1029 return (error); 1030 } 1031 rw_downgrade(&mnp->mnt_contents); 1032 } 1033 1034 len = 2 * snap->mnts_count * sizeof (uint_t); 1035 devlist = kmem_alloc(len, KM_SLEEP); 1036 for (i = 0; i < snap->mnts_count; i++) { 1037 1038 #ifdef _SYSCALL32_IMPL 1039 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1040 struct extmnttab32 tab; 1041 1042 if ((error = xcopyin(snap->mnts_text + 1043 i * sizeof (struct extmnttab32), &tab, 1044 sizeof (tab))) != 0) 1045 break; 1046 1047 devlist[i*2] = tab.mnt_major; 1048 devlist[i*2+1] = tab.mnt_minor; 1049 } else { 1050 #endif 1051 struct extmnttab tab; 1052 1053 if ((error = xcopyin(snap->mnts_text + 1054 i * sizeof (struct extmnttab), &tab, 1055 sizeof (tab))) != 0) 1056 break; 1057 1058 devlist[i*2] = tab.mnt_major; 1059 devlist[i*2+1] = tab.mnt_minor; 1060 #ifdef _SYSCALL32_IMPL 1061 } 1062 #endif 1063 } 1064 1065 if (error == 0) 1066 error = xcopyout(devlist, up, len); 1067 kmem_free(devlist, len); 1068 break; 1069 } 1070 1071 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1072 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1073 { 1074 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1075 STRUCT_DECL(mnttagdesc, tagdesc); 1076 char *cptr; 1077 uint32_t major, minor; 1078 char tagbuf[MAX_MNTOPT_TAG]; 1079 char *pbuf; 1080 size_t len; 1081 uint_t start = 0; 1082 mntdata_t *mntdata = MTOD(mnp); 1083 zone_t *zone = mntdata->mnt_zone; 1084 1085 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1086 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1087 error = EFAULT; 1088 break; 1089 } 1090 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1091 if (zone != global_zone) { 1092 (void) strcpy(pbuf, zone->zone_rootpath); 1093 /* truncate "/" and nul */ 1094 start = zone->zone_rootpathlen - 2; 1095 ASSERT(pbuf[start] == '/'); 1096 } 1097 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1098 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1099 if (error) { 1100 kmem_free(pbuf, MAXPATHLEN); 1101 break; 1102 } 1103 if (start != 0 && pbuf[start] != '/') { 1104 kmem_free(pbuf, MAXPATHLEN); 1105 error = EINVAL; 1106 break; 1107 } 1108 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1109 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1110 kmem_free(pbuf, MAXPATHLEN); 1111 break; 1112 } 1113 major = STRUCT_FGET(tagdesc, mtd_major); 1114 minor = STRUCT_FGET(tagdesc, mtd_minor); 1115 if (cmd == MNTIOC_SETTAG) 1116 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1117 else 1118 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1119 kmem_free(pbuf, MAXPATHLEN); 1120 break; 1121 } 1122 1123 case MNTIOC_SHOWHIDDEN: 1124 { 1125 mutex_enter(&vp->v_lock); 1126 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1127 mutex_exit(&vp->v_lock); 1128 break; 1129 } 1130 1131 case MNTIOC_GETMNTENT: 1132 { 1133 size_t idx; 1134 uintptr_t addr; 1135 1136 if (!rw_tryupgrade(&mnp->mnt_contents)) { 1137 rw_exit(&mnp->mnt_contents); 1138 rw_enter(&mnp->mnt_contents, RW_WRITER); 1139 } 1140 idx = mnp->mnt_offset; 1141 if (snap->mnts_count == 0 || idx == 0) { 1142 if ((error = 1143 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) 1144 != 0) { 1145 rw_exit(&mnp->mnt_contents); 1146 return (error); 1147 } 1148 } 1149 /* 1150 * If the next index is beyond the end of the current mnttab, 1151 * return EOF 1152 */ 1153 if (idx >= snap->mnts_count) { 1154 *rvalp = 1; 1155 rw_exit(&mnp->mnt_contents); 1156 return (0); 1157 } 1158 1159 #ifdef _SYSCALL32_IMPL 1160 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1161 addr = (uintptr_t)(snap->mnts_metadata + idx * 1162 sizeof (struct extmnttab32)); 1163 error = suword32((void *)arg, addr); 1164 } else { 1165 #endif 1166 addr = (uintptr_t)(snap->mnts_metadata + idx * 1167 sizeof (struct extmnttab)); 1168 error = sulword((void *)arg, addr); 1169 #ifdef _SYSCALL32_IMPL 1170 } 1171 #endif 1172 1173 if (error != 0) { 1174 rw_exit(&mnp->mnt_contents); 1175 return (error); 1176 } 1177 1178 mnp->mnt_offset++; 1179 break; 1180 } 1181 1182 default: 1183 error = EINVAL; 1184 break; 1185 } 1186 1187 rw_exit(&mnp->mnt_contents); 1188 return (error); 1189 } 1190 1191 /* 1192 * /mntfs vnode operations vector 1193 */ 1194 const fs_operation_def_t mnt_vnodeops_template[] = { 1195 VOPNAME_OPEN, { .vop_open = mntopen }, 1196 VOPNAME_CLOSE, { .vop_close = mntclose }, 1197 VOPNAME_READ, { .vop_read = mntread }, 1198 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1199 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1200 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1201 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1202 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1203 VOPNAME_SEEK, { .vop_seek = mntseek }, 1204 VOPNAME_POLL, { .vop_poll = mntpoll }, 1205 VOPNAME_DISPOSE, { .error = fs_error }, 1206 VOPNAME_SHRLOCK, { .error = fs_error }, 1207 NULL, NULL 1208 }; 1209