1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/file.h> 29 #include <sys/stat.h> 30 #include <sys/atomic.h> 31 #include <sys/mntio.h> 32 #include <sys/mnttab.h> 33 #include <sys/mount.h> 34 #include <sys/sunddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/fs/mntdata.h> 40 #include <fs/fs_subr.h> 41 #include <sys/vmsystm.h> 42 #include <vm/seg_vn.h> 43 44 #define MNTROOTINO 2 45 46 static mntnode_t *mntgetnode(vnode_t *); 47 48 vnodeops_t *mntvnodeops; 49 extern void vfs_mnttab_readop(void); 50 51 /* 52 * Design of kernel mnttab accounting. 53 * 54 * To support whitespace in mount names, we implement an ioctl 55 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in 56 * /etc/mnttab. The libc functions getmntent() and getextmntent() are built 57 * atop this interface. 58 * 59 * To minimize the amount of memory used in the kernel, we keep all the 60 * necessary information in the user's address space. Large server 61 * configurations can have /etc/mnttab files in excess of 64k. 62 * 63 * To support both vanilla read() calls as well as ioctl() calls, we have two 64 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl. 65 * These snapshots include the base location in user memory, the number of 66 * mounts in the snapshot, and any metadata associated with it. The metadata is 67 * used only to support the ioctl() interface, and is a series of extmnttab 68 * structures. When the user issues an ioctl(), we simply copyout a pointer to 69 * that structure, and the rest is handled in userland. 70 */ 71 72 /* 73 * NOTE: The following variable enables the generation of the "dev=xxx" 74 * in the option string for a mounted file system. Really this should 75 * be gotten rid of altogether, but for the sake of backwards compatibility 76 * we had to leave it in. It is defined as a 32-bit device number. This 77 * means that when 64-bit device numbers are in use, if either the major or 78 * minor part of the device number will not fit in a 16 bit quantity, the 79 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 80 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 81 * device number handles this check and assigns the proper value. 82 */ 83 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 84 85 static int 86 mntfs_devsize(struct vfs *vfsp) 87 { 88 dev32_t odev; 89 90 (void) cmpldev(&odev, vfsp->vfs_dev); 91 return (snprintf(NULL, 0, "dev=%x", odev)); 92 } 93 94 static int 95 mntfs_devprint(struct vfs *vfsp, char *buf) 96 { 97 dev32_t odev; 98 99 (void) cmpldev(&odev, vfsp->vfs_dev); 100 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 101 } 102 103 static int 104 mntfs_optsize(struct vfs *vfsp) 105 { 106 int i, size = 0; 107 mntopt_t *mop; 108 109 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 110 mop = &vfsp->vfs_mntopts.mo_list[i]; 111 if (mop->mo_flags & MO_NODISPLAY) 112 continue; 113 if (mop->mo_flags & MO_SET) { 114 if (size) 115 size++; /* space for comma */ 116 size += strlen(mop->mo_name); 117 /* 118 * count option value if there is one 119 */ 120 if (mop->mo_arg != NULL) { 121 size += strlen(mop->mo_arg) + 1; 122 } 123 } 124 } 125 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 126 /* 127 * Add space for "zone=<zone_name>" if required. 128 */ 129 if (size) 130 size++; /* space for comma */ 131 size += sizeof ("zone=") - 1; 132 size += strlen(vfsp->vfs_zone->zone_name); 133 } 134 if (mntfs_enabledev) { 135 if (size != 0) 136 size++; /* space for comma */ 137 size += mntfs_devsize(vfsp); 138 } 139 if (size == 0) 140 size = strlen("-"); 141 return (size); 142 } 143 144 static int 145 mntfs_optprint(struct vfs *vfsp, char *buf) 146 { 147 int i, optinbuf = 0; 148 mntopt_t *mop; 149 char *origbuf = buf; 150 151 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 152 mop = &vfsp->vfs_mntopts.mo_list[i]; 153 if (mop->mo_flags & MO_NODISPLAY) 154 continue; 155 if (mop->mo_flags & MO_SET) { 156 if (optinbuf) 157 *buf++ = ','; 158 else 159 optinbuf = 1; 160 buf += snprintf(buf, MAX_MNTOPT_STR, 161 "%s", mop->mo_name); 162 /* 163 * print option value if there is one 164 */ 165 if (mop->mo_arg != NULL) { 166 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 167 mop->mo_arg); 168 } 169 } 170 } 171 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 172 if (optinbuf) 173 *buf++ = ','; 174 else 175 optinbuf = 1; 176 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 177 vfsp->vfs_zone->zone_name); 178 } 179 if (mntfs_enabledev) { 180 if (optinbuf++) 181 *buf++ = ','; 182 buf += mntfs_devprint(vfsp, buf); 183 } 184 if (!optinbuf) { 185 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 186 } 187 return (buf - origbuf); 188 } 189 190 static size_t 191 mntfs_vfs_len(vfs_t *vfsp, zone_t *zone) 192 { 193 size_t size = 0; 194 const char *resource, *mntpt; 195 196 mntpt = refstr_value(vfsp->vfs_mntpt); 197 if (mntpt != NULL && mntpt[0] != '\0') { 198 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 199 } else { 200 size += strlen("-") + 1; 201 } 202 203 resource = refstr_value(vfsp->vfs_resource); 204 if (resource != NULL && resource[0] != '\0') { 205 if (resource[0] != '/') { 206 size += strlen(resource) + 1; 207 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 208 /* 209 * Same as the zone's view of the mount point. 210 */ 211 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 212 } else { 213 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 214 } 215 } else { 216 size += strlen("-") + 1; 217 } 218 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 219 size += mntfs_optsize(vfsp); 220 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 221 return (size); 222 } 223 224 static void 225 mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp) 226 { 227 /* 228 * Basically copy over the real vfs_t on which the root vnode is 229 * located, changing its mountpoint and resource to match those of 230 * the zone's rootpath. 231 */ 232 *rootvfsp = *zone->zone_rootvp->v_vfsp; 233 rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath); 234 rootvfsp->vfs_resource = rootvfsp->vfs_mntpt; 235 } 236 237 static size_t 238 mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden) 239 { 240 struct vfs *zonelist; 241 struct vfs *vfsp; 242 size_t size = 0; 243 uint_t cnt = 0; 244 245 ASSERT(zone->zone_rootpath != NULL); 246 247 /* 248 * If the zone has a root entry, it will be the first in the list. If 249 * it doesn't, we conjure one up. 250 */ 251 vfsp = zonelist = zone->zone_vfslist; 252 if (zonelist == NULL || 253 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 254 vfs_t tvfs; 255 /* 256 * The root of the zone is not a mount point. The vfs we want 257 * to report is that of the zone's root vnode. 258 */ 259 ASSERT(zone != global_zone); 260 mntfs_zonerootvfs(zone, &tvfs); 261 size += mntfs_vfs_len(&tvfs, zone); 262 refstr_rele(tvfs.vfs_mntpt); 263 cnt++; 264 } 265 if (zonelist == NULL) 266 goto out; 267 do { 268 /* 269 * Skip mounts that should not show up in mnttab 270 */ 271 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 272 vfsp = vfsp->vfs_zone_next; 273 continue; 274 } 275 cnt++; 276 size += mntfs_vfs_len(vfsp, zone); 277 vfsp = vfsp->vfs_zone_next; 278 } while (vfsp != zonelist); 279 out: 280 *nent_ptr = cnt; 281 return (size); 282 } 283 284 static size_t 285 mntfs_global_len(uint_t *nent_ptr, int showhidden) 286 { 287 struct vfs *vfsp; 288 size_t size = 0; 289 uint_t cnt = 0; 290 291 vfsp = rootvfs; 292 do { 293 /* 294 * Skip mounts that should not show up in mnttab 295 */ 296 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 297 vfsp = vfsp->vfs_next; 298 continue; 299 } 300 cnt++; 301 size += mntfs_vfs_len(vfsp, global_zone); 302 vfsp = vfsp->vfs_next; 303 } while (vfsp != rootvfs); 304 *nent_ptr = cnt; 305 return (size); 306 } 307 308 static void 309 mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab, 310 char **basep, int forread) 311 { 312 const char *resource, *mntpt; 313 char *cp = *basep; 314 315 mntpt = refstr_value(vfsp->vfs_mntpt); 316 resource = refstr_value(vfsp->vfs_resource); 317 318 if (tab) 319 tab->mnt_special = cp; 320 if (resource != NULL && resource[0] != '\0') { 321 if (resource[0] != '/') { 322 cp += snprintf(cp, MAXPATHLEN, "%s", resource); 323 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 324 /* 325 * Use the mount point as the resource. 326 */ 327 cp += snprintf(cp, MAXPATHLEN, "%s", 328 ZONE_PATH_TRANSLATE(mntpt, zone)); 329 } else { 330 cp += snprintf(cp, MAXPATHLEN, "%s", 331 ZONE_PATH_TRANSLATE(resource, zone)); 332 } 333 } else { 334 cp += snprintf(cp, MAXPATHLEN, "-"); 335 } 336 *cp++ = forread ? '\t' : '\0'; 337 338 if (tab) 339 tab->mnt_mountp = cp; 340 if (mntpt != NULL && mntpt[0] != '\0') { 341 /* 342 * We know the mount point is visible from within the zone, 343 * otherwise it wouldn't be on the zone's vfs list. 344 */ 345 cp += snprintf(cp, MAXPATHLEN, "%s", 346 ZONE_PATH_TRANSLATE(mntpt, zone)); 347 } else { 348 cp += snprintf(cp, MAXPATHLEN, "-"); 349 } 350 *cp++ = forread ? '\t' : '\0'; 351 352 if (tab) 353 tab->mnt_fstype = cp; 354 cp += snprintf(cp, MAXPATHLEN, "%s", 355 vfssw[vfsp->vfs_fstype].vsw_name); 356 *cp++ = forread ? '\t' : '\0'; 357 358 if (tab) 359 tab->mnt_mntopts = cp; 360 cp += mntfs_optprint(vfsp, cp); 361 *cp++ = forread ? '\t' : '\0'; 362 363 if (tab) 364 tab->mnt_time = cp; 365 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 366 *cp++ = forread ? '\n' : '\0'; 367 368 if (tab) { 369 tab->mnt_major = getmajor(vfsp->vfs_dev); 370 tab->mnt_minor = getminor(vfsp->vfs_dev); 371 } 372 373 *basep = cp; 374 } 375 376 static void 377 mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab, 378 char *basep, int forread) 379 { 380 vfs_t *zonelist; 381 vfs_t *vfsp; 382 char *cp = basep; 383 384 /* 385 * If the zone has a root entry, it will be the first in the list. If 386 * it doesn't, we conjure one up. 387 */ 388 vfsp = zonelist = zone->zone_vfslist; 389 if (zonelist == NULL || 390 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 391 vfs_t tvfs; 392 /* 393 * The root of the zone is not a mount point. The vfs we want 394 * to report is that of the zone's root vnode. 395 */ 396 ASSERT(zone != global_zone); 397 mntfs_zonerootvfs(zone, &tvfs); 398 mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread); 399 refstr_rele(tvfs.vfs_mntpt); 400 if (tab) 401 tab++; 402 } 403 if (zonelist == NULL) 404 return; 405 do { 406 /* 407 * Skip mounts that should not show up in mnttab 408 */ 409 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 410 vfsp = vfsp->vfs_zone_next; 411 continue; 412 } 413 mntfs_vfs_generate(vfsp, zone, tab, &cp, forread); 414 if (tab) 415 tab++; 416 vfsp = vfsp->vfs_zone_next; 417 } while (vfsp != zonelist); 418 } 419 420 static void 421 mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep, 422 int forread) 423 { 424 vfs_t *vfsp; 425 char *cp = basep; 426 427 vfsp = rootvfs; 428 do { 429 /* 430 * Skip mounts that should not show up in mnttab 431 */ 432 if (!showhidden && vfsp->vfs_flag & VFS_NOMNTTAB) { 433 vfsp = vfsp->vfs_next; 434 continue; 435 } 436 mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread); 437 if (tab) 438 tab++; 439 vfsp = vfsp->vfs_next; 440 } while (vfsp != rootvfs); 441 } 442 443 static char * 444 mntfs_mapin(char *base, size_t size) 445 { 446 size_t rlen = roundup(size, PAGESIZE); 447 struct as *as = curproc->p_as; 448 char *addr; 449 450 as_rangelock(as); 451 map_addr(&addr, rlen, 0, 1, 0); 452 if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) { 453 as_rangeunlock(as); 454 return (NULL); 455 } 456 as_rangeunlock(as); 457 if (copyout(base, addr, size)) { 458 (void) as_unmap(as, addr, rlen); 459 return (NULL); 460 } 461 return (addr); 462 } 463 464 static void 465 mntfs_freesnap(mntsnap_t *snap) 466 { 467 if (snap->mnts_text != NULL) 468 (void) as_unmap(curproc->p_as, snap->mnts_text, 469 roundup(snap->mnts_textsize, PAGESIZE)); 470 snap->mnts_textsize = snap->mnts_count = 0; 471 if (snap->mnts_metadata != NULL) 472 (void) as_unmap(curproc->p_as, snap->mnts_metadata, 473 roundup(snap->mnts_metasize, PAGESIZE)); 474 snap->mnts_metasize = 0; 475 } 476 477 #ifdef _SYSCALL32_IMPL 478 479 typedef struct extmnttab32 { 480 uint32_t mnt_special; 481 uint32_t mnt_mountp; 482 uint32_t mnt_fstype; 483 uint32_t mnt_mntopts; 484 uint32_t mnt_time; 485 uint_t mnt_major; 486 uint_t mnt_minor; 487 } extmnttab32_t; 488 489 #endif 490 491 /* 492 * Snapshot the latest version of the kernel mounted resource information 493 * 494 * There are two types of snapshots: one destined for reading, and one destined 495 * for ioctl(). The difference is that the ioctl() interface is delimited by 496 * NULLs, while the read() interface is delimited by tabs and newlines. 497 */ 498 /* ARGSUSED */ 499 static int 500 mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) 501 { 502 size_t size; 503 timespec_t lastmodt; 504 mntdata_t *mntdata = MTOD(mnp); 505 zone_t *zone = mntdata->mnt_zone; 506 boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone); 507 boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0); 508 struct extmnttab *metadata_baseaddr; 509 char *text_baseaddr; 510 int i; 511 mntsnap_t *snap; 512 513 if (forread) 514 snap = &mnp->mnt_read; 515 else 516 snap = &mnp->mnt_ioctl; 517 518 vfs_list_read_lock(); 519 /* 520 * Check if the mnttab info has changed since the last snapshot 521 */ 522 vfs_mnttab_modtime(&lastmodt); 523 if (snap->mnts_count && 524 lastmodt.tv_sec == snap->mnts_time.tv_sec && 525 lastmodt.tv_nsec == snap->mnts_time.tv_nsec) { 526 vfs_list_unlock(); 527 return (0); 528 } 529 530 531 if (snap->mnts_count != 0) 532 mntfs_freesnap(snap); 533 if (global_view) 534 size = mntfs_global_len(&snap->mnts_count, showhidden); 535 else 536 size = mntfs_zone_len(&snap->mnts_count, zone, showhidden); 537 ASSERT(size != 0); 538 539 if (!forread) 540 metadata_baseaddr = kmem_alloc( 541 snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP); 542 else 543 metadata_baseaddr = NULL; 544 545 text_baseaddr = kmem_alloc(size, KM_SLEEP); 546 547 if (global_view) 548 mntfs_global_generate(showhidden, metadata_baseaddr, 549 text_baseaddr, forread); 550 else 551 mntfs_zone_generate(zone, showhidden, 552 metadata_baseaddr, text_baseaddr, forread); 553 554 vfs_mnttab_modtime(&snap->mnts_time); 555 vfs_list_unlock(); 556 557 snap->mnts_text = mntfs_mapin(text_baseaddr, size); 558 snap->mnts_textsize = size; 559 kmem_free(text_baseaddr, size); 560 561 /* 562 * The pointers in the metadata refer to addreesses in the range 563 * [base_addr, base_addr + size]. Now that we have mapped the text into 564 * the user's address space, we have to convert these addresses into the 565 * new (user) range. We also handle the conversion for 32-bit and 566 * 32-bit applications here. 567 */ 568 if (!forread) { 569 struct extmnttab *tab; 570 #ifdef _SYSCALL32_IMPL 571 struct extmnttab32 *tab32; 572 573 if (datamodel == DATAMODEL_ILP32) { 574 tab = (struct extmnttab *)metadata_baseaddr; 575 tab32 = (struct extmnttab32 *)metadata_baseaddr; 576 577 for (i = 0; i < snap->mnts_count; i++) { 578 tab32[i].mnt_special = 579 (uintptr_t)snap->mnts_text + 580 (tab[i].mnt_special - text_baseaddr); 581 tab32[i].mnt_mountp = 582 (uintptr_t)snap->mnts_text + 583 (tab[i].mnt_mountp - text_baseaddr); 584 tab32[i].mnt_fstype = 585 (uintptr_t)snap->mnts_text + 586 (tab[i].mnt_fstype - text_baseaddr); 587 tab32[i].mnt_mntopts = 588 (uintptr_t)snap->mnts_text + 589 (tab[i].mnt_mntopts - text_baseaddr); 590 tab32[i].mnt_time = (uintptr_t)snap->mnts_text + 591 (tab[i].mnt_time - text_baseaddr); 592 tab32[i].mnt_major = tab[i].mnt_major; 593 tab32[i].mnt_minor = tab[i].mnt_minor; 594 } 595 596 snap->mnts_metasize = 597 snap->mnts_count * sizeof (struct extmnttab32); 598 snap->mnts_metadata = mntfs_mapin( 599 (char *)metadata_baseaddr, 600 snap->mnts_metasize); 601 602 } else { 603 #endif 604 tab = (struct extmnttab *)metadata_baseaddr; 605 for (i = 0; i < snap->mnts_count; i++) { 606 tab[i].mnt_special = snap->mnts_text + 607 (tab[i].mnt_special - text_baseaddr); 608 tab[i].mnt_mountp = snap->mnts_text + 609 (tab[i].mnt_mountp - text_baseaddr); 610 tab[i].mnt_fstype = snap->mnts_text + 611 (tab[i].mnt_fstype - text_baseaddr); 612 tab[i].mnt_mntopts = snap->mnts_text + 613 (tab[i].mnt_mntopts - text_baseaddr); 614 tab[i].mnt_time = snap->mnts_text + 615 (tab[i].mnt_time - text_baseaddr); 616 } 617 618 snap->mnts_metasize = 619 snap->mnts_count * sizeof (struct extmnttab); 620 snap->mnts_metadata = mntfs_mapin( 621 (char *)metadata_baseaddr, snap->mnts_metasize); 622 #ifdef _SYSCALL32_IMPL 623 } 624 #endif 625 626 kmem_free(metadata_baseaddr, 627 snap->mnts_count * sizeof (struct extmnttab)); 628 } 629 630 mntdata->mnt_size = size; 631 632 if (snap->mnts_text == NULL || 633 (!forread && snap->mnts_metadata == NULL)) { 634 mntfs_freesnap(snap); 635 return (ENOMEM); 636 } 637 vfs_mnttab_readop(); 638 return (0); 639 } 640 641 /* 642 * Public function to convert vfs_mntopts into a string. 643 * A buffer of sufficient size is allocated, which is returned via bufp, 644 * and whose length is returned via lenp. 645 */ 646 void 647 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 648 { 649 size_t len; 650 char *buf; 651 652 vfs_list_read_lock(); 653 654 len = mntfs_optsize(vfsp) + 1; 655 buf = kmem_alloc(len, KM_NOSLEEP); 656 if (buf == NULL) { 657 *bufp = NULL; 658 vfs_list_unlock(); 659 return; 660 } 661 buf[len - 1] = '\0'; 662 (void) mntfs_optprint(vfsp, buf); 663 ASSERT(buf[len - 1] == '\0'); 664 665 vfs_list_unlock(); 666 *bufp = buf; 667 *lenp = len; 668 } 669 670 671 /* ARGSUSED */ 672 static int 673 mntopen(vnode_t **vpp, int flag, cred_t *cr) 674 { 675 vnode_t *vp = *vpp; 676 mntnode_t *nmnp; 677 678 /* 679 * Not allowed to open for writing, return error. 680 */ 681 if (flag & FWRITE) 682 return (EPERM); 683 /* 684 * Create a new mnt/vnode for each open, this will give us a handle to 685 * hang the snapshot on. 686 */ 687 nmnp = mntgetnode(vp); 688 689 *vpp = MTOV(nmnp); 690 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 691 VN_RELE(vp); 692 return (0); 693 } 694 695 /* ARGSUSED */ 696 static int 697 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) 698 { 699 mntnode_t *mnp = VTOM(vp); 700 701 /* Clean up any locks or shares held by the current process */ 702 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 703 cleanshares(vp, ttoproc(curthread)->p_pid); 704 705 if (count > 1) 706 return (0); 707 if (vp->v_count == 1) { 708 mntfs_freesnap(&mnp->mnt_read); 709 mntfs_freesnap(&mnp->mnt_ioctl); 710 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 711 } 712 return (0); 713 } 714 715 /* ARGSUSED */ 716 static int 717 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 718 { 719 int error = 0; 720 off_t off = uio->uio_offset; 721 size_t len = uio->uio_resid; 722 mntnode_t *mnp = VTOM(vp); 723 char *buf; 724 mntsnap_t *snap = &mnp->mnt_read; 725 int datamodel; 726 727 if (off == (off_t)0 || snap->mnts_count == 0) { 728 /* 729 * It is assumed that any kernel callers wishing 730 * to read mnttab will be using extmnttab entries 731 * and not extmnttab32 entries, whether or not 732 * the kernel is LP64 or ILP32. Thus, force the 733 * datamodel that mntfs_snapshot uses to be 734 * DATAMODEL_LP64. 735 */ 736 if (uio->uio_segflg == UIO_SYSSPACE) 737 datamodel = DATAMODEL_LP64; 738 else 739 datamodel = get_udatamodel(); 740 if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0) 741 return (error); 742 } 743 if ((size_t)(off + len) > snap->mnts_textsize) 744 len = snap->mnts_textsize - off; 745 746 if (off < 0 || len > snap->mnts_textsize) 747 return (EFAULT); 748 749 if (len == 0) 750 return (0); 751 752 /* 753 * The mnttab image is stored in the user's address space, 754 * so we have to copy it into the kernel from userland, 755 * then copy it back out to the specified address. 756 */ 757 buf = kmem_alloc(len, KM_SLEEP); 758 if (copyin(snap->mnts_text + off, buf, len)) 759 error = EFAULT; 760 else { 761 error = uiomove(buf, len, UIO_READ, uio); 762 } 763 kmem_free(buf, len); 764 vfs_mnttab_readop(); 765 return (error); 766 } 767 768 769 static int 770 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 771 { 772 mntnode_t *mnp = VTOM(vp); 773 int error; 774 vnode_t *rvp; 775 extern timespec_t vfs_mnttab_ctime; 776 mntdata_t *mntdata = MTOD(VTOM(vp)); 777 mntsnap_t *snap = mnp->mnt_read.mnts_count ? 778 &mnp->mnt_read : &mnp->mnt_ioctl; 779 780 /* 781 * Return all the attributes. Should be refined 782 * so that it returns only those asked for. 783 * Most of this is complete fakery anyway. 784 */ 785 rvp = mnp->mnt_mountvp; 786 /* 787 * Attributes are same as underlying file with modifications 788 */ 789 if (error = VOP_GETATTR(rvp, vap, flags, cr)) 790 return (error); 791 792 /* 793 * We always look like a regular file 794 */ 795 vap->va_type = VREG; 796 /* 797 * mode should basically be read only 798 */ 799 vap->va_mode &= 07444; 800 vap->va_fsid = vp->v_vfsp->vfs_dev; 801 vap->va_blksize = DEV_BSIZE; 802 vap->va_rdev = 0; 803 vap->va_seq = 0; 804 /* 805 * Set nlink to the number of open vnodes for mnttab info 806 * plus one for existing. 807 */ 808 vap->va_nlink = mntdata->mnt_nopen + 1; 809 /* 810 * If we haven't taken a snapshot yet, set the 811 * size to the size of the latest snapshot. 812 */ 813 vap->va_size = snap->mnts_textsize ? snap->mnts_textsize : 814 mntdata->mnt_size; 815 /* 816 * Fetch mtime from the vfs mnttab timestamp 817 */ 818 vap->va_ctime = vfs_mnttab_ctime; 819 vfs_list_read_lock(); 820 vfs_mnttab_modtime(&vap->va_mtime); 821 vap->va_atime = vap->va_mtime; 822 vfs_list_unlock(); 823 /* 824 * Nodeid is always ROOTINO; 825 */ 826 vap->va_nodeid = (ino64_t)MNTROOTINO; 827 vap->va_nblocks = btod(vap->va_size); 828 return (0); 829 } 830 831 832 static int 833 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr) 834 { 835 mntnode_t *mnp = VTOM(vp); 836 837 if (mode & (VWRITE|VEXEC)) 838 return (EROFS); 839 840 /* 841 * Do access check on the underlying directory vnode. 842 */ 843 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr)); 844 } 845 846 847 /* 848 * New /mntfs vnode required; allocate it and fill in most of the fields. 849 */ 850 static mntnode_t * 851 mntgetnode(vnode_t *dp) 852 { 853 mntnode_t *mnp; 854 vnode_t *vp; 855 856 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 857 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 858 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 859 vp = MTOV(mnp); 860 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 861 vn_setops(vp, mntvnodeops); 862 vp->v_vfsp = dp->v_vfsp; 863 vp->v_type = VREG; 864 vp->v_data = (caddr_t)mnp; 865 866 return (mnp); 867 } 868 869 /* 870 * Free the storage obtained from mntgetnode(). 871 */ 872 static void 873 mntfreenode(mntnode_t *mnp) 874 { 875 vnode_t *vp = MTOV(mnp); 876 877 vn_invalid(vp); 878 vn_free(vp); 879 kmem_free(mnp, sizeof (*mnp)); 880 } 881 882 883 /* ARGSUSED */ 884 static int 885 mntfsync(vnode_t *vp, int syncflag, cred_t *cr) 886 { 887 return (0); 888 } 889 890 /* ARGSUSED */ 891 static void 892 mntinactive(vnode_t *vp, cred_t *cr) 893 { 894 mntnode_t *mnp = VTOM(vp); 895 896 mntfreenode(mnp); 897 } 898 899 /* ARGSUSED */ 900 static int 901 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp) 902 { 903 if (*noffp == 0) 904 VTOM(vp)->mnt_offset = 0; 905 906 return (0); 907 } 908 909 /* 910 * Return the answer requested to poll(). 911 * POLLRDBAND will return when the mtime of the mnttab 912 * information is newer than the latest one read for this open. 913 */ 914 /* ARGSUSED */ 915 static int 916 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp) 917 { 918 mntnode_t *mnp = VTOM(vp); 919 mntsnap_t *snap = &mnp->mnt_read; 920 921 if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec || 922 (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec && 923 mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec)) 924 snap = &mnp->mnt_ioctl; 925 926 *revp = 0; 927 *phpp = (pollhead_t *)NULL; 928 if (ev & POLLIN) 929 *revp |= POLLIN; 930 931 if (ev & POLLRDNORM) 932 *revp |= POLLRDNORM; 933 934 if (ev & POLLRDBAND) { 935 vfs_mnttab_poll(&snap->mnts_time, phpp); 936 if (*phpp == (pollhead_t *)NULL) 937 *revp |= POLLRDBAND; 938 } 939 if (*revp || *phpp != NULL || any) { 940 return (0); 941 } 942 /* 943 * If someone is polling an unsupported poll events (e.g. 944 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 945 * That way we will ensure that we don't return a 0 946 * revents with a NULL pollhead pointer. 947 */ 948 *revp = POLLERR; 949 return (0); 950 } 951 /* ARGSUSED */ 952 static int 953 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 954 cred_t *cr, int *rvalp) 955 { 956 uint_t *up = (uint_t *)arg; 957 mntnode_t *mnp = VTOM(vp); 958 mntsnap_t *snap = &mnp->mnt_ioctl; 959 int error; 960 961 error = 0; 962 switch (cmd) { 963 964 case MNTIOC_NMNTS: { /* get no. of mounted resources */ 965 if (snap->mnts_count == 0) { 966 if ((error = 967 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 968 return (error); 969 } 970 if (suword32(up, snap->mnts_count) != 0) 971 error = EFAULT; 972 break; 973 } 974 975 case MNTIOC_GETDEVLIST: { /* get mounted device major/minor nos */ 976 uint_t *devlist; 977 int i; 978 size_t len; 979 980 if (snap->mnts_count == 0) { 981 if ((error = 982 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 983 return (error); 984 } 985 986 len = 2 * snap->mnts_count * sizeof (uint_t); 987 devlist = kmem_alloc(len, KM_SLEEP); 988 for (i = 0; i < snap->mnts_count; i++) { 989 990 #ifdef _SYSCALL32_IMPL 991 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 992 struct extmnttab32 tab; 993 994 if ((error = xcopyin(snap->mnts_text + 995 i * sizeof (struct extmnttab32), &tab, 996 sizeof (tab))) != 0) 997 break; 998 999 devlist[i*2] = tab.mnt_major; 1000 devlist[i*2+1] = tab.mnt_minor; 1001 } else { 1002 #endif 1003 struct extmnttab tab; 1004 1005 if ((error = xcopyin(snap->mnts_text + 1006 i * sizeof (struct extmnttab), &tab, 1007 sizeof (tab))) != 0) 1008 break; 1009 1010 devlist[i*2] = tab.mnt_major; 1011 devlist[i*2+1] = tab.mnt_minor; 1012 #ifdef _SYSCALL32_IMPL 1013 } 1014 #endif 1015 } 1016 1017 if (error == 0) 1018 error = xcopyout(devlist, up, len); 1019 kmem_free(devlist, len); 1020 break; 1021 } 1022 1023 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1024 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1025 { 1026 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1027 STRUCT_DECL(mnttagdesc, tagdesc); 1028 char *cptr; 1029 uint32_t major, minor; 1030 char tagbuf[MAX_MNTOPT_TAG]; 1031 char *pbuf; 1032 size_t len; 1033 uint_t start = 0; 1034 mntdata_t *mntdata = MTOD(mnp); 1035 zone_t *zone = mntdata->mnt_zone; 1036 1037 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1038 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1039 error = EFAULT; 1040 break; 1041 } 1042 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1043 if (zone != global_zone) { 1044 (void) strcpy(pbuf, zone->zone_rootpath); 1045 /* truncate "/" and nul */ 1046 start = zone->zone_rootpathlen - 2; 1047 ASSERT(pbuf[start] == '/'); 1048 } 1049 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1050 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1051 if (error) { 1052 kmem_free(pbuf, MAXPATHLEN); 1053 break; 1054 } 1055 if (start != 0 && pbuf[start] != '/') { 1056 kmem_free(pbuf, MAXPATHLEN); 1057 error = EINVAL; 1058 break; 1059 } 1060 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1061 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1062 kmem_free(pbuf, MAXPATHLEN); 1063 break; 1064 } 1065 major = STRUCT_FGET(tagdesc, mtd_major); 1066 minor = STRUCT_FGET(tagdesc, mtd_minor); 1067 if (cmd == MNTIOC_SETTAG) 1068 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1069 else 1070 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1071 kmem_free(pbuf, MAXPATHLEN); 1072 break; 1073 } 1074 1075 case MNTIOC_SHOWHIDDEN: 1076 { 1077 mutex_enter(&vp->v_lock); 1078 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1079 mutex_exit(&vp->v_lock); 1080 break; 1081 } 1082 1083 case MNTIOC_GETMNTENT: 1084 { 1085 size_t idx; 1086 uintptr_t addr; 1087 1088 idx = mnp->mnt_offset; 1089 if (snap->mnts_count == 0 || idx == 0) { 1090 if ((error = 1091 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1092 return (error); 1093 } 1094 /* 1095 * If the next index is beyond the end of the current mnttab, 1096 * return EOF 1097 */ 1098 if (idx >= snap->mnts_count) { 1099 *rvalp = 1; 1100 return (0); 1101 } 1102 1103 #ifdef _SYSCALL32_IMPL 1104 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1105 addr = (uintptr_t)(snap->mnts_metadata + idx * 1106 sizeof (struct extmnttab32)); 1107 error = suword32((void *)arg, addr); 1108 } else { 1109 #endif 1110 addr = (uintptr_t)(snap->mnts_metadata + idx * 1111 sizeof (struct extmnttab)); 1112 error = sulword((void *)arg, addr); 1113 #ifdef _SYSCALL32_IMPL 1114 } 1115 #endif 1116 1117 if (error != 0) 1118 return (error); 1119 1120 mnp->mnt_offset++; 1121 break; 1122 } 1123 1124 default: 1125 error = EINVAL; 1126 break; 1127 } 1128 1129 return (error); 1130 } 1131 1132 /* 1133 * /mntfs vnode operations vector 1134 */ 1135 const fs_operation_def_t mnt_vnodeops_template[] = { 1136 VOPNAME_OPEN, { .vop_open = mntopen }, 1137 VOPNAME_CLOSE, { .vop_close = mntclose }, 1138 VOPNAME_READ, { .vop_read = mntread }, 1139 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1140 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1141 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1142 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1143 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1144 VOPNAME_SEEK, { .vop_seek = mntseek }, 1145 VOPNAME_POLL, { .vop_poll = mntpoll }, 1146 VOPNAME_DISPOSE, { .error = fs_error }, 1147 VOPNAME_SHRLOCK, { .error = fs_error }, 1148 NULL, NULL 1149 }; 1150