1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/file.h> 29 #include <sys/stat.h> 30 #include <sys/atomic.h> 31 #include <sys/mntio.h> 32 #include <sys/mnttab.h> 33 #include <sys/mount.h> 34 #include <sys/sunddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/fs/mntdata.h> 40 #include <fs/fs_subr.h> 41 #include <sys/vmsystm.h> 42 #include <vm/seg_vn.h> 43 44 #define MNTROOTINO 2 45 46 static mntnode_t *mntgetnode(vnode_t *); 47 48 vnodeops_t *mntvnodeops; 49 extern void vfs_mnttab_readop(void); 50 51 /* 52 * Design of kernel mnttab accounting. 53 * 54 * To support whitespace in mount names, we implement an ioctl 55 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in 56 * /etc/mnttab. The libc functions getmntent() and getextmntent() are built 57 * atop this interface. 58 * 59 * To minimize the amount of memory used in the kernel, we keep all the 60 * necessary information in the user's address space. Large server 61 * configurations can have /etc/mnttab files in excess of 64k. 62 * 63 * To support both vanilla read() calls as well as ioctl() calls, we have two 64 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl. 65 * These snapshots include the base location in user memory, the number of 66 * mounts in the snapshot, and any metadata associated with it. The metadata is 67 * used only to support the ioctl() interface, and is a series of extmnttab 68 * structures. When the user issues an ioctl(), we simply copyout a pointer to 69 * that structure, and the rest is handled in userland. 70 */ 71 72 /* 73 * NOTE: The following variable enables the generation of the "dev=xxx" 74 * in the option string for a mounted file system. Really this should 75 * be gotten rid of altogether, but for the sake of backwards compatibility 76 * we had to leave it in. It is defined as a 32-bit device number. This 77 * means that when 64-bit device numbers are in use, if either the major or 78 * minor part of the device number will not fit in a 16 bit quantity, the 79 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 80 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 81 * device number handles this check and assigns the proper value. 82 */ 83 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 84 85 static int 86 mntfs_devsize(struct vfs *vfsp) 87 { 88 dev32_t odev; 89 90 (void) cmpldev(&odev, vfsp->vfs_dev); 91 return (snprintf(NULL, 0, "dev=%x", odev)); 92 } 93 94 static int 95 mntfs_devprint(struct vfs *vfsp, char *buf) 96 { 97 dev32_t odev; 98 99 (void) cmpldev(&odev, vfsp->vfs_dev); 100 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 101 } 102 103 static int 104 mntfs_optsize(struct vfs *vfsp) 105 { 106 int i, size = 0; 107 mntopt_t *mop; 108 109 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 110 mop = &vfsp->vfs_mntopts.mo_list[i]; 111 if (mop->mo_flags & MO_NODISPLAY) 112 continue; 113 if (mop->mo_flags & MO_SET) { 114 if (size) 115 size++; /* space for comma */ 116 size += strlen(mop->mo_name); 117 /* 118 * count option value if there is one 119 */ 120 if (mop->mo_arg != NULL) { 121 size += strlen(mop->mo_arg) + 1; 122 } 123 } 124 } 125 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 126 /* 127 * Add space for "zone=<zone_name>" if required. 128 */ 129 if (size) 130 size++; /* space for comma */ 131 size += sizeof ("zone=") - 1; 132 size += strlen(vfsp->vfs_zone->zone_name); 133 } 134 if (mntfs_enabledev) { 135 if (size != 0) 136 size++; /* space for comma */ 137 size += mntfs_devsize(vfsp); 138 } 139 if (size == 0) 140 size = strlen("-"); 141 return (size); 142 } 143 144 static int 145 mntfs_optprint(struct vfs *vfsp, char *buf) 146 { 147 int i, optinbuf = 0; 148 mntopt_t *mop; 149 char *origbuf = buf; 150 151 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 152 mop = &vfsp->vfs_mntopts.mo_list[i]; 153 if (mop->mo_flags & MO_NODISPLAY) 154 continue; 155 if (mop->mo_flags & MO_SET) { 156 if (optinbuf) 157 *buf++ = ','; 158 else 159 optinbuf = 1; 160 buf += snprintf(buf, MAX_MNTOPT_STR, 161 "%s", mop->mo_name); 162 /* 163 * print option value if there is one 164 */ 165 if (mop->mo_arg != NULL) { 166 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 167 mop->mo_arg); 168 } 169 } 170 } 171 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 172 if (optinbuf) 173 *buf++ = ','; 174 else 175 optinbuf = 1; 176 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 177 vfsp->vfs_zone->zone_name); 178 } 179 if (mntfs_enabledev) { 180 if (optinbuf++) 181 *buf++ = ','; 182 buf += mntfs_devprint(vfsp, buf); 183 } 184 if (!optinbuf) { 185 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 186 } 187 return (buf - origbuf); 188 } 189 190 static size_t 191 mntfs_vfs_len(vfs_t *vfsp, zone_t *zone) 192 { 193 size_t size = 0; 194 const char *resource, *mntpt; 195 196 mntpt = refstr_value(vfsp->vfs_mntpt); 197 if (mntpt != NULL && mntpt[0] != '\0') { 198 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 199 } else { 200 size += strlen("-") + 1; 201 } 202 203 resource = refstr_value(vfsp->vfs_resource); 204 if (resource != NULL && resource[0] != '\0') { 205 if (resource[0] != '/') { 206 size += strlen(resource) + 1; 207 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 208 /* 209 * Same as the zone's view of the mount point. 210 */ 211 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 212 } else { 213 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 214 } 215 } else { 216 size += strlen("-") + 1; 217 } 218 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 219 size += mntfs_optsize(vfsp); 220 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 221 return (size); 222 } 223 224 static void 225 mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp) 226 { 227 /* 228 * Basically copy over the real vfs_t on which the root vnode is 229 * located, changing its mountpoint and resource to match those of 230 * the zone's rootpath. 231 */ 232 *rootvfsp = *zone->zone_rootvp->v_vfsp; 233 rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath); 234 rootvfsp->vfs_resource = rootvfsp->vfs_mntpt; 235 } 236 237 static size_t 238 mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden) 239 { 240 struct vfs *zonelist; 241 struct vfs *vfsp; 242 size_t size = 0; 243 uint_t cnt = 0; 244 245 ASSERT(zone->zone_rootpath != NULL); 246 247 /* 248 * If the zone has a root entry, it will be the first in the list. If 249 * it doesn't, we conjure one up. 250 */ 251 vfsp = zonelist = zone->zone_vfslist; 252 if (zonelist == NULL || 253 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 254 vfs_t tvfs; 255 /* 256 * The root of the zone is not a mount point. The vfs we want 257 * to report is that of the zone's root vnode. 258 */ 259 ASSERT(zone != global_zone); 260 mntfs_zonerootvfs(zone, &tvfs); 261 size += mntfs_vfs_len(&tvfs, zone); 262 refstr_rele(tvfs.vfs_mntpt); 263 cnt++; 264 } 265 if (zonelist == NULL) 266 goto out; 267 do { 268 /* 269 * Skip mounts that should not show up in mnttab 270 */ 271 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 272 vfsp = vfsp->vfs_zone_next; 273 continue; 274 } 275 cnt++; 276 size += mntfs_vfs_len(vfsp, zone); 277 vfsp = vfsp->vfs_zone_next; 278 } while (vfsp != zonelist); 279 out: 280 *nent_ptr = cnt; 281 return (size); 282 } 283 284 static size_t 285 mntfs_global_len(uint_t *nent_ptr, int showhidden) 286 { 287 struct vfs *vfsp; 288 size_t size = 0; 289 uint_t cnt = 0; 290 291 vfsp = rootvfs; 292 do { 293 /* 294 * Skip mounts that should not show up in mnttab 295 */ 296 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 297 vfsp = vfsp->vfs_next; 298 continue; 299 } 300 cnt++; 301 size += mntfs_vfs_len(vfsp, global_zone); 302 vfsp = vfsp->vfs_next; 303 } while (vfsp != rootvfs); 304 *nent_ptr = cnt; 305 return (size); 306 } 307 308 static void 309 mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab, 310 char **basep, int forread) 311 { 312 const char *resource, *mntpt; 313 char *cp = *basep; 314 315 mntpt = refstr_value(vfsp->vfs_mntpt); 316 resource = refstr_value(vfsp->vfs_resource); 317 318 if (tab) 319 tab->mnt_special = cp; 320 if (resource != NULL && resource[0] != '\0') { 321 if (resource[0] != '/') { 322 cp += snprintf(cp, MAXPATHLEN, "%s", resource); 323 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 324 /* 325 * Use the mount point as the resource. 326 */ 327 cp += snprintf(cp, MAXPATHLEN, "%s", 328 ZONE_PATH_TRANSLATE(mntpt, zone)); 329 } else { 330 cp += snprintf(cp, MAXPATHLEN, "%s", 331 ZONE_PATH_TRANSLATE(resource, zone)); 332 } 333 } else { 334 cp += snprintf(cp, MAXPATHLEN, "-"); 335 } 336 *cp++ = forread ? '\t' : '\0'; 337 338 if (tab) 339 tab->mnt_mountp = cp; 340 if (mntpt != NULL && mntpt[0] != '\0') { 341 /* 342 * We know the mount point is visible from within the zone, 343 * otherwise it wouldn't be on the zone's vfs list. 344 */ 345 cp += snprintf(cp, MAXPATHLEN, "%s", 346 ZONE_PATH_TRANSLATE(mntpt, zone)); 347 } else { 348 cp += snprintf(cp, MAXPATHLEN, "-"); 349 } 350 *cp++ = forread ? '\t' : '\0'; 351 352 if (tab) 353 tab->mnt_fstype = cp; 354 cp += snprintf(cp, MAXPATHLEN, "%s", 355 vfssw[vfsp->vfs_fstype].vsw_name); 356 *cp++ = forread ? '\t' : '\0'; 357 358 if (tab) 359 tab->mnt_mntopts = cp; 360 cp += mntfs_optprint(vfsp, cp); 361 *cp++ = forread ? '\t' : '\0'; 362 363 if (tab) 364 tab->mnt_time = cp; 365 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 366 *cp++ = forread ? '\n' : '\0'; 367 368 if (tab) { 369 tab->mnt_major = getmajor(vfsp->vfs_dev); 370 tab->mnt_minor = getminor(vfsp->vfs_dev); 371 } 372 373 *basep = cp; 374 } 375 376 static void 377 mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab, 378 char *basep, int forread) 379 { 380 vfs_t *zonelist; 381 vfs_t *vfsp; 382 char *cp = basep; 383 384 /* 385 * If the zone has a root entry, it will be the first in the list. If 386 * it doesn't, we conjure one up. 387 */ 388 vfsp = zonelist = zone->zone_vfslist; 389 if (zonelist == NULL || 390 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 391 vfs_t tvfs; 392 /* 393 * The root of the zone is not a mount point. The vfs we want 394 * to report is that of the zone's root vnode. 395 */ 396 ASSERT(zone != global_zone); 397 mntfs_zonerootvfs(zone, &tvfs); 398 mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread); 399 refstr_rele(tvfs.vfs_mntpt); 400 if (tab) 401 tab++; 402 } 403 if (zonelist == NULL) 404 return; 405 do { 406 /* 407 * Skip mounts that should not show up in mnttab 408 */ 409 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 410 vfsp = vfsp->vfs_zone_next; 411 continue; 412 } 413 mntfs_vfs_generate(vfsp, zone, tab, &cp, forread); 414 if (tab) 415 tab++; 416 vfsp = vfsp->vfs_zone_next; 417 } while (vfsp != zonelist); 418 } 419 420 static void 421 mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep, 422 int forread) 423 { 424 vfs_t *vfsp; 425 char *cp = basep; 426 427 vfsp = rootvfs; 428 do { 429 /* 430 * Skip mounts that should not show up in mnttab 431 */ 432 if (!showhidden && vfsp->vfs_flag & VFS_NOMNTTAB) { 433 vfsp = vfsp->vfs_next; 434 continue; 435 } 436 mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread); 437 if (tab) 438 tab++; 439 vfsp = vfsp->vfs_next; 440 } while (vfsp != rootvfs); 441 } 442 443 static char * 444 mntfs_mapin(char *base, size_t size) 445 { 446 size_t rlen = roundup(size, PAGESIZE); 447 struct as *as = curproc->p_as; 448 char *addr = NULL; 449 450 as_rangelock(as); 451 map_addr(&addr, rlen, 0, 1, 0); 452 if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) { 453 as_rangeunlock(as); 454 return (NULL); 455 } 456 as_rangeunlock(as); 457 if (copyout(base, addr, size)) { 458 (void) as_unmap(as, addr, rlen); 459 return (NULL); 460 } 461 return (addr); 462 } 463 464 static void 465 mntfs_freesnap(mntsnap_t *snap) 466 { 467 if (snap->mnts_text != NULL) 468 (void) as_unmap(curproc->p_as, snap->mnts_text, 469 roundup(snap->mnts_textsize, PAGESIZE)); 470 snap->mnts_textsize = snap->mnts_count = 0; 471 if (snap->mnts_metadata != NULL) 472 (void) as_unmap(curproc->p_as, snap->mnts_metadata, 473 roundup(snap->mnts_metasize, PAGESIZE)); 474 snap->mnts_metasize = 0; 475 } 476 477 #ifdef _SYSCALL32_IMPL 478 479 typedef struct extmnttab32 { 480 uint32_t mnt_special; 481 uint32_t mnt_mountp; 482 uint32_t mnt_fstype; 483 uint32_t mnt_mntopts; 484 uint32_t mnt_time; 485 uint_t mnt_major; 486 uint_t mnt_minor; 487 } extmnttab32_t; 488 489 #endif 490 491 /* 492 * Snapshot the latest version of the kernel mounted resource information 493 * 494 * There are two types of snapshots: one destined for reading, and one destined 495 * for ioctl(). The difference is that the ioctl() interface is delimited by 496 * NULLs, while the read() interface is delimited by tabs and newlines. 497 */ 498 /* ARGSUSED */ 499 static int 500 mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) 501 { 502 size_t size; 503 timespec_t lastmodt; 504 mntdata_t *mntdata = MTOD(mnp); 505 zone_t *zone = mntdata->mnt_zone; 506 boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone); 507 boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0); 508 struct extmnttab *metadata_baseaddr; 509 char *text_baseaddr; 510 int i; 511 mntsnap_t *snap; 512 513 if (forread) 514 snap = &mnp->mnt_read; 515 else 516 snap = &mnp->mnt_ioctl; 517 518 vfs_list_read_lock(); 519 /* 520 * Check if the mnttab info has changed since the last snapshot 521 */ 522 vfs_mnttab_modtime(&lastmodt); 523 if (snap->mnts_count && 524 lastmodt.tv_sec == snap->mnts_time.tv_sec && 525 lastmodt.tv_nsec == snap->mnts_time.tv_nsec) { 526 vfs_list_unlock(); 527 return (0); 528 } 529 530 531 if (snap->mnts_count != 0) 532 mntfs_freesnap(snap); 533 if (global_view) 534 size = mntfs_global_len(&snap->mnts_count, showhidden); 535 else 536 size = mntfs_zone_len(&snap->mnts_count, zone, showhidden); 537 ASSERT(size != 0); 538 539 if (!forread) 540 metadata_baseaddr = kmem_alloc( 541 snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP); 542 else 543 metadata_baseaddr = NULL; 544 545 text_baseaddr = kmem_alloc(size, KM_SLEEP); 546 547 if (global_view) 548 mntfs_global_generate(showhidden, metadata_baseaddr, 549 text_baseaddr, forread); 550 else 551 mntfs_zone_generate(zone, showhidden, 552 metadata_baseaddr, text_baseaddr, forread); 553 554 vfs_mnttab_modtime(&snap->mnts_time); 555 vfs_list_unlock(); 556 557 snap->mnts_text = mntfs_mapin(text_baseaddr, size); 558 snap->mnts_textsize = size; 559 kmem_free(text_baseaddr, size); 560 561 /* 562 * The pointers in the metadata refer to addreesses in the range 563 * [base_addr, base_addr + size]. Now that we have mapped the text into 564 * the user's address space, we have to convert these addresses into the 565 * new (user) range. We also handle the conversion for 32-bit and 566 * 32-bit applications here. 567 */ 568 if (!forread) { 569 struct extmnttab *tab; 570 #ifdef _SYSCALL32_IMPL 571 struct extmnttab32 *tab32; 572 573 if (datamodel == DATAMODEL_ILP32) { 574 tab = (struct extmnttab *)metadata_baseaddr; 575 tab32 = (struct extmnttab32 *)metadata_baseaddr; 576 577 for (i = 0; i < snap->mnts_count; i++) { 578 tab32[i].mnt_special = 579 (uintptr_t)snap->mnts_text + 580 (tab[i].mnt_special - text_baseaddr); 581 tab32[i].mnt_mountp = 582 (uintptr_t)snap->mnts_text + 583 (tab[i].mnt_mountp - text_baseaddr); 584 tab32[i].mnt_fstype = 585 (uintptr_t)snap->mnts_text + 586 (tab[i].mnt_fstype - text_baseaddr); 587 tab32[i].mnt_mntopts = 588 (uintptr_t)snap->mnts_text + 589 (tab[i].mnt_mntopts - text_baseaddr); 590 tab32[i].mnt_time = (uintptr_t)snap->mnts_text + 591 (tab[i].mnt_time - text_baseaddr); 592 tab32[i].mnt_major = tab[i].mnt_major; 593 tab32[i].mnt_minor = tab[i].mnt_minor; 594 } 595 596 snap->mnts_metasize = 597 snap->mnts_count * sizeof (struct extmnttab32); 598 snap->mnts_metadata = mntfs_mapin( 599 (char *)metadata_baseaddr, 600 snap->mnts_metasize); 601 602 } else { 603 #endif 604 tab = (struct extmnttab *)metadata_baseaddr; 605 for (i = 0; i < snap->mnts_count; i++) { 606 tab[i].mnt_special = snap->mnts_text + 607 (tab[i].mnt_special - text_baseaddr); 608 tab[i].mnt_mountp = snap->mnts_text + 609 (tab[i].mnt_mountp - text_baseaddr); 610 tab[i].mnt_fstype = snap->mnts_text + 611 (tab[i].mnt_fstype - text_baseaddr); 612 tab[i].mnt_mntopts = snap->mnts_text + 613 (tab[i].mnt_mntopts - text_baseaddr); 614 tab[i].mnt_time = snap->mnts_text + 615 (tab[i].mnt_time - text_baseaddr); 616 } 617 618 snap->mnts_metasize = 619 snap->mnts_count * sizeof (struct extmnttab); 620 snap->mnts_metadata = mntfs_mapin( 621 (char *)metadata_baseaddr, snap->mnts_metasize); 622 #ifdef _SYSCALL32_IMPL 623 } 624 #endif 625 626 kmem_free(metadata_baseaddr, 627 snap->mnts_count * sizeof (struct extmnttab)); 628 } 629 630 mntdata->mnt_size = size; 631 632 if (snap->mnts_text == NULL || 633 (!forread && snap->mnts_metadata == NULL)) { 634 mntfs_freesnap(snap); 635 return (ENOMEM); 636 } 637 vfs_mnttab_readop(); 638 return (0); 639 } 640 641 /* 642 * Public function to convert vfs_mntopts into a string. 643 * A buffer of sufficient size is allocated, which is returned via bufp, 644 * and whose length is returned via lenp. 645 */ 646 void 647 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 648 { 649 size_t len; 650 char *buf; 651 652 vfs_list_read_lock(); 653 654 len = mntfs_optsize(vfsp) + 1; 655 buf = kmem_alloc(len, KM_NOSLEEP); 656 if (buf == NULL) { 657 *bufp = NULL; 658 vfs_list_unlock(); 659 return; 660 } 661 buf[len - 1] = '\0'; 662 (void) mntfs_optprint(vfsp, buf); 663 ASSERT(buf[len - 1] == '\0'); 664 665 vfs_list_unlock(); 666 *bufp = buf; 667 *lenp = len; 668 } 669 670 671 /* ARGSUSED */ 672 static int 673 mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 674 { 675 vnode_t *vp = *vpp; 676 mntnode_t *nmnp; 677 678 /* 679 * Not allowed to open for writing, return error. 680 */ 681 if (flag & FWRITE) 682 return (EPERM); 683 /* 684 * Create a new mnt/vnode for each open, this will give us a handle to 685 * hang the snapshot on. 686 */ 687 nmnp = mntgetnode(vp); 688 689 *vpp = MTOV(nmnp); 690 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 691 VN_RELE(vp); 692 return (0); 693 } 694 695 /* ARGSUSED */ 696 static int 697 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 698 caller_context_t *ct) 699 { 700 mntnode_t *mnp = VTOM(vp); 701 702 /* Clean up any locks or shares held by the current process */ 703 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 704 cleanshares(vp, ttoproc(curthread)->p_pid); 705 706 if (count > 1) 707 return (0); 708 if (vp->v_count == 1) { 709 mntfs_freesnap(&mnp->mnt_read); 710 mntfs_freesnap(&mnp->mnt_ioctl); 711 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 712 } 713 return (0); 714 } 715 716 /* ARGSUSED */ 717 static int 718 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 719 { 720 int error = 0; 721 off_t off = uio->uio_offset; 722 size_t len = uio->uio_resid; 723 mntnode_t *mnp = VTOM(vp); 724 char *buf; 725 mntsnap_t *snap = &mnp->mnt_read; 726 int datamodel; 727 728 if (off == (off_t)0 || snap->mnts_count == 0) { 729 /* 730 * It is assumed that any kernel callers wishing 731 * to read mnttab will be using extmnttab entries 732 * and not extmnttab32 entries, whether or not 733 * the kernel is LP64 or ILP32. Thus, force the 734 * datamodel that mntfs_snapshot uses to be 735 * DATAMODEL_LP64. 736 */ 737 if (uio->uio_segflg == UIO_SYSSPACE) 738 datamodel = DATAMODEL_LP64; 739 else 740 datamodel = get_udatamodel(); 741 if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0) 742 return (error); 743 } 744 if ((size_t)(off + len) > snap->mnts_textsize) 745 len = snap->mnts_textsize - off; 746 747 if (off < 0 || len > snap->mnts_textsize) 748 return (EFAULT); 749 750 if (len == 0) 751 return (0); 752 753 /* 754 * The mnttab image is stored in the user's address space, 755 * so we have to copy it into the kernel from userland, 756 * then copy it back out to the specified address. 757 */ 758 buf = kmem_alloc(len, KM_SLEEP); 759 if (copyin(snap->mnts_text + off, buf, len)) 760 error = EFAULT; 761 else { 762 error = uiomove(buf, len, UIO_READ, uio); 763 } 764 kmem_free(buf, len); 765 vfs_mnttab_readop(); 766 return (error); 767 } 768 769 770 static int 771 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 772 caller_context_t *ct) 773 { 774 mntnode_t *mnp = VTOM(vp); 775 int error; 776 vnode_t *rvp; 777 extern timespec_t vfs_mnttab_ctime; 778 mntdata_t *mntdata = MTOD(VTOM(vp)); 779 mntsnap_t *snap = mnp->mnt_read.mnts_count ? 780 &mnp->mnt_read : &mnp->mnt_ioctl; 781 782 /* 783 * Return all the attributes. Should be refined 784 * so that it returns only those asked for. 785 * Most of this is complete fakery anyway. 786 */ 787 rvp = mnp->mnt_mountvp; 788 /* 789 * Attributes are same as underlying file with modifications 790 */ 791 if (error = VOP_GETATTR(rvp, vap, flags, cr, ct)) 792 return (error); 793 794 /* 795 * We always look like a regular file 796 */ 797 vap->va_type = VREG; 798 /* 799 * mode should basically be read only 800 */ 801 vap->va_mode &= 07444; 802 vap->va_fsid = vp->v_vfsp->vfs_dev; 803 vap->va_blksize = DEV_BSIZE; 804 vap->va_rdev = 0; 805 vap->va_seq = 0; 806 /* 807 * Set nlink to the number of open vnodes for mnttab info 808 * plus one for existing. 809 */ 810 vap->va_nlink = mntdata->mnt_nopen + 1; 811 /* 812 * If we haven't taken a snapshot yet, set the 813 * size to the size of the latest snapshot. 814 */ 815 vap->va_size = snap->mnts_textsize ? snap->mnts_textsize : 816 mntdata->mnt_size; 817 /* 818 * Fetch mtime from the vfs mnttab timestamp 819 */ 820 vap->va_ctime = vfs_mnttab_ctime; 821 vfs_list_read_lock(); 822 vfs_mnttab_modtime(&vap->va_mtime); 823 vap->va_atime = vap->va_mtime; 824 vfs_list_unlock(); 825 /* 826 * Nodeid is always ROOTINO; 827 */ 828 vap->va_nodeid = (ino64_t)MNTROOTINO; 829 vap->va_nblocks = btod(vap->va_size); 830 return (0); 831 } 832 833 834 static int 835 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, 836 caller_context_t *ct) 837 { 838 mntnode_t *mnp = VTOM(vp); 839 840 if (mode & (VWRITE|VEXEC)) 841 return (EROFS); 842 843 /* 844 * Do access check on the underlying directory vnode. 845 */ 846 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); 847 } 848 849 850 /* 851 * New /mntfs vnode required; allocate it and fill in most of the fields. 852 */ 853 static mntnode_t * 854 mntgetnode(vnode_t *dp) 855 { 856 mntnode_t *mnp; 857 vnode_t *vp; 858 859 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 860 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 861 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 862 vp = MTOV(mnp); 863 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 864 vn_setops(vp, mntvnodeops); 865 vp->v_vfsp = dp->v_vfsp; 866 vp->v_type = VREG; 867 vp->v_data = (caddr_t)mnp; 868 869 return (mnp); 870 } 871 872 /* 873 * Free the storage obtained from mntgetnode(). 874 */ 875 static void 876 mntfreenode(mntnode_t *mnp) 877 { 878 vnode_t *vp = MTOV(mnp); 879 880 vn_invalid(vp); 881 vn_free(vp); 882 kmem_free(mnp, sizeof (*mnp)); 883 } 884 885 886 /* ARGSUSED */ 887 static int 888 mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 889 { 890 return (0); 891 } 892 893 /* ARGSUSED */ 894 static void 895 mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 896 { 897 mntnode_t *mnp = VTOM(vp); 898 899 mntfreenode(mnp); 900 } 901 902 /* ARGSUSED */ 903 static int 904 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, 905 caller_context_t *ct) 906 { 907 if (*noffp == 0) 908 VTOM(vp)->mnt_offset = 0; 909 910 return (0); 911 } 912 913 /* 914 * Return the answer requested to poll(). 915 * POLLRDBAND will return when the mtime of the mnttab 916 * information is newer than the latest one read for this open. 917 */ 918 /* ARGSUSED */ 919 static int 920 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp, 921 caller_context_t *ct) 922 { 923 mntnode_t *mnp = VTOM(vp); 924 mntsnap_t *snap = &mnp->mnt_read; 925 926 if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec || 927 (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec && 928 mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec)) 929 snap = &mnp->mnt_ioctl; 930 931 *revp = 0; 932 *phpp = (pollhead_t *)NULL; 933 if (ev & POLLIN) 934 *revp |= POLLIN; 935 936 if (ev & POLLRDNORM) 937 *revp |= POLLRDNORM; 938 939 if (ev & POLLRDBAND) { 940 vfs_mnttab_poll(&snap->mnts_time, phpp); 941 if (*phpp == (pollhead_t *)NULL) 942 *revp |= POLLRDBAND; 943 } 944 if (*revp || *phpp != NULL || any) { 945 return (0); 946 } 947 /* 948 * If someone is polling an unsupported poll events (e.g. 949 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 950 * That way we will ensure that we don't return a 0 951 * revents with a NULL pollhead pointer. 952 */ 953 *revp = POLLERR; 954 return (0); 955 } 956 /* ARGSUSED */ 957 static int 958 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 959 cred_t *cr, int *rvalp, caller_context_t *ct) 960 { 961 uint_t *up = (uint_t *)arg; 962 mntnode_t *mnp = VTOM(vp); 963 mntsnap_t *snap = &mnp->mnt_ioctl; 964 int error; 965 966 error = 0; 967 switch (cmd) { 968 969 case MNTIOC_NMNTS: { /* get no. of mounted resources */ 970 if (snap->mnts_count == 0) { 971 if ((error = 972 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 973 return (error); 974 } 975 if (suword32(up, snap->mnts_count) != 0) 976 error = EFAULT; 977 break; 978 } 979 980 case MNTIOC_GETDEVLIST: { /* get mounted device major/minor nos */ 981 uint_t *devlist; 982 int i; 983 size_t len; 984 985 if (snap->mnts_count == 0) { 986 if ((error = 987 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 988 return (error); 989 } 990 991 len = 2 * snap->mnts_count * sizeof (uint_t); 992 devlist = kmem_alloc(len, KM_SLEEP); 993 for (i = 0; i < snap->mnts_count; i++) { 994 995 #ifdef _SYSCALL32_IMPL 996 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 997 struct extmnttab32 tab; 998 999 if ((error = xcopyin(snap->mnts_text + 1000 i * sizeof (struct extmnttab32), &tab, 1001 sizeof (tab))) != 0) 1002 break; 1003 1004 devlist[i*2] = tab.mnt_major; 1005 devlist[i*2+1] = tab.mnt_minor; 1006 } else { 1007 #endif 1008 struct extmnttab tab; 1009 1010 if ((error = xcopyin(snap->mnts_text + 1011 i * sizeof (struct extmnttab), &tab, 1012 sizeof (tab))) != 0) 1013 break; 1014 1015 devlist[i*2] = tab.mnt_major; 1016 devlist[i*2+1] = tab.mnt_minor; 1017 #ifdef _SYSCALL32_IMPL 1018 } 1019 #endif 1020 } 1021 1022 if (error == 0) 1023 error = xcopyout(devlist, up, len); 1024 kmem_free(devlist, len); 1025 break; 1026 } 1027 1028 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1029 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1030 { 1031 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1032 STRUCT_DECL(mnttagdesc, tagdesc); 1033 char *cptr; 1034 uint32_t major, minor; 1035 char tagbuf[MAX_MNTOPT_TAG]; 1036 char *pbuf; 1037 size_t len; 1038 uint_t start = 0; 1039 mntdata_t *mntdata = MTOD(mnp); 1040 zone_t *zone = mntdata->mnt_zone; 1041 1042 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1043 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1044 error = EFAULT; 1045 break; 1046 } 1047 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1048 if (zone != global_zone) { 1049 (void) strcpy(pbuf, zone->zone_rootpath); 1050 /* truncate "/" and nul */ 1051 start = zone->zone_rootpathlen - 2; 1052 ASSERT(pbuf[start] == '/'); 1053 } 1054 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1055 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1056 if (error) { 1057 kmem_free(pbuf, MAXPATHLEN); 1058 break; 1059 } 1060 if (start != 0 && pbuf[start] != '/') { 1061 kmem_free(pbuf, MAXPATHLEN); 1062 error = EINVAL; 1063 break; 1064 } 1065 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1066 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1067 kmem_free(pbuf, MAXPATHLEN); 1068 break; 1069 } 1070 major = STRUCT_FGET(tagdesc, mtd_major); 1071 minor = STRUCT_FGET(tagdesc, mtd_minor); 1072 if (cmd == MNTIOC_SETTAG) 1073 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1074 else 1075 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1076 kmem_free(pbuf, MAXPATHLEN); 1077 break; 1078 } 1079 1080 case MNTIOC_SHOWHIDDEN: 1081 { 1082 mutex_enter(&vp->v_lock); 1083 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1084 mutex_exit(&vp->v_lock); 1085 break; 1086 } 1087 1088 case MNTIOC_GETMNTENT: 1089 { 1090 size_t idx; 1091 uintptr_t addr; 1092 1093 idx = mnp->mnt_offset; 1094 if (snap->mnts_count == 0 || idx == 0) { 1095 if ((error = 1096 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1097 return (error); 1098 } 1099 /* 1100 * If the next index is beyond the end of the current mnttab, 1101 * return EOF 1102 */ 1103 if (idx >= snap->mnts_count) { 1104 *rvalp = 1; 1105 return (0); 1106 } 1107 1108 #ifdef _SYSCALL32_IMPL 1109 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1110 addr = (uintptr_t)(snap->mnts_metadata + idx * 1111 sizeof (struct extmnttab32)); 1112 error = suword32((void *)arg, addr); 1113 } else { 1114 #endif 1115 addr = (uintptr_t)(snap->mnts_metadata + idx * 1116 sizeof (struct extmnttab)); 1117 error = sulword((void *)arg, addr); 1118 #ifdef _SYSCALL32_IMPL 1119 } 1120 #endif 1121 1122 if (error != 0) 1123 return (error); 1124 1125 mnp->mnt_offset++; 1126 break; 1127 } 1128 1129 default: 1130 error = EINVAL; 1131 break; 1132 } 1133 1134 return (error); 1135 } 1136 1137 /* 1138 * /mntfs vnode operations vector 1139 */ 1140 const fs_operation_def_t mnt_vnodeops_template[] = { 1141 VOPNAME_OPEN, { .vop_open = mntopen }, 1142 VOPNAME_CLOSE, { .vop_close = mntclose }, 1143 VOPNAME_READ, { .vop_read = mntread }, 1144 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1145 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1146 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1147 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1148 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1149 VOPNAME_SEEK, { .vop_seek = mntseek }, 1150 VOPNAME_POLL, { .vop_poll = mntpoll }, 1151 VOPNAME_DISPOSE, { .error = fs_error }, 1152 VOPNAME_SHRLOCK, { .error = fs_error }, 1153 NULL, NULL 1154 }; 1155