1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/file.h> 29 #include <sys/stat.h> 30 #include <sys/atomic.h> 31 #include <sys/mntio.h> 32 #include <sys/mnttab.h> 33 #include <sys/mount.h> 34 #include <sys/sunddi.h> 35 #include <sys/sysmacros.h> 36 #include <sys/systm.h> 37 #include <sys/vfs.h> 38 #include <sys/vfs_opreg.h> 39 #include <sys/fs/mntdata.h> 40 #include <fs/fs_subr.h> 41 #include <sys/vmsystm.h> 42 #include <vm/seg_vn.h> 43 44 #define MNTROOTINO 2 45 46 static mntnode_t *mntgetnode(vnode_t *); 47 48 vnodeops_t *mntvnodeops; 49 vnodeops_t *mntdummyvnodeops; 50 extern struct vnode *mntdummyvp; 51 52 /* 53 * Design of kernel mnttab accounting. 54 * 55 * To support whitespace in mount names, we implement an ioctl 56 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in 57 * /etc/mnttab. The libc functions getmntent() and getextmntent() are built 58 * atop this interface. 59 * 60 * To minimize the amount of memory used in the kernel, we keep all the 61 * necessary information in the user's address space. Large server 62 * configurations can have /etc/mnttab files in excess of 64k. 63 * 64 * To support both vanilla read() calls as well as ioctl() calls, we have two 65 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl. 66 * These snapshots include the base location in user memory, the number of 67 * mounts in the snapshot, and any metadata associated with it. The metadata is 68 * used only to support the ioctl() interface, and is a series of extmnttab 69 * structures. When the user issues an ioctl(), we simply copyout a pointer to 70 * that structure, and the rest is handled in userland. 71 */ 72 73 /* 74 * NOTE: The following variable enables the generation of the "dev=xxx" 75 * in the option string for a mounted file system. Really this should 76 * be gotten rid of altogether, but for the sake of backwards compatibility 77 * we had to leave it in. It is defined as a 32-bit device number. This 78 * means that when 64-bit device numbers are in use, if either the major or 79 * minor part of the device number will not fit in a 16 bit quantity, the 80 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 81 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 82 * device number handles this check and assigns the proper value. 83 */ 84 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 85 86 static int 87 mntfs_devsize(struct vfs *vfsp) 88 { 89 dev32_t odev; 90 91 (void) cmpldev(&odev, vfsp->vfs_dev); 92 return (snprintf(NULL, 0, "dev=%x", odev)); 93 } 94 95 static int 96 mntfs_devprint(struct vfs *vfsp, char *buf) 97 { 98 dev32_t odev; 99 100 (void) cmpldev(&odev, vfsp->vfs_dev); 101 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 102 } 103 104 static int 105 mntfs_optsize(struct vfs *vfsp) 106 { 107 int i, size = 0; 108 mntopt_t *mop; 109 110 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 111 mop = &vfsp->vfs_mntopts.mo_list[i]; 112 if (mop->mo_flags & MO_NODISPLAY) 113 continue; 114 if (mop->mo_flags & MO_SET) { 115 if (size) 116 size++; /* space for comma */ 117 size += strlen(mop->mo_name); 118 /* 119 * count option value if there is one 120 */ 121 if (mop->mo_arg != NULL) { 122 size += strlen(mop->mo_arg) + 1; 123 } 124 } 125 } 126 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 127 /* 128 * Add space for "zone=<zone_name>" if required. 129 */ 130 if (size) 131 size++; /* space for comma */ 132 size += sizeof ("zone=") - 1; 133 size += strlen(vfsp->vfs_zone->zone_name); 134 } 135 if (mntfs_enabledev) { 136 if (size != 0) 137 size++; /* space for comma */ 138 size += mntfs_devsize(vfsp); 139 } 140 if (size == 0) 141 size = strlen("-"); 142 return (size); 143 } 144 145 static int 146 mntfs_optprint(struct vfs *vfsp, char *buf) 147 { 148 int i, optinbuf = 0; 149 mntopt_t *mop; 150 char *origbuf = buf; 151 152 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 153 mop = &vfsp->vfs_mntopts.mo_list[i]; 154 if (mop->mo_flags & MO_NODISPLAY) 155 continue; 156 if (mop->mo_flags & MO_SET) { 157 if (optinbuf) 158 *buf++ = ','; 159 else 160 optinbuf = 1; 161 buf += snprintf(buf, MAX_MNTOPT_STR, 162 "%s", mop->mo_name); 163 /* 164 * print option value if there is one 165 */ 166 if (mop->mo_arg != NULL) { 167 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 168 mop->mo_arg); 169 } 170 } 171 } 172 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 173 if (optinbuf) 174 *buf++ = ','; 175 else 176 optinbuf = 1; 177 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 178 vfsp->vfs_zone->zone_name); 179 } 180 if (mntfs_enabledev) { 181 if (optinbuf++) 182 *buf++ = ','; 183 buf += mntfs_devprint(vfsp, buf); 184 } 185 if (!optinbuf) { 186 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 187 } 188 return (buf - origbuf); 189 } 190 191 static size_t 192 mntfs_vfs_len(vfs_t *vfsp, zone_t *zone) 193 { 194 size_t size = 0; 195 const char *resource, *mntpt; 196 197 mntpt = refstr_value(vfsp->vfs_mntpt); 198 if (mntpt != NULL && mntpt[0] != '\0') { 199 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 200 } else { 201 size += strlen("-") + 1; 202 } 203 204 resource = refstr_value(vfsp->vfs_resource); 205 if (resource != NULL && resource[0] != '\0') { 206 if (resource[0] != '/') { 207 size += strlen(resource) + 1; 208 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 209 /* 210 * Same as the zone's view of the mount point. 211 */ 212 size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 213 } else { 214 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 215 } 216 } else { 217 size += strlen("-") + 1; 218 } 219 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 220 size += mntfs_optsize(vfsp); 221 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 222 return (size); 223 } 224 225 static void 226 mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp) 227 { 228 /* 229 * Basically copy over the real vfs_t on which the root vnode is 230 * located, changing its mountpoint and resource to match those of 231 * the zone's rootpath. 232 */ 233 *rootvfsp = *zone->zone_rootvp->v_vfsp; 234 rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath); 235 rootvfsp->vfs_resource = rootvfsp->vfs_mntpt; 236 } 237 238 static size_t 239 mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden) 240 { 241 struct vfs *zonelist; 242 struct vfs *vfsp; 243 size_t size = 0; 244 uint_t cnt = 0; 245 246 ASSERT(zone->zone_rootpath != NULL); 247 248 /* 249 * If the zone has a root entry, it will be the first in the list. If 250 * it doesn't, we conjure one up. 251 */ 252 vfsp = zonelist = zone->zone_vfslist; 253 if (zonelist == NULL || 254 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 255 vfs_t tvfs; 256 /* 257 * The root of the zone is not a mount point. The vfs we want 258 * to report is that of the zone's root vnode. 259 */ 260 ASSERT(zone != global_zone); 261 mntfs_zonerootvfs(zone, &tvfs); 262 size += mntfs_vfs_len(&tvfs, zone); 263 refstr_rele(tvfs.vfs_mntpt); 264 cnt++; 265 } 266 if (zonelist == NULL) 267 goto out; 268 do { 269 /* 270 * Skip mounts that should not show up in mnttab 271 */ 272 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 273 vfsp = vfsp->vfs_zone_next; 274 continue; 275 } 276 cnt++; 277 size += mntfs_vfs_len(vfsp, zone); 278 vfsp = vfsp->vfs_zone_next; 279 } while (vfsp != zonelist); 280 out: 281 *nent_ptr = cnt; 282 return (size); 283 } 284 285 static size_t 286 mntfs_global_len(uint_t *nent_ptr, int showhidden) 287 { 288 struct vfs *vfsp; 289 size_t size = 0; 290 uint_t cnt = 0; 291 292 vfsp = rootvfs; 293 do { 294 /* 295 * Skip mounts that should not show up in mnttab 296 */ 297 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 298 vfsp = vfsp->vfs_next; 299 continue; 300 } 301 cnt++; 302 size += mntfs_vfs_len(vfsp, global_zone); 303 vfsp = vfsp->vfs_next; 304 } while (vfsp != rootvfs); 305 *nent_ptr = cnt; 306 return (size); 307 } 308 309 static void 310 mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab, 311 char **basep, int forread) 312 { 313 const char *resource, *mntpt; 314 char *cp = *basep; 315 316 mntpt = refstr_value(vfsp->vfs_mntpt); 317 resource = refstr_value(vfsp->vfs_resource); 318 319 if (tab) 320 tab->mnt_special = cp; 321 if (resource != NULL && resource[0] != '\0') { 322 if (resource[0] != '/') { 323 cp += snprintf(cp, MAXPATHLEN, "%s", resource); 324 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 325 /* 326 * Use the mount point as the resource. 327 */ 328 cp += snprintf(cp, MAXPATHLEN, "%s", 329 ZONE_PATH_TRANSLATE(mntpt, zone)); 330 } else { 331 cp += snprintf(cp, MAXPATHLEN, "%s", 332 ZONE_PATH_TRANSLATE(resource, zone)); 333 } 334 } else { 335 cp += snprintf(cp, MAXPATHLEN, "-"); 336 } 337 *cp++ = forread ? '\t' : '\0'; 338 339 if (tab) 340 tab->mnt_mountp = cp; 341 if (mntpt != NULL && mntpt[0] != '\0') { 342 /* 343 * We know the mount point is visible from within the zone, 344 * otherwise it wouldn't be on the zone's vfs list. 345 */ 346 cp += snprintf(cp, MAXPATHLEN, "%s", 347 ZONE_PATH_TRANSLATE(mntpt, zone)); 348 } else { 349 cp += snprintf(cp, MAXPATHLEN, "-"); 350 } 351 *cp++ = forread ? '\t' : '\0'; 352 353 if (tab) 354 tab->mnt_fstype = cp; 355 cp += snprintf(cp, MAXPATHLEN, "%s", 356 vfssw[vfsp->vfs_fstype].vsw_name); 357 *cp++ = forread ? '\t' : '\0'; 358 359 if (tab) 360 tab->mnt_mntopts = cp; 361 cp += mntfs_optprint(vfsp, cp); 362 *cp++ = forread ? '\t' : '\0'; 363 364 if (tab) 365 tab->mnt_time = cp; 366 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 367 *cp++ = forread ? '\n' : '\0'; 368 369 if (tab) { 370 tab->mnt_major = getmajor(vfsp->vfs_dev); 371 tab->mnt_minor = getminor(vfsp->vfs_dev); 372 } 373 374 *basep = cp; 375 } 376 377 static void 378 mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab, 379 char *basep, int forread) 380 { 381 vfs_t *zonelist; 382 vfs_t *vfsp; 383 char *cp = basep; 384 385 /* 386 * If the zone has a root entry, it will be the first in the list. If 387 * it doesn't, we conjure one up. 388 */ 389 vfsp = zonelist = zone->zone_vfslist; 390 if (zonelist == NULL || 391 strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) { 392 vfs_t tvfs; 393 /* 394 * The root of the zone is not a mount point. The vfs we want 395 * to report is that of the zone's root vnode. 396 */ 397 ASSERT(zone != global_zone); 398 mntfs_zonerootvfs(zone, &tvfs); 399 mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread); 400 refstr_rele(tvfs.vfs_mntpt); 401 if (tab) 402 tab++; 403 } 404 if (zonelist == NULL) 405 return; 406 do { 407 /* 408 * Skip mounts that should not show up in mnttab 409 */ 410 if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) { 411 vfsp = vfsp->vfs_zone_next; 412 continue; 413 } 414 mntfs_vfs_generate(vfsp, zone, tab, &cp, forread); 415 if (tab) 416 tab++; 417 vfsp = vfsp->vfs_zone_next; 418 } while (vfsp != zonelist); 419 } 420 421 static void 422 mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep, 423 int forread) 424 { 425 vfs_t *vfsp; 426 char *cp = basep; 427 428 vfsp = rootvfs; 429 do { 430 /* 431 * Skip mounts that should not show up in mnttab 432 */ 433 if (!showhidden && vfsp->vfs_flag & VFS_NOMNTTAB) { 434 vfsp = vfsp->vfs_next; 435 continue; 436 } 437 mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread); 438 if (tab) 439 tab++; 440 vfsp = vfsp->vfs_next; 441 } while (vfsp != rootvfs); 442 } 443 444 static char * 445 mntfs_mapin(char *base, size_t size) 446 { 447 size_t rlen = roundup(size, PAGESIZE); 448 struct as *as = curproc->p_as; 449 char *addr; 450 451 as_rangelock(as); 452 map_addr(&addr, rlen, 0, 1, 0); 453 if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) { 454 as_rangeunlock(as); 455 return (NULL); 456 } 457 as_rangeunlock(as); 458 if (copyout(base, addr, size)) { 459 (void) as_unmap(as, addr, rlen); 460 return (NULL); 461 } 462 return (addr); 463 } 464 465 static void 466 mntfs_freesnap(mntsnap_t *snap) 467 { 468 if (snap->mnts_text != NULL) 469 (void) as_unmap(curproc->p_as, snap->mnts_text, 470 roundup(snap->mnts_textsize, PAGESIZE)); 471 snap->mnts_textsize = snap->mnts_count = 0; 472 if (snap->mnts_metadata != NULL) 473 (void) as_unmap(curproc->p_as, snap->mnts_metadata, 474 roundup(snap->mnts_metasize, PAGESIZE)); 475 snap->mnts_metasize = 0; 476 } 477 478 #ifdef _SYSCALL32_IMPL 479 480 typedef struct extmnttab32 { 481 uint32_t mnt_special; 482 uint32_t mnt_mountp; 483 uint32_t mnt_fstype; 484 uint32_t mnt_mntopts; 485 uint32_t mnt_time; 486 uint_t mnt_major; 487 uint_t mnt_minor; 488 } extmnttab32_t; 489 490 #endif 491 492 /* 493 * called to generate a dummy read vop call so that 494 * any module monitoring /etc/mnttab for access gets notified. 495 */ 496 static void 497 mntdummyreadop() 498 { 499 struct uio uio; 500 struct iovec iov; 501 char tbuf[1]; 502 503 /* 504 * Make a VOP_READ call on the dummy vnode so that any 505 * module interested in mnttab getting modified could 506 * intercept this vnode and capture the event. 507 * 508 * Pass a dummy uio struct. Nobody should reference the buffer. 509 * We need to pass a valid uio struct pointer to take care of 510 * any module intercepting this vnode which could attempt to 511 * look at it. Currently only the file events notification 512 * module intercepts this vnode. 513 */ 514 bzero(&uio, sizeof (uio)); 515 bzero(&iov, sizeof (iov)); 516 iov.iov_base = tbuf; 517 iov.iov_len = 0; 518 uio.uio_iov = &iov; 519 uio.uio_iovcnt = 1; 520 uio.uio_loffset = 0; 521 uio.uio_segflg = UIO_SYSSPACE; 522 uio.uio_resid = 0; 523 (void) VOP_READ(mntdummyvp, &uio, 0, kcred, NULL); 524 } 525 526 /* 527 * Snapshot the latest version of the kernel mounted resource information 528 * 529 * There are two types of snapshots: one destined for reading, and one destined 530 * for ioctl(). The difference is that the ioctl() interface is delimited by 531 * NULLs, while the read() interface is delimited by tabs and newlines. 532 */ 533 /* ARGSUSED */ 534 static int 535 mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel) 536 { 537 size_t size; 538 timespec_t lastmodt; 539 mntdata_t *mntdata = MTOD(mnp); 540 zone_t *zone = mntdata->mnt_zone; 541 boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone); 542 boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0); 543 struct extmnttab *metadata_baseaddr; 544 char *text_baseaddr; 545 int i; 546 mntsnap_t *snap; 547 548 if (forread) 549 snap = &mnp->mnt_read; 550 else 551 snap = &mnp->mnt_ioctl; 552 553 vfs_list_read_lock(); 554 /* 555 * Check if the mnttab info has changed since the last snapshot 556 */ 557 vfs_mnttab_modtime(&lastmodt); 558 if (snap->mnts_count && 559 lastmodt.tv_sec == snap->mnts_time.tv_sec && 560 lastmodt.tv_nsec == snap->mnts_time.tv_nsec) { 561 vfs_list_unlock(); 562 return (0); 563 } 564 565 566 if (snap->mnts_count != 0) 567 mntfs_freesnap(snap); 568 if (global_view) 569 size = mntfs_global_len(&snap->mnts_count, showhidden); 570 else 571 size = mntfs_zone_len(&snap->mnts_count, zone, showhidden); 572 ASSERT(size != 0); 573 574 if (!forread) 575 metadata_baseaddr = kmem_alloc( 576 snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP); 577 else 578 metadata_baseaddr = NULL; 579 580 text_baseaddr = kmem_alloc(size, KM_SLEEP); 581 582 if (global_view) 583 mntfs_global_generate(showhidden, metadata_baseaddr, 584 text_baseaddr, forread); 585 else 586 mntfs_zone_generate(zone, showhidden, 587 metadata_baseaddr, text_baseaddr, forread); 588 589 vfs_mnttab_modtime(&snap->mnts_time); 590 vfs_list_unlock(); 591 592 snap->mnts_text = mntfs_mapin(text_baseaddr, size); 593 snap->mnts_textsize = size; 594 kmem_free(text_baseaddr, size); 595 596 /* 597 * The pointers in the metadata refer to addreesses in the range 598 * [base_addr, base_addr + size]. Now that we have mapped the text into 599 * the user's address space, we have to convert these addresses into the 600 * new (user) range. We also handle the conversion for 32-bit and 601 * 32-bit applications here. 602 */ 603 if (!forread) { 604 struct extmnttab *tab; 605 #ifdef _SYSCALL32_IMPL 606 struct extmnttab32 *tab32; 607 608 if (datamodel == DATAMODEL_ILP32) { 609 tab = (struct extmnttab *)metadata_baseaddr; 610 tab32 = (struct extmnttab32 *)metadata_baseaddr; 611 612 for (i = 0; i < snap->mnts_count; i++) { 613 tab32[i].mnt_special = 614 (uintptr_t)snap->mnts_text + 615 (tab[i].mnt_special - text_baseaddr); 616 tab32[i].mnt_mountp = 617 (uintptr_t)snap->mnts_text + 618 (tab[i].mnt_mountp - text_baseaddr); 619 tab32[i].mnt_fstype = 620 (uintptr_t)snap->mnts_text + 621 (tab[i].mnt_fstype - text_baseaddr); 622 tab32[i].mnt_mntopts = 623 (uintptr_t)snap->mnts_text + 624 (tab[i].mnt_mntopts - text_baseaddr); 625 tab32[i].mnt_time = (uintptr_t)snap->mnts_text + 626 (tab[i].mnt_time - text_baseaddr); 627 tab32[i].mnt_major = tab[i].mnt_major; 628 tab32[i].mnt_minor = tab[i].mnt_minor; 629 } 630 631 snap->mnts_metasize = 632 snap->mnts_count * sizeof (struct extmnttab32); 633 snap->mnts_metadata = mntfs_mapin( 634 (char *)metadata_baseaddr, 635 snap->mnts_metasize); 636 637 } else { 638 #endif 639 tab = (struct extmnttab *)metadata_baseaddr; 640 for (i = 0; i < snap->mnts_count; i++) { 641 tab[i].mnt_special = snap->mnts_text + 642 (tab[i].mnt_special - text_baseaddr); 643 tab[i].mnt_mountp = snap->mnts_text + 644 (tab[i].mnt_mountp - text_baseaddr); 645 tab[i].mnt_fstype = snap->mnts_text + 646 (tab[i].mnt_fstype - text_baseaddr); 647 tab[i].mnt_mntopts = snap->mnts_text + 648 (tab[i].mnt_mntopts - text_baseaddr); 649 tab[i].mnt_time = snap->mnts_text + 650 (tab[i].mnt_time - text_baseaddr); 651 } 652 653 snap->mnts_metasize = 654 snap->mnts_count * sizeof (struct extmnttab); 655 snap->mnts_metadata = mntfs_mapin( 656 (char *)metadata_baseaddr, snap->mnts_metasize); 657 #ifdef _SYSCALL32_IMPL 658 } 659 #endif 660 661 kmem_free(metadata_baseaddr, 662 snap->mnts_count * sizeof (struct extmnttab)); 663 } 664 665 mntdata->mnt_size = size; 666 667 if (snap->mnts_text == NULL || 668 (!forread && snap->mnts_metadata == NULL)) { 669 mntfs_freesnap(snap); 670 return (ENOMEM); 671 } 672 mntdummyreadop(); 673 return (0); 674 } 675 676 /* 677 * Public function to convert vfs_mntopts into a string. 678 * A buffer of sufficient size is allocated, which is returned via bufp, 679 * and whose length is returned via lenp. 680 */ 681 void 682 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 683 { 684 size_t len; 685 char *buf; 686 687 vfs_list_read_lock(); 688 689 len = mntfs_optsize(vfsp) + 1; 690 buf = kmem_alloc(len, KM_NOSLEEP); 691 if (buf == NULL) { 692 *bufp = NULL; 693 vfs_list_unlock(); 694 return; 695 } 696 buf[len - 1] = '\0'; 697 (void) mntfs_optprint(vfsp, buf); 698 ASSERT(buf[len - 1] == '\0'); 699 700 vfs_list_unlock(); 701 *bufp = buf; 702 *lenp = len; 703 } 704 705 706 /* ARGSUSED */ 707 static int 708 mntopen(vnode_t **vpp, int flag, cred_t *cr) 709 { 710 vnode_t *vp = *vpp; 711 mntnode_t *nmnp; 712 713 /* 714 * Not allowed to open for writing, return error. 715 */ 716 if (flag & FWRITE) 717 return (EPERM); 718 /* 719 * Create a new mnt/vnode for each open, this will give us a handle to 720 * hang the snapshot on. 721 */ 722 nmnp = mntgetnode(vp); 723 724 *vpp = MTOV(nmnp); 725 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 726 VN_RELE(vp); 727 return (0); 728 } 729 730 /* ARGSUSED */ 731 static int 732 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) 733 { 734 mntnode_t *mnp = VTOM(vp); 735 736 /* Clean up any locks or shares held by the current process */ 737 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 738 cleanshares(vp, ttoproc(curthread)->p_pid); 739 740 if (count > 1) 741 return (0); 742 if (vp->v_count == 1) { 743 mntfs_freesnap(&mnp->mnt_read); 744 mntfs_freesnap(&mnp->mnt_ioctl); 745 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 746 } 747 return (0); 748 } 749 750 /* ARGSUSED */ 751 static int 752 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 753 { 754 int error = 0; 755 off_t off = uio->uio_offset; 756 size_t len = uio->uio_resid; 757 mntnode_t *mnp = VTOM(vp); 758 char *buf; 759 mntsnap_t *snap = &mnp->mnt_read; 760 int datamodel; 761 762 if (off == (off_t)0 || snap->mnts_count == 0) { 763 /* 764 * It is assumed that any kernel callers wishing 765 * to read mnttab will be using extmnttab entries 766 * and not extmnttab32 entries, whether or not 767 * the kernel is LP64 or ILP32. Thus, force the 768 * datamodel that mntfs_snapshot uses to be 769 * DATAMODEL_LP64. 770 */ 771 if (uio->uio_segflg == UIO_SYSSPACE) 772 datamodel = DATAMODEL_LP64; 773 else 774 datamodel = get_udatamodel(); 775 if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0) 776 return (error); 777 } 778 if ((size_t)(off + len) > snap->mnts_textsize) 779 len = snap->mnts_textsize - off; 780 781 if (off < 0 || len > snap->mnts_textsize) 782 return (EFAULT); 783 784 if (len == 0) 785 return (0); 786 787 /* 788 * The mnttab image is stored in the user's address space, 789 * so we have to copy it into the kernel from userland, 790 * then copy it back out to the specified address. 791 */ 792 buf = kmem_alloc(len, KM_SLEEP); 793 if (copyin(snap->mnts_text + off, buf, len)) 794 error = EFAULT; 795 else { 796 error = uiomove(buf, len, UIO_READ, uio); 797 } 798 kmem_free(buf, len); 799 mntdummyreadop(); 800 return (error); 801 } 802 803 804 static int 805 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 806 { 807 mntnode_t *mnp = VTOM(vp); 808 int error; 809 vnode_t *rvp; 810 extern timespec_t vfs_mnttab_ctime; 811 mntdata_t *mntdata = MTOD(VTOM(vp)); 812 mntsnap_t *snap = mnp->mnt_read.mnts_count ? 813 &mnp->mnt_read : &mnp->mnt_ioctl; 814 815 /* 816 * Return all the attributes. Should be refined 817 * so that it returns only those asked for. 818 * Most of this is complete fakery anyway. 819 */ 820 rvp = mnp->mnt_mountvp; 821 /* 822 * Attributes are same as underlying file with modifications 823 */ 824 if (error = VOP_GETATTR(rvp, vap, flags, cr)) 825 return (error); 826 827 /* 828 * We always look like a regular file 829 */ 830 vap->va_type = VREG; 831 /* 832 * mode should basically be read only 833 */ 834 vap->va_mode &= 07444; 835 vap->va_fsid = vp->v_vfsp->vfs_dev; 836 vap->va_blksize = DEV_BSIZE; 837 vap->va_rdev = 0; 838 vap->va_seq = 0; 839 /* 840 * Set nlink to the number of open vnodes for mnttab info 841 * plus one for existing. 842 */ 843 vap->va_nlink = mntdata->mnt_nopen + 1; 844 /* 845 * If we haven't taken a snapshot yet, set the 846 * size to the size of the latest snapshot. 847 */ 848 vap->va_size = snap->mnts_textsize ? snap->mnts_textsize : 849 mntdata->mnt_size; 850 /* 851 * Fetch mtime from the vfs mnttab timestamp 852 */ 853 vap->va_ctime = vfs_mnttab_ctime; 854 vfs_list_read_lock(); 855 vfs_mnttab_modtime(&vap->va_mtime); 856 vap->va_atime = vap->va_mtime; 857 vfs_list_unlock(); 858 /* 859 * Nodeid is always ROOTINO; 860 */ 861 vap->va_nodeid = (ino64_t)MNTROOTINO; 862 vap->va_nblocks = btod(vap->va_size); 863 return (0); 864 } 865 866 867 static int 868 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr) 869 { 870 mntnode_t *mnp = VTOM(vp); 871 872 if (mode & (VWRITE|VEXEC)) 873 return (EROFS); 874 875 /* 876 * Do access check on the underlying directory vnode. 877 */ 878 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr)); 879 } 880 881 882 /* 883 * New /mntfs vnode required; allocate it and fill in most of the fields. 884 */ 885 static mntnode_t * 886 mntgetnode(vnode_t *dp) 887 { 888 mntnode_t *mnp; 889 vnode_t *vp; 890 891 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 892 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 893 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 894 vp = MTOV(mnp); 895 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 896 vn_setops(vp, mntvnodeops); 897 vp->v_vfsp = dp->v_vfsp; 898 vp->v_type = VREG; 899 vp->v_data = (caddr_t)mnp; 900 901 return (mnp); 902 } 903 904 /* 905 * Free the storage obtained from mntgetnode(). 906 */ 907 static void 908 mntfreenode(mntnode_t *mnp) 909 { 910 vnode_t *vp = MTOV(mnp); 911 912 vn_invalid(vp); 913 vn_free(vp); 914 kmem_free(mnp, sizeof (*mnp)); 915 } 916 917 918 /* ARGSUSED */ 919 static int 920 mntfsync(vnode_t *vp, int syncflag, cred_t *cr) 921 { 922 return (0); 923 } 924 925 /* ARGSUSED */ 926 static void 927 mntinactive(vnode_t *vp, cred_t *cr) 928 { 929 mntnode_t *mnp = VTOM(vp); 930 931 mntfreenode(mnp); 932 } 933 934 /* ARGSUSED */ 935 static int 936 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp) 937 { 938 if (*noffp == 0) 939 VTOM(vp)->mnt_offset = 0; 940 941 return (0); 942 } 943 944 /* 945 * Return the answer requested to poll(). 946 * POLLRDBAND will return when the mtime of the mnttab 947 * information is newer than the latest one read for this open. 948 */ 949 /* ARGSUSED */ 950 static int 951 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp) 952 { 953 mntnode_t *mnp = VTOM(vp); 954 mntsnap_t *snap = &mnp->mnt_read; 955 956 if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec || 957 (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec && 958 mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec)) 959 snap = &mnp->mnt_ioctl; 960 961 *revp = 0; 962 *phpp = (pollhead_t *)NULL; 963 if (ev & POLLIN) 964 *revp |= POLLIN; 965 966 if (ev & POLLRDNORM) 967 *revp |= POLLRDNORM; 968 969 if (ev & POLLRDBAND) { 970 vfs_mnttab_poll(&snap->mnts_time, phpp); 971 if (*phpp == (pollhead_t *)NULL) 972 *revp |= POLLRDBAND; 973 } 974 if (*revp || *phpp != NULL || any) { 975 return (0); 976 } 977 /* 978 * If someone is polling an unsupported poll events (e.g. 979 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 980 * That way we will ensure that we don't return a 0 981 * revents with a NULL pollhead pointer. 982 */ 983 *revp = POLLERR; 984 return (0); 985 } 986 /* ARGSUSED */ 987 static int 988 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 989 cred_t *cr, int *rvalp) 990 { 991 uint_t *up = (uint_t *)arg; 992 mntnode_t *mnp = VTOM(vp); 993 mntsnap_t *snap = &mnp->mnt_ioctl; 994 int error; 995 996 error = 0; 997 switch (cmd) { 998 999 case MNTIOC_NMNTS: { /* get no. of mounted resources */ 1000 if (snap->mnts_count == 0) { 1001 if ((error = 1002 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1003 return (error); 1004 } 1005 if (suword32(up, snap->mnts_count) != 0) 1006 error = EFAULT; 1007 break; 1008 } 1009 1010 case MNTIOC_GETDEVLIST: { /* get mounted device major/minor nos */ 1011 uint_t *devlist; 1012 int i; 1013 size_t len; 1014 1015 if (snap->mnts_count == 0) { 1016 if ((error = 1017 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1018 return (error); 1019 } 1020 1021 len = 2 * snap->mnts_count * sizeof (uint_t); 1022 devlist = kmem_alloc(len, KM_SLEEP); 1023 for (i = 0; i < snap->mnts_count; i++) { 1024 1025 #ifdef _SYSCALL32_IMPL 1026 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1027 struct extmnttab32 tab; 1028 1029 if ((error = xcopyin(snap->mnts_text + 1030 i * sizeof (struct extmnttab32), &tab, 1031 sizeof (tab))) != 0) 1032 break; 1033 1034 devlist[i*2] = tab.mnt_major; 1035 devlist[i*2+1] = tab.mnt_minor; 1036 } else { 1037 #endif 1038 struct extmnttab tab; 1039 1040 if ((error = xcopyin(snap->mnts_text + 1041 i * sizeof (struct extmnttab), &tab, 1042 sizeof (tab))) != 0) 1043 break; 1044 1045 devlist[i*2] = tab.mnt_major; 1046 devlist[i*2+1] = tab.mnt_minor; 1047 #ifdef _SYSCALL32_IMPL 1048 } 1049 #endif 1050 } 1051 1052 if (error == 0) 1053 error = xcopyout(devlist, up, len); 1054 kmem_free(devlist, len); 1055 break; 1056 } 1057 1058 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1059 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1060 { 1061 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1062 STRUCT_DECL(mnttagdesc, tagdesc); 1063 char *cptr; 1064 uint32_t major, minor; 1065 char tagbuf[MAX_MNTOPT_TAG]; 1066 char *pbuf; 1067 size_t len; 1068 uint_t start = 0; 1069 mntdata_t *mntdata = MTOD(mnp); 1070 zone_t *zone = mntdata->mnt_zone; 1071 1072 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1073 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1074 error = EFAULT; 1075 break; 1076 } 1077 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1078 if (zone != global_zone) { 1079 (void) strcpy(pbuf, zone->zone_rootpath); 1080 /* truncate "/" and nul */ 1081 start = zone->zone_rootpathlen - 2; 1082 ASSERT(pbuf[start] == '/'); 1083 } 1084 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1085 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1086 if (error) { 1087 kmem_free(pbuf, MAXPATHLEN); 1088 break; 1089 } 1090 if (start != 0 && pbuf[start] != '/') { 1091 kmem_free(pbuf, MAXPATHLEN); 1092 error = EINVAL; 1093 break; 1094 } 1095 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1096 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1097 kmem_free(pbuf, MAXPATHLEN); 1098 break; 1099 } 1100 major = STRUCT_FGET(tagdesc, mtd_major); 1101 minor = STRUCT_FGET(tagdesc, mtd_minor); 1102 if (cmd == MNTIOC_SETTAG) 1103 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1104 else 1105 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1106 kmem_free(pbuf, MAXPATHLEN); 1107 break; 1108 } 1109 1110 case MNTIOC_SHOWHIDDEN: 1111 { 1112 mutex_enter(&vp->v_lock); 1113 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1114 mutex_exit(&vp->v_lock); 1115 break; 1116 } 1117 1118 case MNTIOC_GETMNTENT: 1119 { 1120 size_t idx; 1121 uintptr_t addr; 1122 1123 idx = mnp->mnt_offset; 1124 if (snap->mnts_count == 0 || idx == 0) { 1125 if ((error = 1126 mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0) 1127 return (error); 1128 } 1129 /* 1130 * If the next index is beyond the end of the current mnttab, 1131 * return EOF 1132 */ 1133 if (idx >= snap->mnts_count) { 1134 *rvalp = 1; 1135 return (0); 1136 } 1137 1138 #ifdef _SYSCALL32_IMPL 1139 if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) { 1140 addr = (uintptr_t)(snap->mnts_metadata + idx * 1141 sizeof (struct extmnttab32)); 1142 error = suword32((void *)arg, addr); 1143 } else { 1144 #endif 1145 addr = (uintptr_t)(snap->mnts_metadata + idx * 1146 sizeof (struct extmnttab)); 1147 error = sulword((void *)arg, addr); 1148 #ifdef _SYSCALL32_IMPL 1149 } 1150 #endif 1151 1152 if (error != 0) 1153 return (error); 1154 1155 mnp->mnt_offset++; 1156 break; 1157 } 1158 1159 default: 1160 error = EINVAL; 1161 break; 1162 } 1163 1164 return (error); 1165 } 1166 1167 /* ARGSUSED */ 1168 static int 1169 mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 1170 caller_context_t *ct) 1171 { 1172 return (0); 1173 } 1174 1175 /* ARGSUSED */ 1176 static int 1177 mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, 1178 caller_context_t *ct) 1179 { 1180 return (0); 1181 } 1182 1183 1184 /* 1185 * /mntfs vnode operations vector 1186 */ 1187 const fs_operation_def_t mnt_vnodeops_template[] = { 1188 VOPNAME_OPEN, { .vop_open = mntopen }, 1189 VOPNAME_CLOSE, { .vop_close = mntclose }, 1190 VOPNAME_READ, { .vop_read = mntread }, 1191 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1192 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1193 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1194 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1195 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1196 VOPNAME_SEEK, { .vop_seek = mntseek }, 1197 VOPNAME_POLL, { .vop_poll = mntpoll }, 1198 VOPNAME_DISPOSE, { .error = fs_error }, 1199 VOPNAME_SHRLOCK, { .error = fs_error }, 1200 NULL, NULL 1201 }; 1202 1203 const fs_operation_def_t mnt_dummyvnodeops_template[] = { 1204 VOPNAME_READ, { .vop_read = mntdummyread }, 1205 VOPNAME_WRITE, { .vop_write = mntdummywrite }, 1206 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 1207 NULL, NULL 1208 }; 1209