/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/file.h>
#include <sys/stat.h>
#include <sys/atomic.h>
#include <sys/mntio.h>
#include <sys/mnttab.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/fs/mntdata.h>
#include <fs/fs_subr.h>
#include <sys/vmsystm.h>
#include <vm/seg_vn.h>

#define	MNTROOTINO	2

static mntnode_t *mntgetnode(vnode_t *);

vnodeops_t *mntvnodeops;
vnodeops_t *mntdummyvnodeops;
extern struct vnode *mntdummyvp;

/*
 * Design of kernel mnttab accounting.
 *
 * To support whitespace in mount names, we implement an ioctl
 * (MNTIOC_GETMNTENT) which allows a programmatic interface to the data in
 * /etc/mnttab.  The libc functions getmntent() and getextmntent() are built
 * atop this interface.
 *
 * To minimize the amount of memory used in the kernel, we keep all the
 * necessary information in the user's address space.  Large server
 * configurations can have /etc/mnttab files in excess of 64k.
 *
 * To support both vanilla read() calls as well as ioctl() calls, we have two
 * different snapshots of the kernel data structures, mnt_read and mnt_ioctl.
 * These snapshots include the base location in user memory, the number of
 * mounts in the snapshot, and any metadata associated with it.  The metadata
 * is used only to support the ioctl() interface, and is a series of extmnttab
 * structures.  When the user issues an ioctl(), we simply copyout a pointer
 * to that structure, and the rest is handled in userland.
 */
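
/*
 * Illustrative userland sketch (not part of this module) of how a consumer
 * might drive MNTIOC_GETMNTENT directly.  In practice the libc routines
 * getmntent()/getextmntent() wrap this interface for applications; the EOF
 * convention shown here (ioctl() returning 1) is an assumption based on the
 * *rvalp handling in mntioctl() below.  Each successful call stores, at the
 * address passed in, a pointer to the next extmnttab entry in the snapshot
 * that the kernel has mapped into the caller's address space.
 *
 *	#include <stdio.h>
 *	#include <fcntl.h>
 *	#include <sys/mnttab.h>
 *	#include <sys/mntio.h>
 *
 *	int
 *	dump_mnttab(void)
 *	{
 *		struct extmnttab *mp;
 *		int fd, rv;
 *
 *		if ((fd = open("/etc/mnttab", O_RDONLY)) < 0)
 *			return (-1);
 *		while ((rv = ioctl(fd, MNTIOC_GETMNTENT, &mp)) == 0)
 *			(void) printf("%s on %s (%s)\n", mp->mnt_special,
 *			    mp->mnt_mountp, mp->mnt_fstype);
 *		(void) close(fd);
 *		return (rv == 1 ? 0 : -1);
 *	}
 */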

/*
 * NOTE: The following variable enables the generation of the "dev=xxx"
 * in the option string for a mounted file system.  Really this should
 * be gotten rid of altogether, but for the sake of backwards compatibility
 * we had to leave it in.  It is defined as a 32-bit device number.  This
 * means that when 64-bit device numbers are in use, if either the major or
 * minor part of the device number will not fit in a 16 bit quantity, the
 * "dev=" will be set to NODEV (0x7fffffff).  See PSARC 1999/566 and
 * 1999/131 for details.  The cmpldev() function used to generate the 32-bit
 * device number handles this check and assigns the proper value.
 */
int mntfs_enabledev = 1;	/* enable old "dev=xxx" option */

static int
mntfs_devsize(struct vfs *vfsp)
{
	dev32_t odev;

	(void) cmpldev(&odev, vfsp->vfs_dev);
	return (snprintf(NULL, 0, "dev=%x", odev));
}

static int
mntfs_devprint(struct vfs *vfsp, char *buf)
{
	dev32_t odev;

	(void) cmpldev(&odev, vfsp->vfs_dev);
	return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev));
}

static int
mntfs_optsize(struct vfs *vfsp)
{
	int i, size = 0;
	mntopt_t *mop;

	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
		mop = &vfsp->vfs_mntopts.mo_list[i];
		if (mop->mo_flags & MO_NODISPLAY)
			continue;
		if (mop->mo_flags & MO_SET) {
			if (size)
				size++;	/* space for comma */
			size += strlen(mop->mo_name);
			/*
			 * count option value if there is one
			 */
			if (mop->mo_arg != NULL) {
				size += strlen(mop->mo_arg) + 1;
			}
		}
	}
	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
		/*
		 * Add space for "zone=<zone_name>" if required.
		 */
		if (size)
			size++;	/* space for comma */
		size += sizeof ("zone=") - 1;
		size += strlen(vfsp->vfs_zone->zone_name);
	}
	if (mntfs_enabledev) {
		if (size != 0)
			size++;	/* space for comma */
		size += mntfs_devsize(vfsp);
	}
	if (size == 0)
		size = strlen("-");
	return (size);
}

static int
mntfs_optprint(struct vfs *vfsp, char *buf)
{
	int i, optinbuf = 0;
	mntopt_t *mop;
	char *origbuf = buf;

	for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) {
		mop = &vfsp->vfs_mntopts.mo_list[i];
		if (mop->mo_flags & MO_NODISPLAY)
			continue;
		if (mop->mo_flags & MO_SET) {
			if (optinbuf)
				*buf++ = ',';
			else
				optinbuf = 1;
			buf += snprintf(buf, MAX_MNTOPT_STR,
			    "%s", mop->mo_name);
			/*
			 * print option value if there is one
			 */
			if (mop->mo_arg != NULL) {
				buf += snprintf(buf, MAX_MNTOPT_STR, "=%s",
				    mop->mo_arg);
			}
		}
	}
	if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) {
		if (optinbuf)
			*buf++ = ',';
		else
			optinbuf = 1;
		buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s",
		    vfsp->vfs_zone->zone_name);
	}
	if (mntfs_enabledev) {
		if (optinbuf++)
			*buf++ = ',';
		buf += mntfs_devprint(vfsp, buf);
	}
	if (!optinbuf) {
		buf += snprintf(buf, MAX_MNTOPT_STR, "-");
	}
	return (buf - origbuf);
}
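
/*
 * Illustrative example (hypothetical values, not from a live system): for a
 * UFS mount in a non-global zone named "web01" whose compressed dev_t is
 * 0x2200010, mntfs_optprint() would produce a string along the lines of
 *
 *	rw,intr,largefiles,logging,onerror=panic,zone=web01,dev=2200010
 *
 * and mntfs_optsize() returns the length of exactly that string, not
 * counting a terminating NUL.  A mount with no displayable options yields
 * the single character "-".
 */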

static size_t
mntfs_vfs_len(vfs_t *vfsp, zone_t *zone)
{
	size_t size = 0;
	const char *resource, *mntpt;

	mntpt = refstr_value(vfsp->vfs_mntpt);
	if (mntpt != NULL && mntpt[0] != '\0') {
		size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
	} else {
		size += strlen("-") + 1;
	}

	resource = refstr_value(vfsp->vfs_resource);
	if (resource != NULL && resource[0] != '\0') {
		if (resource[0] != '/') {
			size += strlen(resource) + 1;
		} else if (!ZONE_PATH_VISIBLE(resource, zone)) {
			/*
			 * Same as the zone's view of the mount point.
			 */
			size += strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1;
		} else {
			size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1;
		}
	} else {
		size += strlen("-") + 1;
	}
	size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1;
	size += mntfs_optsize(vfsp);
	size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime);
	return (size);
}

static void
mntfs_zonerootvfs(zone_t *zone, vfs_t *rootvfsp)
{
	/*
	 * Basically copy over the real vfs_t on which the root vnode is
	 * located, changing its mountpoint and resource to match those of
	 * the zone's rootpath.
	 */
	*rootvfsp = *zone->zone_rootvp->v_vfsp;
	rootvfsp->vfs_mntpt = refstr_alloc(zone->zone_rootpath);
	rootvfsp->vfs_resource = rootvfsp->vfs_mntpt;
}

static size_t
mntfs_zone_len(uint_t *nent_ptr, zone_t *zone, int showhidden)
{
	struct vfs *zonelist;
	struct vfs *vfsp;
	size_t size = 0;
	uint_t cnt = 0;

	ASSERT(zone->zone_rootpath != NULL);

	/*
	 * If the zone has a root entry, it will be the first in the list.
	 * If it doesn't, we conjure one up.
	 */
	vfsp = zonelist = zone->zone_vfslist;
	if (zonelist == NULL ||
	    strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) {
		vfs_t tvfs;
		/*
		 * The root of the zone is not a mount point.  The vfs we
		 * want to report is that of the zone's root vnode.
		 */
		ASSERT(zone != global_zone);
		mntfs_zonerootvfs(zone, &tvfs);
		size += mntfs_vfs_len(&tvfs, zone);
		refstr_rele(tvfs.vfs_mntpt);
		cnt++;
	}
	if (zonelist == NULL)
		goto out;
	do {
		/*
		 * Skip mounts that should not show up in mnttab
		 */
		if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) {
			vfsp = vfsp->vfs_zone_next;
			continue;
		}
		cnt++;
		size += mntfs_vfs_len(vfsp, zone);
		vfsp = vfsp->vfs_zone_next;
	} while (vfsp != zonelist);
out:
	*nent_ptr = cnt;
	return (size);
}

static size_t
mntfs_global_len(uint_t *nent_ptr, int showhidden)
{
	struct vfs *vfsp;
	size_t size = 0;
	uint_t cnt = 0;

	vfsp = rootvfs;
	do {
		/*
		 * Skip mounts that should not show up in mnttab
		 */
		if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		cnt++;
		size += mntfs_vfs_len(vfsp, global_zone);
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
	*nent_ptr = cnt;
	return (size);
}
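
/*
 * Each snapshot entry generated below has five fields in the usual
 * /etc/mnttab order: special, mount point, fstype, options and mount time.
 * For a read() snapshot the fields are tab-separated and each entry ends in
 * a newline; for an ioctl() snapshot each field is NUL-terminated instead,
 * so the extmnttab metadata can point directly at the strings.  An
 * illustrative read()-format entry (device, path and time are hypothetical):
 *
 *	/dev/dsk/c0t0d0s0	/	ufs	rw,intr,largefiles,dev=2200000	1191027123
 */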

static void
mntfs_vfs_generate(vfs_t *vfsp, zone_t *zone, struct extmnttab *tab,
    char **basep, int forread)
{
	const char *resource, *mntpt;
	char *cp = *basep;

	mntpt = refstr_value(vfsp->vfs_mntpt);
	resource = refstr_value(vfsp->vfs_resource);

	if (tab)
		tab->mnt_special = cp;
	if (resource != NULL && resource[0] != '\0') {
		if (resource[0] != '/') {
			cp += snprintf(cp, MAXPATHLEN, "%s", resource);
		} else if (!ZONE_PATH_VISIBLE(resource, zone)) {
			/*
			 * Use the mount point as the resource.
			 */
			cp += snprintf(cp, MAXPATHLEN, "%s",
			    ZONE_PATH_TRANSLATE(mntpt, zone));
		} else {
			cp += snprintf(cp, MAXPATHLEN, "%s",
			    ZONE_PATH_TRANSLATE(resource, zone));
		}
	} else {
		cp += snprintf(cp, MAXPATHLEN, "-");
	}
	*cp++ = forread ? '\t' : '\0';

	if (tab)
		tab->mnt_mountp = cp;
	if (mntpt != NULL && mntpt[0] != '\0') {
		/*
		 * We know the mount point is visible from within the zone,
		 * otherwise it wouldn't be on the zone's vfs list.
		 */
		cp += snprintf(cp, MAXPATHLEN, "%s",
		    ZONE_PATH_TRANSLATE(mntpt, zone));
	} else {
		cp += snprintf(cp, MAXPATHLEN, "-");
	}
	*cp++ = forread ? '\t' : '\0';

	if (tab)
		tab->mnt_fstype = cp;
	cp += snprintf(cp, MAXPATHLEN, "%s",
	    vfssw[vfsp->vfs_fstype].vsw_name);
	*cp++ = forread ? '\t' : '\0';

	if (tab)
		tab->mnt_mntopts = cp;
	cp += mntfs_optprint(vfsp, cp);
	*cp++ = forread ? '\t' : '\0';

	if (tab)
		tab->mnt_time = cp;
	cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime);
	*cp++ = forread ? '\n' : '\0';

	if (tab) {
		tab->mnt_major = getmajor(vfsp->vfs_dev);
		tab->mnt_minor = getminor(vfsp->vfs_dev);
	}

	*basep = cp;
}

static void
mntfs_zone_generate(zone_t *zone, int showhidden, struct extmnttab *tab,
    char *basep, int forread)
{
	vfs_t *zonelist;
	vfs_t *vfsp;
	char *cp = basep;

	/*
	 * If the zone has a root entry, it will be the first in the list.
	 * If it doesn't, we conjure one up.
	 */
	vfsp = zonelist = zone->zone_vfslist;
	if (zonelist == NULL ||
	    strcmp(refstr_value(vfsp->vfs_mntpt), zone->zone_rootpath) != 0) {
		vfs_t tvfs;
		/*
		 * The root of the zone is not a mount point.  The vfs we
		 * want to report is that of the zone's root vnode.
		 */
		ASSERT(zone != global_zone);
		mntfs_zonerootvfs(zone, &tvfs);
		mntfs_vfs_generate(&tvfs, zone, tab, &cp, forread);
		refstr_rele(tvfs.vfs_mntpt);
		if (tab)
			tab++;
	}
	if (zonelist == NULL)
		return;
	do {
		/*
		 * Skip mounts that should not show up in mnttab
		 */
		if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) {
			vfsp = vfsp->vfs_zone_next;
			continue;
		}
		mntfs_vfs_generate(vfsp, zone, tab, &cp, forread);
		if (tab)
			tab++;
		vfsp = vfsp->vfs_zone_next;
	} while (vfsp != zonelist);
}

static void
mntfs_global_generate(int showhidden, struct extmnttab *tab, char *basep,
    int forread)
{
	vfs_t *vfsp;
	char *cp = basep;

	vfsp = rootvfs;
	do {
		/*
		 * Skip mounts that should not show up in mnttab
		 */
		if (!showhidden && (vfsp->vfs_flag & VFS_NOMNTTAB)) {
			vfsp = vfsp->vfs_next;
			continue;
		}
		mntfs_vfs_generate(vfsp, global_zone, tab, &cp, forread);
		if (tab)
			tab++;
		vfsp = vfsp->vfs_next;
	} while (vfsp != rootvfs);
}

static char *
mntfs_mapin(char *base, size_t size)
{
	size_t rlen = roundup(size, PAGESIZE);
	struct as *as = curproc->p_as;
	char *addr;

	as_rangelock(as);
	map_addr(&addr, rlen, 0, 1, 0);
	if (addr == NULL || as_map(as, addr, rlen, segvn_create, zfod_argsp)) {
		as_rangeunlock(as);
		return (NULL);
	}
	as_rangeunlock(as);
	if (copyout(base, addr, size)) {
		(void) as_unmap(as, addr, rlen);
		return (NULL);
	}
	return (addr);
}

static void
mntfs_freesnap(mntsnap_t *snap)
{
	if (snap->mnts_text != NULL)
		(void) as_unmap(curproc->p_as, snap->mnts_text,
		    roundup(snap->mnts_textsize, PAGESIZE));
	snap->mnts_textsize = snap->mnts_count = 0;
	if (snap->mnts_metadata != NULL)
		(void) as_unmap(curproc->p_as, snap->mnts_metadata,
		    roundup(snap->mnts_metasize, PAGESIZE));
	snap->mnts_metasize = 0;
}
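
/*
 * ILP32 view of struct extmnttab: the five string members are narrowed to
 * 32-bit user-space pointers so that 32-bit applications can consume the
 * metadata snapshot directly.  The field order deliberately mirrors that of
 * struct extmnttab in <sys/mnttab.h>.
 */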
#ifdef _SYSCALL32_IMPL

typedef struct extmnttab32 {
	uint32_t mnt_special;
	uint32_t mnt_mountp;
	uint32_t mnt_fstype;
	uint32_t mnt_mntopts;
	uint32_t mnt_time;
	uint_t mnt_major;
	uint_t mnt_minor;
} extmnttab32_t;

#endif

/*
 * Called to generate a dummy read vop call so that any module monitoring
 * /etc/mnttab for access gets notified.
 */
static void
mntdummyreadop()
{
	struct uio uio;
	struct iovec iov;
	char tbuf[1];

	if (mntdummyvp == NULL)
		return;

	/*
	 * Make a VOP_READ call on the dummy vnode so that any
	 * module interested in mnttab getting modified could
	 * intercept this vnode and capture the event.
	 *
	 * Pass a dummy uio struct.  Nobody should reference the buffer.
	 * We need to pass a valid uio struct pointer to take care of
	 * any module intercepting this vnode which could attempt to
	 * look at it.  Currently only the file events notification
	 * module intercepts this vnode.
	 */
	bzero(&uio, sizeof (uio));
	bzero(&iov, sizeof (iov));
	iov.iov_base = tbuf;
	iov.iov_len = 0;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = 0;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_resid = 0;
	(void) VOP_READ(mntdummyvp, &uio, 0, kcred, NULL);
}

/*
 * Snapshot the latest version of the kernel mounted resource information.
 *
 * There are two types of snapshots: one destined for reading, and one
 * destined for ioctl().  The difference is that the ioctl() interface is
 * delimited by NULs, while the read() interface is delimited by tabs and
 * newlines.
 */
/* ARGSUSED */
static int
mntfs_snapshot(mntnode_t *mnp, int forread, int datamodel)
{
	size_t size;
	timespec_t lastmodt;
	mntdata_t *mntdata = MTOD(mnp);
	zone_t *zone = mntdata->mnt_zone;
	boolean_t global_view = (MTOD(mnp)->mnt_zone == global_zone);
	boolean_t showhidden = ((mnp->mnt_flags & MNT_SHOWHIDDEN) != 0);
	struct extmnttab *metadata_baseaddr;
	char *text_baseaddr;
	int i;
	mntsnap_t *snap;

	if (forread)
		snap = &mnp->mnt_read;
	else
		snap = &mnp->mnt_ioctl;

	vfs_list_read_lock();
	/*
	 * Check if the mnttab info has changed since the last snapshot
	 */
	vfs_mnttab_modtime(&lastmodt);
	if (snap->mnts_count &&
	    lastmodt.tv_sec == snap->mnts_time.tv_sec &&
	    lastmodt.tv_nsec == snap->mnts_time.tv_nsec) {
		vfs_list_unlock();
		return (0);
	}

	if (snap->mnts_count != 0)
		mntfs_freesnap(snap);
	if (global_view)
		size = mntfs_global_len(&snap->mnts_count, showhidden);
	else
		size = mntfs_zone_len(&snap->mnts_count, zone, showhidden);
	ASSERT(size != 0);

	if (!forread)
		metadata_baseaddr = kmem_alloc(
		    snap->mnts_count * sizeof (struct extmnttab), KM_SLEEP);
	else
		metadata_baseaddr = NULL;

	text_baseaddr = kmem_alloc(size, KM_SLEEP);

	if (global_view)
		mntfs_global_generate(showhidden, metadata_baseaddr,
		    text_baseaddr, forread);
	else
		mntfs_zone_generate(zone, showhidden,
		    metadata_baseaddr, text_baseaddr, forread);

	vfs_mnttab_modtime(&snap->mnts_time);
	vfs_list_unlock();

	snap->mnts_text = mntfs_mapin(text_baseaddr, size);
	snap->mnts_textsize = size;
	kmem_free(text_baseaddr, size);

	/*
	 * The pointers in the metadata refer to addresses in the range
	 * [base_addr, base_addr + size].  Now that we have mapped the text
	 * into the user's address space, we have to convert these addresses
	 * into the new (user) range.  We also handle the conversion for
	 * 32-bit and 64-bit applications here.
	 */
	if (!forread) {
		struct extmnttab *tab;
#ifdef _SYSCALL32_IMPL
		struct extmnttab32 *tab32;

		if (datamodel == DATAMODEL_ILP32) {
			tab = (struct extmnttab *)metadata_baseaddr;
			tab32 = (struct extmnttab32 *)metadata_baseaddr;

			for (i = 0; i < snap->mnts_count; i++) {
				tab32[i].mnt_special =
				    (uintptr_t)snap->mnts_text +
				    (tab[i].mnt_special - text_baseaddr);
				tab32[i].mnt_mountp =
				    (uintptr_t)snap->mnts_text +
				    (tab[i].mnt_mountp - text_baseaddr);
				tab32[i].mnt_fstype =
				    (uintptr_t)snap->mnts_text +
				    (tab[i].mnt_fstype - text_baseaddr);
				tab32[i].mnt_mntopts =
				    (uintptr_t)snap->mnts_text +
				    (tab[i].mnt_mntopts - text_baseaddr);
				tab32[i].mnt_time = (uintptr_t)snap->mnts_text +
				    (tab[i].mnt_time - text_baseaddr);
				tab32[i].mnt_major = tab[i].mnt_major;
				tab32[i].mnt_minor = tab[i].mnt_minor;
			}

			snap->mnts_metasize =
			    snap->mnts_count * sizeof (struct extmnttab32);
			snap->mnts_metadata = mntfs_mapin(
			    (char *)metadata_baseaddr,
			    snap->mnts_metasize);

		} else {
#endif
			tab = (struct extmnttab *)metadata_baseaddr;
			for (i = 0; i < snap->mnts_count; i++) {
				tab[i].mnt_special = snap->mnts_text +
				    (tab[i].mnt_special - text_baseaddr);
				tab[i].mnt_mountp = snap->mnts_text +
				    (tab[i].mnt_mountp - text_baseaddr);
				tab[i].mnt_fstype = snap->mnts_text +
				    (tab[i].mnt_fstype - text_baseaddr);
				tab[i].mnt_mntopts = snap->mnts_text +
				    (tab[i].mnt_mntopts - text_baseaddr);
				tab[i].mnt_time = snap->mnts_text +
				    (tab[i].mnt_time - text_baseaddr);
			}

			snap->mnts_metasize =
			    snap->mnts_count * sizeof (struct extmnttab);
			snap->mnts_metadata = mntfs_mapin(
			    (char *)metadata_baseaddr, snap->mnts_metasize);
#ifdef _SYSCALL32_IMPL
		}
#endif

		kmem_free(metadata_baseaddr,
		    snap->mnts_count * sizeof (struct extmnttab));
	}

	mntdata->mnt_size = size;

	if (snap->mnts_text == NULL ||
	    (!forread && snap->mnts_metadata == NULL)) {
		mntfs_freesnap(snap);
		return (ENOMEM);
	}
	mntdummyreadop();
	return (0);
}

/*
 * Public function to convert vfs_mntopts into a string.
 * A buffer of sufficient size is allocated, which is returned via bufp,
 * and whose length is returned via lenp.
 */
void
mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp)
{
	size_t len;
	char *buf;

	vfs_list_read_lock();

	len = mntfs_optsize(vfsp) + 1;
	buf = kmem_alloc(len, KM_NOSLEEP);
	if (buf == NULL) {
		*bufp = NULL;
		vfs_list_unlock();
		return;
	}
	buf[len - 1] = '\0';
	(void) mntfs_optprint(vfsp, buf);
	ASSERT(buf[len - 1] == '\0');

	vfs_list_unlock();
	*bufp = buf;
	*lenp = len;
}
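
/*
 * Illustrative kernel-side usage sketch (hypothetical caller, not part of
 * this module): the returned buffer is kmem_alloc()ed and must be freed by
 * the caller using the returned length; a NULL buffer indicates allocation
 * failure, in which case *lenp is left unchanged.
 *
 *	char *opts;
 *	size_t optlen;
 *
 *	mntfs_getmntopts(vfsp, &opts, &optlen);
 *	if (opts != NULL) {
 *		cmn_err(CE_CONT, "mount options: %s\n", opts);
 *		kmem_free(opts, optlen);
 *	}
 */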

/* ARGSUSED */
static int
mntopen(vnode_t **vpp, int flag, cred_t *cr)
{
	vnode_t *vp = *vpp;
	mntnode_t *nmnp;

	/*
	 * Not allowed to open for writing, return error.
	 */
	if (flag & FWRITE)
		return (EPERM);
	/*
	 * Create a new mnt/vnode for each open; this gives us a handle to
	 * hang the snapshot on.
	 */
	nmnp = mntgetnode(vp);

	*vpp = MTOV(nmnp);
	atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1);
	VN_RELE(vp);
	return (0);
}

/* ARGSUSED */
static int
mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
{
	mntnode_t *mnp = VTOM(vp);

	/* Clean up any locks or shares held by the current process */
	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);

	if (count > 1)
		return (0);
	if (vp->v_count == 1) {
		mntfs_freesnap(&mnp->mnt_read);
		mntfs_freesnap(&mnp->mnt_ioctl);
		atomic_add_32(&MTOD(mnp)->mnt_nopen, -1);
	}
	return (0);
}

/* ARGSUSED */
static int
mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct)
{
	int error = 0;
	off_t off = uio->uio_offset;
	size_t len = uio->uio_resid;
	mntnode_t *mnp = VTOM(vp);
	char *buf;
	mntsnap_t *snap = &mnp->mnt_read;
	int datamodel;

	if (off == (off_t)0 || snap->mnts_count == 0) {
		/*
		 * It is assumed that any kernel callers wishing
		 * to read mnttab will be using extmnttab entries
		 * and not extmnttab32 entries, whether or not
		 * the kernel is LP64 or ILP32.  Thus, force the
		 * datamodel that mntfs_snapshot uses to be
		 * DATAMODEL_LP64.
		 */
		if (uio->uio_segflg == UIO_SYSSPACE)
			datamodel = DATAMODEL_LP64;
		else
			datamodel = get_udatamodel();
		if ((error = mntfs_snapshot(mnp, 1, datamodel)) != 0)
			return (error);
	}
	if ((size_t)(off + len) > snap->mnts_textsize)
		len = snap->mnts_textsize - off;

	if (off < 0 || len > snap->mnts_textsize)
		return (EFAULT);

	if (len == 0)
		return (0);

	/*
	 * The mnttab image is stored in the user's address space,
	 * so we have to copy it into the kernel from userland,
	 * then copy it back out to the specified address.
	 */
	buf = kmem_alloc(len, KM_SLEEP);
	if (copyin(snap->mnts_text + off, buf, len))
		error = EFAULT;
	else {
		error = uiomove(buf, len, UIO_READ, uio);
	}
	kmem_free(buf, len);
	mntdummyreadop();
	return (error);
}

static int
mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
{
	mntnode_t *mnp = VTOM(vp);
	int error;
	vnode_t *rvp;
	extern timespec_t vfs_mnttab_ctime;
	mntdata_t *mntdata = MTOD(VTOM(vp));
	mntsnap_t *snap = mnp->mnt_read.mnts_count ?
	    &mnp->mnt_read : &mnp->mnt_ioctl;

	/*
	 * Return all the attributes.  Should be refined
	 * so that it returns only those asked for.
	 * Most of this is complete fakery anyway.
	 */
	rvp = mnp->mnt_mountvp;
	/*
	 * Attributes are same as underlying file with modifications
	 */
	if (error = VOP_GETATTR(rvp, vap, flags, cr))
		return (error);

	/*
	 * We always look like a regular file
	 */
	vap->va_type = VREG;
	/*
	 * mode should basically be read only
	 */
	vap->va_mode &= 07444;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_blksize = DEV_BSIZE;
	vap->va_rdev = 0;
	vap->va_seq = 0;
	/*
	 * Set nlink to the number of open vnodes for mnttab info
	 * plus one for existing.
	 */
	vap->va_nlink = mntdata->mnt_nopen + 1;
	/*
	 * If this open has no snapshot yet, report the size of the
	 * most recent snapshot taken on this mount instead.
	 */
	vap->va_size = snap->mnts_textsize ? snap->mnts_textsize :
	    mntdata->mnt_size;
	/*
	 * Fetch mtime from the vfs mnttab timestamp
	 */
	vap->va_ctime = vfs_mnttab_ctime;
	vfs_list_read_lock();
	vfs_mnttab_modtime(&vap->va_mtime);
	vap->va_atime = vap->va_mtime;
	vfs_list_unlock();
	/*
	 * Nodeid is always MNTROOTINO.
	 */
	vap->va_nodeid = (ino64_t)MNTROOTINO;
	vap->va_nblocks = btod(vap->va_size);
	return (0);
}

static int
mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr)
{
	mntnode_t *mnp = VTOM(vp);

	if (mode & (VWRITE|VEXEC))
		return (EROFS);

	/*
	 * Do access check on the underlying directory vnode.
	 */
	return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr));
}

/*
 * New /mntfs vnode required; allocate it and fill in most of the fields.
 */
static mntnode_t *
mntgetnode(vnode_t *dp)
{
	mntnode_t *mnp;
	vnode_t *vp;

	mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP);
	mnp->mnt_vnode = vn_alloc(KM_SLEEP);
	mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp;
	vp = MTOV(mnp);
	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
	vn_setops(vp, mntvnodeops);
	vp->v_vfsp = dp->v_vfsp;
	vp->v_type = VREG;
	vp->v_data = (caddr_t)mnp;

	return (mnp);
}

/*
 * Free the storage obtained from mntgetnode().
 */
static void
mntfreenode(mntnode_t *mnp)
{
	vnode_t *vp = MTOV(mnp);

	vn_invalid(vp);
	vn_free(vp);
	kmem_free(mnp, sizeof (*mnp));
}

/* ARGSUSED */
static int
mntfsync(vnode_t *vp, int syncflag, cred_t *cr)
{
	return (0);
}

/* ARGSUSED */
static void
mntinactive(vnode_t *vp, cred_t *cr)
{
	mntnode_t *mnp = VTOM(vp);

	mntfreenode(mnp);
}

/* ARGSUSED */
static int
mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp)
{
	if (*noffp == 0)
		VTOM(vp)->mnt_offset = 0;

	return (0);
}

/*
 * Return the answer requested to poll().
 * POLLRDBAND will return when the mtime of the mnttab
 * information is newer than the latest one read for this open.
 */
/* ARGSUSED */
static int
mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp)
{
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *snap = &mnp->mnt_read;

	if (mnp->mnt_ioctl.mnts_time.tv_sec > snap->mnts_time.tv_sec ||
	    (mnp->mnt_ioctl.mnts_time.tv_sec == snap->mnts_time.tv_sec &&
	    mnp->mnt_ioctl.mnts_time.tv_nsec > snap->mnts_time.tv_nsec))
		snap = &mnp->mnt_ioctl;

	*revp = 0;
	*phpp = (pollhead_t *)NULL;
	if (ev & POLLIN)
		*revp |= POLLIN;

	if (ev & POLLRDNORM)
		*revp |= POLLRDNORM;

	if (ev & POLLRDBAND) {
		vfs_mnttab_poll(&snap->mnts_time, phpp);
		if (*phpp == (pollhead_t *)NULL)
			*revp |= POLLRDBAND;
	}
	if (*revp || *phpp != NULL || any) {
		return (0);
	}
	/*
	 * If someone is polling for an unsupported poll event (e.g.
	 * POLLOUT, POLLPRI, etc.), just return POLLERR revents.
	 * That way we will ensure that we don't return a 0
	 * revents with a NULL pollhead pointer.
	 */
	*revp = POLLERR;
	return (0);
}
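
/*
 * Illustrative userland sketch (not part of this module): waiting for the
 * mount table to change.  A consumer reads /etc/mnttab once to establish a
 * snapshot, then polls for POLLRDBAND, which is only returned once the
 * kernel mnttab is newer than that snapshot.  Buffer handling is elided;
 * the rewind-and-reread step is an assumption about how a caller would
 * refresh its view.
 *
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	void
 *	wait_for_mount_change(int fd)
 *	{
 *		struct pollfd pfd;
 *
 *		pfd.fd = fd;
 *		pfd.events = POLLRDBAND;
 *		pfd.revents = 0;
 *		if (poll(&pfd, 1, -1) == 1 && (pfd.revents & POLLRDBAND)) {
 *			(void) lseek(fd, 0, SEEK_SET);
 *			... reread the file to pick up the new contents ...
 *		}
 *	}
 */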
/* ARGSUSED */
static int
mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
    cred_t *cr, int *rvalp)
{
	uint_t *up = (uint_t *)arg;
	mntnode_t *mnp = VTOM(vp);
	mntsnap_t *snap = &mnp->mnt_ioctl;
	int error;

	error = 0;
	switch (cmd) {

	case MNTIOC_NMNTS: {		/* get no. of mounted resources */
		if (snap->mnts_count == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0)
				return (error);
		}
		if (suword32(up, snap->mnts_count) != 0)
			error = EFAULT;
		break;
	}

	case MNTIOC_GETDEVLIST: {	/* get mounted device major/minor nos */
		uint_t *devlist;
		int i;
		size_t len;

		if (snap->mnts_count == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0)
				return (error);
		}

		len = 2 * snap->mnts_count * sizeof (uint_t);
		devlist = kmem_alloc(len, KM_SLEEP);
		for (i = 0; i < snap->mnts_count; i++) {

#ifdef _SYSCALL32_IMPL
			if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) {
				struct extmnttab32 tab;

				if ((error = xcopyin(snap->mnts_text +
				    i * sizeof (struct extmnttab32), &tab,
				    sizeof (tab))) != 0)
					break;

				devlist[i*2] = tab.mnt_major;
				devlist[i*2+1] = tab.mnt_minor;
			} else {
#endif
				struct extmnttab tab;

				if ((error = xcopyin(snap->mnts_text +
				    i * sizeof (struct extmnttab), &tab,
				    sizeof (tab))) != 0)
					break;

				devlist[i*2] = tab.mnt_major;
				devlist[i*2+1] = tab.mnt_minor;
#ifdef _SYSCALL32_IMPL
			}
#endif
		}

		if (error == 0)
			error = xcopyout(devlist, up, len);
		kmem_free(devlist, len);
		break;
	}

	case MNTIOC_SETTAG:		/* set tag on mounted file system */
	case MNTIOC_CLRTAG:		/* clear tag on mounted file system */
	{
		struct mnttagdesc *dp = (struct mnttagdesc *)arg;
		STRUCT_DECL(mnttagdesc, tagdesc);
		char *cptr;
		uint32_t major, minor;
		char tagbuf[MAX_MNTOPT_TAG];
		char *pbuf;
		size_t len;
		uint_t start = 0;
		mntdata_t *mntdata = MTOD(mnp);
		zone_t *zone = mntdata->mnt_zone;

		STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK);
		if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) {
			error = EFAULT;
			break;
		}
		pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		if (zone != global_zone) {
			(void) strcpy(pbuf, zone->zone_rootpath);
			/* truncate "/" and nul */
			start = zone->zone_rootpathlen - 2;
			ASSERT(pbuf[start] == '/');
		}
		cptr = STRUCT_FGETP(tagdesc, mtd_mntpt);
		error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len);
		if (error) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}
		if (start != 0 && pbuf[start] != '/') {
			kmem_free(pbuf, MAXPATHLEN);
			error = EINVAL;
			break;
		}
		cptr = STRUCT_FGETP(tagdesc, mtd_tag);
		if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) {
			kmem_free(pbuf, MAXPATHLEN);
			break;
		}
		major = STRUCT_FGET(tagdesc, mtd_major);
		minor = STRUCT_FGET(tagdesc, mtd_minor);
		if (cmd == MNTIOC_SETTAG)
			error = vfs_settag(major, minor, pbuf, tagbuf, cr);
		else
			error = vfs_clrtag(major, minor, pbuf, tagbuf, cr);
		kmem_free(pbuf, MAXPATHLEN);
		break;
	}

	case MNTIOC_SHOWHIDDEN:
	{
		mutex_enter(&vp->v_lock);
		mnp->mnt_flags |= MNT_SHOWHIDDEN;
		mutex_exit(&vp->v_lock);
		break;
	}

	case MNTIOC_GETMNTENT:
	{
		size_t idx;
		uintptr_t addr;

		idx = mnp->mnt_offset;
		if (snap->mnts_count == 0 || idx == 0) {
			if ((error =
			    mntfs_snapshot(mnp, 0, flag & DATAMODEL_MASK)) != 0)
				return (error);
		}
		/*
		 * If the next index is beyond the end of the current mnttab,
		 * return EOF
		 */
		if (idx >= snap->mnts_count) {
			*rvalp = 1;
			return (0);
		}

#ifdef _SYSCALL32_IMPL
		if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			addr = (uintptr_t)(snap->mnts_metadata + idx *
			    sizeof (struct extmnttab32));
			error = suword32((void *)arg, addr);
		} else {
#endif
			addr = (uintptr_t)(snap->mnts_metadata + idx *
			    sizeof (struct extmnttab));
			error = sulword((void *)arg, addr);
#ifdef _SYSCALL32_IMPL
		}
#endif

		if (error != 0)
			return (error);

		mnp->mnt_offset++;
		break;
	}

	default:
		error = EINVAL;
		break;
	}

	return (error);
}

/* ARGSUSED */
static int
mntdummyread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	return (0);
}

/* ARGSUSED */
static int
mntdummywrite(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred,
    caller_context_t *ct)
{
	return (0);
}

/*
 * /mntfs vnode operations vector
 */
const fs_operation_def_t mnt_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = mntopen },
	VOPNAME_CLOSE,		{ .vop_close = mntclose },
	VOPNAME_READ,		{ .vop_read = mntread },
	VOPNAME_IOCTL,		{ .vop_ioctl = mntioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = mntgetattr },
	VOPNAME_ACCESS,		{ .vop_access = mntaccess },
	VOPNAME_FSYNC,		{ .vop_fsync = mntfsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = mntinactive },
	VOPNAME_SEEK,		{ .vop_seek = mntseek },
	VOPNAME_POLL,		{ .vop_poll = mntpoll },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	VOPNAME_SHRLOCK,	{ .error = fs_error },
	NULL,			NULL
};

const fs_operation_def_t mnt_dummyvnodeops_template[] = {
	VOPNAME_READ,		{ .vop_read = mntdummyread },
	VOPNAME_WRITE,		{ .vop_write = mntdummywrite },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};