1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/file.h> 27 #include <sys/stat.h> 28 #include <sys/atomic.h> 29 #include <sys/mntio.h> 30 #include <sys/mnttab.h> 31 #include <sys/mount.h> 32 #include <sys/sunddi.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/vfs.h> 36 #include <sys/vfs_opreg.h> 37 #include <sys/fs/mntdata.h> 38 #include <fs/fs_subr.h> 39 #include <sys/vmsystm.h> 40 #include <vm/seg_vn.h> 41 #include <sys/time.h> 42 #include <sys/ksynch.h> 43 #include <sys/sdt.h> 44 45 #define MNTROOTINO 2 46 47 static mntnode_t *mntgetnode(vnode_t *); 48 49 vnodeops_t *mntvnodeops; 50 extern void vfs_mnttab_readop(void); 51 52 /* 53 * Design of kernel mnttab accounting. 54 * 55 * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of 56 * the mounted resources: the read-only file /etc/mnttab, and a collection of 57 * ioctl() commands. Most of these interfaces are public and are described in 58 * mnttab(4). 
Three private ioctl() commands, MNTIOC_GETMNTENT, 59 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C) 60 * family of functions, allowing them to support white space in mount names. 61 * 62 * A significant feature of mntfs is that it provides a file descriptor with a 63 * snapshot once it begins to consume mnttab data. Thus, as the process 64 * continues to consume data, its view of the in-kernel mnttab does not change 65 * even if resources are mounted or unmounted. The intent is to ensure that 66 * processes are guaranteed to read self-consistent data even as the system 67 * changes. 68 * 69 * The snapshot is implemented by a "database", unique to each zone, that 70 * comprises a linked list of mntelem_ts. The database is identified by 71 * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains 72 * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is 73 * marked with its time of "birth", i.e. creation. An element is "killed", and 74 * marked with its time of death, when it is found to be out of date, e.g. when 75 * the corresponding resource has been unmounted. 76 * 77 * When a process performs the first read() or ioctl() for a file descriptor for 78 * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure 79 * that an element exists for each currently mounted resource. Following this, 80 * the current time is written into a snapshot structure, a mntsnap_t, embedded 81 * in the descriptor's mntnode_t. 82 * 83 * mntfs is able to enumerate the /etc/mnttab entries corresponding to a 84 * particular file descriptor by searching the database for entries that were 85 * born before the appropriate snapshot and that either are still alive or died 86 * after the snapshot was created. Consumers use the iterator function 87 * mntfs_get_next_elem() to identify the next suitable element in the database. 
88 * 89 * Each snapshot has a hold on its corresponding database elements, effected by 90 * a per-element reference count. At last close(), a snapshot is destroyed in 91 * mntfs_freesnap() by releasing all of its holds; an element is destroyed if 92 * its reference count becomes zero. Therefore the database never exists unless 93 * there is at least one active consumer of /etc/mnttab. 94 * 95 * getmntent(3C) et al. "do not open, close or rewind the file." This implies 96 * that getmntent() and read() must be able to operate without interaction on 97 * the same file descriptor; this is accomplished by the use of separate 98 * mntsnap_ts for both read() and ioctl(). 99 * 100 * mntfs observes the following lock-ordering: 101 * 102 * mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock 103 * 104 * NOTE: The following variable enables the generation of the "dev=xxx" 105 * in the option string for a mounted file system. Really this should 106 * be gotten rid of altogether, but for the sake of backwards compatibility 107 * we had to leave it in. It is defined as a 32-bit device number. This 108 * means that when 64-bit device numbers are in use, if either the major or 109 * minor part of the device number will not fit in a 16 bit quantity, the 110 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 111 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 112 * device number handles this check and assigns the proper value. 113 */ 114 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 115 116 extern void vfs_mono_time(timespec_t *); 117 enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER }; 118 119 /* 120 * Determine whether a field within a line from /etc/mnttab contains actual 121 * content or simply the marker string "-". This never applies to the time, 122 * therefore the delimiter must be a tab. 
123 */ 124 #define MNTFS_REAL_FIELD(x) (*(x) != '-' || *((x) + 1) != '\t') 125 126 static int 127 mntfs_devsize(struct vfs *vfsp) 128 { 129 dev32_t odev; 130 131 (void) cmpldev(&odev, vfsp->vfs_dev); 132 return (snprintf(NULL, 0, "dev=%x", odev)); 133 } 134 135 static int 136 mntfs_devprint(struct vfs *vfsp, char *buf) 137 { 138 dev32_t odev; 139 140 (void) cmpldev(&odev, vfsp->vfs_dev); 141 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 142 } 143 144 /* Identify which, if either, of two supplied timespec structs is newer. */ 145 static int 146 mntfs_newest(timespec_t *a, timespec_t *b) 147 { 148 if (a->tv_sec == b->tv_sec && 149 a->tv_nsec == b->tv_nsec) { 150 return (MNTFS_NEITHER); 151 } else if (b->tv_sec > a->tv_sec || 152 (b->tv_sec == a->tv_sec && 153 b->tv_nsec > a->tv_nsec)) { 154 return (MNTFS_SECOND); 155 } else { 156 return (MNTFS_FIRST); 157 } 158 } 159 160 static int 161 mntfs_optsize(struct vfs *vfsp) 162 { 163 int i, size = 0; 164 mntopt_t *mop; 165 166 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 167 mop = &vfsp->vfs_mntopts.mo_list[i]; 168 if (mop->mo_flags & MO_NODISPLAY) 169 continue; 170 if (mop->mo_flags & MO_SET) { 171 if (size) 172 size++; /* space for comma */ 173 size += strlen(mop->mo_name); 174 /* 175 * count option value if there is one 176 */ 177 if (mop->mo_arg != NULL) { 178 size += strlen(mop->mo_arg) + 1; 179 } 180 } 181 } 182 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 183 /* 184 * Add space for "zone=<zone_name>" if required. 
185 */ 186 if (size) 187 size++; /* space for comma */ 188 size += sizeof ("zone=") - 1; 189 size += strlen(vfsp->vfs_zone->zone_name); 190 } 191 if (mntfs_enabledev) { 192 if (size != 0) 193 size++; /* space for comma */ 194 size += mntfs_devsize(vfsp); 195 } 196 if (size == 0) 197 size = strlen("-"); 198 return (size); 199 } 200 201 static int 202 mntfs_optprint(struct vfs *vfsp, char *buf) 203 { 204 int i, optinbuf = 0; 205 mntopt_t *mop; 206 char *origbuf = buf; 207 208 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 209 mop = &vfsp->vfs_mntopts.mo_list[i]; 210 if (mop->mo_flags & MO_NODISPLAY) 211 continue; 212 if (mop->mo_flags & MO_SET) { 213 if (optinbuf) 214 *buf++ = ','; 215 else 216 optinbuf = 1; 217 buf += snprintf(buf, MAX_MNTOPT_STR, 218 "%s", mop->mo_name); 219 /* 220 * print option value if there is one 221 */ 222 if (mop->mo_arg != NULL) { 223 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 224 mop->mo_arg); 225 } 226 } 227 } 228 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 229 if (optinbuf) 230 *buf++ = ','; 231 else 232 optinbuf = 1; 233 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 234 vfsp->vfs_zone->zone_name); 235 } 236 if (mntfs_enabledev) { 237 if (optinbuf++) 238 *buf++ = ','; 239 buf += mntfs_devprint(vfsp, buf); 240 } 241 if (!optinbuf) { 242 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 243 } 244 return (buf - origbuf); 245 } 246 247 void 248 mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp) 249 { 250 struct extmnttab *tabp = &elemp->mnte_tab; 251 const char *resource, *mntpt; 252 char *cp = elemp->mnte_text; 253 mntpt = refstr_value(vfsp->vfs_mntpt); 254 resource = refstr_value(vfsp->vfs_resource); 255 256 tabp->mnt_special = 0; 257 if (resource != NULL && resource[0] != '\0') { 258 if (resource[0] != '/') { 259 cp += snprintf(cp, MAXPATHLEN, "%s\t", resource); 260 } else if (!ZONE_PATH_VISIBLE(resource, zonep)) { 261 /* 262 * Use the mount point as the resource. 
263 */ 264 cp += snprintf(cp, MAXPATHLEN, "%s\t", 265 ZONE_PATH_TRANSLATE(mntpt, zonep)); 266 } else { 267 cp += snprintf(cp, MAXPATHLEN, "%s\t", 268 ZONE_PATH_TRANSLATE(resource, zonep)); 269 } 270 } else { 271 cp += snprintf(cp, MAXPATHLEN, "-\t"); 272 } 273 274 tabp->mnt_mountp = (char *)(cp - elemp->mnte_text); 275 if (mntpt != NULL && mntpt[0] != '\0') { 276 /* 277 * We know the mount point is visible from within the zone, 278 * otherwise it wouldn't be on the zone's vfs list. 279 */ 280 cp += snprintf(cp, MAXPATHLEN, "%s\t", 281 ZONE_PATH_TRANSLATE(mntpt, zonep)); 282 } else { 283 cp += snprintf(cp, MAXPATHLEN, "-\t"); 284 } 285 286 tabp->mnt_fstype = (char *)(cp - elemp->mnte_text); 287 cp += snprintf(cp, MAXPATHLEN, "%s\t", 288 vfssw[vfsp->vfs_fstype].vsw_name); 289 290 tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text); 291 cp += mntfs_optprint(vfsp, cp); 292 *cp++ = '\t'; 293 294 tabp->mnt_time = (char *)(cp - elemp->mnte_text); 295 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 296 *cp++ = '\n'; /* over-write snprintf's trailing null-byte */ 297 298 tabp->mnt_major = getmajor(vfsp->vfs_dev); 299 tabp->mnt_minor = getminor(vfsp->vfs_dev); 300 301 elemp->mnte_text_size = cp - elemp->mnte_text; 302 elemp->mnte_vfs_ctime = vfsp->vfs_hrctime; 303 elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB; 304 } 305 306 /* Determine the length of the /etc/mnttab entry for this vfs_t. 
*/ 307 static size_t 308 mntfs_text_len(vfs_t *vfsp, zone_t *zone) 309 { 310 size_t size = 0; 311 const char *resource, *mntpt; 312 size_t mntsize; 313 314 mntpt = refstr_value(vfsp->vfs_mntpt); 315 if (mntpt != NULL && mntpt[0] != '\0') { 316 mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 317 } else { 318 mntsize = 2; /* "-\t" */ 319 } 320 size += mntsize; 321 322 resource = refstr_value(vfsp->vfs_resource); 323 if (resource != NULL && resource[0] != '\0') { 324 if (resource[0] != '/') { 325 size += strlen(resource) + 1; 326 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 327 /* 328 * Same as the zone's view of the mount point. 329 */ 330 size += mntsize; 331 } else { 332 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 333 } 334 } else { 335 size += 2; /* "-\t" */ 336 } 337 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 338 size += mntfs_optsize(vfsp); 339 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 340 return (size); 341 } 342 343 /* Destroy the resources associated with a snapshot element. */ 344 static void 345 mntfs_destroy_elem(mntelem_t *elemp) 346 { 347 kmem_free(elemp->mnte_text, elemp->mnte_text_size); 348 kmem_free(elemp, sizeof (mntelem_t)); 349 } 350 351 /* 352 * Return 1 if the given snapshot is in the range of the given element; return 353 * 0 otherwise. 354 */ 355 static int 356 mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp) 357 { 358 timespec_t *stimep = &snapp->mnts_time; 359 timespec_t *btimep = &elemp->mnte_birth; 360 timespec_t *dtimep = &elemp->mnte_death; 361 362 /* 363 * If a snapshot is in range of an element then the snapshot must have 364 * been created after the birth of the element, and either the element 365 * is still alive or it died after the snapshot was created. 
366 */ 367 if (mntfs_newest(btimep, stimep) == MNTFS_SECOND && 368 (MNTFS_ELEM_IS_ALIVE(elemp) || 369 mntfs_newest(stimep, dtimep) == MNTFS_SECOND)) 370 return (1); 371 else 372 return (0); 373 } 374 375 /* 376 * Return the next valid database element, after the one provided, for a given 377 * snapshot; return NULL if none exists. The caller must hold the zone's 378 * database lock as a reader before calling this function. 379 */ 380 static mntelem_t * 381 mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp) 382 { 383 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 384 385 do { 386 elemp = elemp->mnte_next; 387 } while (elemp && 388 (!mntfs_elem_in_range(snapp, elemp) || 389 (!show_hidden && elemp->mnte_hidden))); 390 return (elemp); 391 } 392 393 /* 394 * This function frees the resources associated with a mntsnap_t. It walks 395 * through the database, decrementing the reference count of any element that 396 * satisfies the snapshot. If the reference count of an element becomes zero 397 * then it is removed from the database. 398 */ 399 static void 400 mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp) 401 { 402 zone_t *zonep = MTOD(mnp)->mnt_zone; 403 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 404 mntelem_t **elempp = &zonep->zone_mntfs_db; 405 mntelem_t *elemp; 406 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 407 size_t number_decremented = 0; 408 409 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 410 411 /* Ignore an uninitialised snapshot. */ 412 if (snapp->mnts_nmnts == 0) 413 return; 414 415 /* Drop the holds on any matching database elements. 
*/ 416 rw_enter(dblockp, RW_WRITER); 417 while ((elemp = *elempp) != NULL) { 418 if (mntfs_elem_in_range(snapp, elemp) && 419 (!elemp->mnte_hidden || show_hidden) && 420 ++number_decremented && --elemp->mnte_refcnt == 0) { 421 if ((*elempp = elemp->mnte_next) != NULL) 422 (*elempp)->mnte_prev = elemp->mnte_prev; 423 mntfs_destroy_elem(elemp); 424 } else { 425 elempp = &elemp->mnte_next; 426 } 427 } 428 rw_exit(dblockp); 429 ASSERT(number_decremented == snapp->mnts_nmnts); 430 431 /* Clear the snapshot data. */ 432 bzero(snapp, sizeof (mntsnap_t)); 433 } 434 435 /* Insert the new database element newp after the existing element prevp. */ 436 static void 437 mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp) 438 { 439 newp->mnte_prev = prevp; 440 newp->mnte_next = prevp->mnte_next; 441 prevp->mnte_next = newp; 442 if (newp->mnte_next != NULL) 443 newp->mnte_next->mnte_prev = newp; 444 } 445 446 /* Create and return a copy of a given database element. */ 447 static mntelem_t * 448 mntfs_copy(mntelem_t *origp) 449 { 450 mntelem_t *copyp; 451 452 copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP); 453 copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime; 454 copyp->mnte_text_size = origp->mnte_text_size; 455 copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP); 456 bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size); 457 copyp->mnte_tab = origp->mnte_tab; 458 copyp->mnte_hidden = origp->mnte_hidden; 459 460 return (copyp); 461 } 462 463 /* 464 * Compare two database elements and determine whether or not the vfs_t payload 465 * data of each are the same. Return 1 if so and 0 otherwise. 
466 */ 467 static int 468 mntfs_is_same_element(mntelem_t *a, mntelem_t *b) 469 { 470 if (a->mnte_hidden == b->mnte_hidden && 471 a->mnte_text_size == b->mnte_text_size && 472 bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 && 473 bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0) 474 return (1); 475 else 476 return (0); 477 } 478 479 /* 480 * mntfs_snapshot() updates the database, creating it if necessary, so that it 481 * accurately reflects the state of the in-kernel mnttab. It also increments 482 * the reference count on all database elements that correspond to currently- 483 * mounted resources. Finally, it initialises the appropriate snapshot 484 * structure. 485 * 486 * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs, 487 * when it is inserted into the in-kernel mnttab. This time stamp is copied into 488 * the corresponding database element when it is created, allowing the element 489 * and the vfs_t to be identified as a pair. It is possible that some file 490 * systems may make unadvertised changes to, for example, a resource's mount 491 * options. Therefore, in order to determine whether a database element is an 492 * up-to-date representation of a given vfs_t, it is compared with a temporary 493 * element generated for this purpose. Although less efficient, this is safer 494 * than implementing an mtime for a vfs_t. 495 * 496 * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These 497 * are considered invisible unless the user has already set the MNT_SHOWHIDDEN 498 * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl. 
499 */ 500 static void 501 mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp) 502 { 503 mntdata_t *mnd = MTOD(mnp); 504 zone_t *zonep = mnd->mnt_zone; 505 int is_global_zone = (zonep == global_zone); 506 int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN; 507 vfs_t *vfsp, *firstvfsp, *lastvfsp; 508 vfs_t dummyvfs; 509 vfs_t *dummyvfsp = NULL; 510 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 511 mntelem_t **headpp = &zonep->zone_mntfs_db; 512 mntelem_t *elemp; 513 mntelem_t *prevp = NULL; 514 int order; 515 mntelem_t *tempelemp; 516 mntelem_t *newp; 517 mntelem_t *firstp = NULL; 518 size_t nmnts = 0; 519 size_t total_text_size = 0; 520 size_t normal_text_size = 0; 521 int insert_before; 522 timespec_t last_mtime; 523 size_t entry_length, new_entry_length; 524 525 526 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 527 vfs_list_read_lock(); 528 vfs_mnttab_modtime(&last_mtime); 529 530 /* 531 * If this snapshot already exists then we must have been asked to 532 * rewind the file, i.e. discard the snapshot and create a new one in 533 * its place. In this case we first see if the in-kernel mnttab has 534 * advertised a change; if not then we simply reinitialise the metadata. 535 */ 536 if (snapp->mnts_nmnts) { 537 if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) == 538 MNTFS_NEITHER) { 539 /* 540 * An unchanged mtime is no guarantee that the 541 * in-kernel mnttab is unchanged; for example, a 542 * concurrent remount may be between calls to 543 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd(). 544 * It follows that the database may have changed, and 545 * in particular that some elements in this snapshot 546 * may have been killed by another call to 547 * mntfs_snapshot(). It is therefore not merely 548 * unnecessary to update the snapshot's time but in 549 * fact dangerous; it needs to be left alone. 
550 */ 551 snapp->mnts_next = snapp->mnts_first; 552 snapp->mnts_flags &= ~MNTS_REWIND; 553 snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 554 vfs_list_unlock(); 555 return; 556 } else { 557 mntfs_freesnap(mnp, snapp); 558 } 559 } 560 561 /* 562 * Create a temporary database element. For each vfs_t, the temporary 563 * element will be populated with the corresponding text. If the vfs_t 564 * does not have a corresponding element within the database, or if 565 * there is such an element but it is stale, a copy of the temporary 566 * element is inserted into the database at the appropriate location. 567 */ 568 tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP); 569 entry_length = MNT_LINE_MAX; 570 tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP); 571 572 /* Find the first and last vfs_t for the given zone. */ 573 if (is_global_zone) { 574 firstvfsp = rootvfs; 575 lastvfsp = firstvfsp->vfs_prev; 576 } else { 577 firstvfsp = zonep->zone_vfslist; 578 /* 579 * If there isn't already a vfs_t for root then we create a 580 * dummy which will be used as the head of the list (which will 581 * therefore no longer be circular). 582 */ 583 if (firstvfsp == NULL || 584 strcmp(refstr_value(firstvfsp->vfs_mntpt), 585 zonep->zone_rootpath) != 0) { 586 /* 587 * The zone's vfs_ts will have mount points relative to 588 * the zone's root path. The vfs_t for the zone's 589 * root file system would therefore have a mount point 590 * equal to the zone's root path. Since the zone's root 591 * path isn't a mount point, we copy the vfs_t of the 592 * zone's root vnode, and provide it with a fake mount 593 * point and resource. 594 * 595 * Note that by cloning another vfs_t we also acquire 596 * its high-resolution ctime. This might appear to 597 * violate the requirement that the ctimes in the list 598 * of vfs_ts are unique and monotonically increasing; 599 * this is not the case. 
The dummy vfs_t appears in only 600 * a non-global zone's vfs_t list, where the cloned 601 * vfs_t would not ordinarily be visible; the ctimes are 602 * therefore unique. The zone's root path must be 603 * available before the zone boots, and so its root 604 * vnode's vfs_t's ctime must be lower than those of any 605 * resources subsequently mounted by the zone. The 606 * ctimes are therefore monotonically increasing. 607 */ 608 dummyvfs = *zonep->zone_rootvp->v_vfsp; 609 dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath); 610 dummyvfs.vfs_resource = dummyvfs.vfs_mntpt; 611 dummyvfsp = &dummyvfs; 612 if (firstvfsp == NULL) { 613 lastvfsp = dummyvfsp; 614 } else { 615 lastvfsp = firstvfsp->vfs_zone_prev; 616 dummyvfsp->vfs_zone_next = firstvfsp; 617 } 618 firstvfsp = dummyvfsp; 619 } else { 620 lastvfsp = firstvfsp->vfs_zone_prev; 621 } 622 } 623 624 /* 625 * Now walk through all the vfs_ts for this zone. For each one, find the 626 * corresponding database element, creating it first if necessary, and 627 * increment its reference count. 628 */ 629 rw_enter(dblockp, RW_WRITER); 630 elemp = zonep->zone_mntfs_db; 631 /* CSTYLED */ 632 for (vfsp = firstvfsp;; 633 vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) { 634 DTRACE_PROBE1(new__vfs, vfs_t *, vfsp); 635 /* Consider only visible entries. */ 636 if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) { 637 /* 638 * Walk through the existing database looking for either 639 * an element that matches the current vfs_t, or for the 640 * correct place in which to insert a new element. 641 */ 642 insert_before = 0; 643 for (; elemp; prevp = elemp, elemp = elemp->mnte_next) { 644 DTRACE_PROBE1(considering__elem, mntelem_t *, 645 elemp); 646 647 /* Compare the vfs_t with the element. 
*/ 648 order = mntfs_newest(&elemp->mnte_vfs_ctime, 649 &vfsp->vfs_hrctime); 650 651 /* 652 * If we encounter a database element newer than 653 * this vfs_t then we've stepped over a gap 654 * where the element for this vfs_t must be 655 * inserted. 656 */ 657 if (order == MNTFS_FIRST) { 658 insert_before = 1; 659 break; 660 } 661 662 /* Dead elements no longer interest us. */ 663 if (MNTFS_ELEM_IS_DEAD(elemp)) 664 continue; 665 666 /* 667 * If the time stamps are the same then the 668 * element is potential match for the vfs_t, 669 * although it may later prove to be stale. 670 */ 671 if (order == MNTFS_NEITHER) 672 break; 673 674 /* 675 * This element must be older than the vfs_t. 676 * It must, therefore, correspond to a vfs_t 677 * that has been unmounted. Since the element is 678 * still alive, we kill it if it is visible. 679 */ 680 if (!elemp->mnte_hidden || show_hidden) 681 vfs_mono_time(&elemp->mnte_death); 682 } 683 DTRACE_PROBE2(possible__match, vfs_t *, vfsp, 684 mntelem_t *, elemp); 685 686 /* Create a new database element if required. */ 687 new_entry_length = mntfs_text_len(vfsp, zonep); 688 if (new_entry_length > entry_length) { 689 kmem_free(tempelemp->mnte_text, entry_length); 690 tempelemp->mnte_text = 691 kmem_alloc(new_entry_length, KM_SLEEP); 692 entry_length = new_entry_length; 693 } 694 mntfs_populate_text(vfsp, zonep, tempelemp); 695 ASSERT(tempelemp->mnte_text_size == new_entry_length); 696 if (elemp == NULL) { 697 /* 698 * We ran off the end of the database. Insert a 699 * new element at the end. 700 */ 701 newp = mntfs_copy(tempelemp); 702 vfs_mono_time(&newp->mnte_birth); 703 if (prevp) { 704 mntfs_insert_after(newp, prevp); 705 } else { 706 newp->mnte_next = NULL; 707 newp->mnte_prev = NULL; 708 ASSERT(*headpp == NULL); 709 *headpp = newp; 710 } 711 elemp = newp; 712 } else if (insert_before) { 713 /* 714 * Insert a new element before the current one. 
715 */ 716 newp = mntfs_copy(tempelemp); 717 vfs_mono_time(&newp->mnte_birth); 718 if (prevp) { 719 mntfs_insert_after(newp, prevp); 720 } else { 721 newp->mnte_next = elemp; 722 newp->mnte_prev = NULL; 723 elemp->mnte_prev = newp; 724 ASSERT(*headpp == elemp); 725 *headpp = newp; 726 } 727 elemp = newp; 728 } else if (!mntfs_is_same_element(elemp, tempelemp)) { 729 /* 730 * The element corresponds to the vfs_t, but the 731 * vfs_t has changed; it must have been 732 * remounted. Kill the old element and insert a 733 * new one after it. 734 */ 735 vfs_mono_time(&elemp->mnte_death); 736 newp = mntfs_copy(tempelemp); 737 vfs_mono_time(&newp->mnte_birth); 738 mntfs_insert_after(newp, elemp); 739 elemp = newp; 740 } 741 742 /* We've found the corresponding element. Hold it. */ 743 DTRACE_PROBE1(incrementing, mntelem_t *, elemp); 744 elemp->mnte_refcnt++; 745 746 /* 747 * Update the parameters used to initialise the 748 * snapshot. 749 */ 750 nmnts++; 751 total_text_size += elemp->mnte_text_size; 752 if (!elemp->mnte_hidden) 753 normal_text_size += elemp->mnte_text_size; 754 if (!firstp) 755 firstp = elemp; 756 757 prevp = elemp; 758 elemp = elemp->mnte_next; 759 } 760 761 if (vfsp == lastvfsp) 762 break; 763 } 764 765 /* 766 * Any remaining visible database elements that are still alive must be 767 * killed now, because their corresponding vfs_ts must have been 768 * unmounted. 769 */ 770 for (; elemp; elemp = elemp->mnte_next) { 771 if (MNTFS_ELEM_IS_ALIVE(elemp) && 772 (!elemp->mnte_hidden || show_hidden)) 773 vfs_mono_time(&elemp->mnte_death); 774 } 775 776 /* Initialise the snapshot. */ 777 vfs_mono_time(&snapp->mnts_time); 778 snapp->mnts_last_mtime = last_mtime; 779 snapp->mnts_first = snapp->mnts_next = firstp; 780 snapp->mnts_flags = show_hidden ? 
MNTS_SHOWHIDDEN : 0; 781 snapp->mnts_nmnts = nmnts; 782 snapp->mnts_text_size = total_text_size; 783 snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 784 785 /* 786 * Record /etc/mnttab's current size and mtime for possible future use 787 * by mntgetattr(). 788 */ 789 mnd->mnt_size = normal_text_size; 790 mnd->mnt_mtime = last_mtime; 791 if (show_hidden) { 792 mnd->mnt_hidden_size = total_text_size; 793 mnd->mnt_hidden_mtime = last_mtime; 794 } 795 796 /* Clean up. */ 797 rw_exit(dblockp); 798 vfs_list_unlock(); 799 if (dummyvfsp != NULL) 800 refstr_rele(dummyvfsp->vfs_mntpt); 801 kmem_free(tempelemp->mnte_text, entry_length); 802 kmem_free(tempelemp, sizeof (mntelem_t)); 803 } 804 805 /* 806 * Public function to convert vfs_mntopts into a string. 807 * A buffer of sufficient size is allocated, which is returned via bufp, 808 * and whose length is returned via lenp. 809 */ 810 void 811 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 812 { 813 size_t len; 814 char *buf; 815 816 vfs_list_read_lock(); 817 818 len = mntfs_optsize(vfsp) + 1; 819 buf = kmem_alloc(len, KM_NOSLEEP); 820 if (buf == NULL) { 821 *bufp = NULL; 822 vfs_list_unlock(); 823 return; 824 } 825 buf[len - 1] = '\0'; 826 (void) mntfs_optprint(vfsp, buf); 827 ASSERT(buf[len - 1] == '\0'); 828 829 vfs_list_unlock(); 830 *bufp = buf; 831 *lenp = len; 832 } 833 834 /* ARGSUSED */ 835 static int 836 mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 837 { 838 vnode_t *vp = *vpp; 839 mntnode_t *nmnp; 840 841 /* 842 * Not allowed to open for writing, return error. 843 */ 844 if (flag & FWRITE) 845 return (EPERM); 846 /* 847 * Create a new mnt/vnode for each open, this will give us a handle to 848 * hang the snapshot on. 
849 */ 850 nmnp = mntgetnode(vp); 851 852 *vpp = MTOV(nmnp); 853 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 854 VN_RELE(vp); 855 return (0); 856 } 857 858 /* ARGSUSED */ 859 static int 860 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 861 caller_context_t *ct) 862 { 863 mntnode_t *mnp = VTOM(vp); 864 865 /* Clean up any locks or shares held by the current process */ 866 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 867 cleanshares(vp, ttoproc(curthread)->p_pid); 868 869 if (count > 1) 870 return (0); 871 if (vp->v_count == 1) { 872 rw_enter(&mnp->mnt_contents, RW_WRITER); 873 mntfs_freesnap(mnp, &mnp->mnt_read); 874 mntfs_freesnap(mnp, &mnp->mnt_ioctl); 875 rw_exit(&mnp->mnt_contents); 876 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 877 } 878 return (0); 879 } 880 881 /* ARGSUSED */ 882 static int 883 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 884 { 885 mntnode_t *mnp = VTOM(vp); 886 zone_t *zonep = MTOD(mnp)->mnt_zone; 887 mntsnap_t *snapp = &mnp->mnt_read; 888 off_t off = uio->uio_offset; 889 size_t len = uio->uio_resid; 890 char *bufferp; 891 size_t available, copylen; 892 size_t written = 0; 893 mntelem_t *elemp; 894 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 895 int error = 0; 896 off_t ieoffset; 897 898 rw_enter(&mnp->mnt_contents, RW_WRITER); 899 if (snapp->mnts_nmnts == 0 || (off == (off_t)0)) 900 mntfs_snapshot(mnp, snapp); 901 902 if ((size_t)(off + len) > snapp->mnts_text_size) 903 len = snapp->mnts_text_size - off; 904 905 if (off < 0 || len > snapp->mnts_text_size) { 906 rw_exit(&mnp->mnt_contents); 907 return (EFAULT); 908 } 909 910 if (len == 0) { 911 rw_exit(&mnp->mnt_contents); 912 return (0); 913 } 914 915 /* 916 * For the file offset provided, locate the corresponding database 917 * element and calculate the corresponding offset within its text. If 918 * the file offset is the same as that reached during the last read(2) 919 * then use the saved element and intra-element offset. 
920 */ 921 rw_enter(dblockp, RW_READER); 922 if (off == 0 || (off == snapp->mnts_foffset)) { 923 elemp = snapp->mnts_next; 924 ieoffset = snapp->mnts_ieoffset; 925 } else { 926 off_t total_off; 927 /* 928 * Find the element corresponding to the requested file offset 929 * by walking through the database and summing the text sizes 930 * of the individual elements. If the requested file offset is 931 * greater than that reached on the last visit then we can start 932 * at the last seen element; otherwise, we have to start at the 933 * beginning. 934 */ 935 if (off > snapp->mnts_foffset) { 936 elemp = snapp->mnts_next; 937 total_off = snapp->mnts_foffset - snapp->mnts_ieoffset; 938 } else { 939 elemp = snapp->mnts_first; 940 total_off = 0; 941 } 942 while (off > total_off + elemp->mnte_text_size) { 943 total_off += elemp->mnte_text_size; 944 elemp = mntfs_get_next_elem(snapp, elemp); 945 ASSERT(elemp != NULL); 946 } 947 /* Calculate the intra-element offset. */ 948 if (off > total_off) 949 ieoffset = off - total_off; 950 else 951 ieoffset = 0; 952 } 953 954 /* 955 * Create a buffer and populate it with the text from successive 956 * database elements until it is full. 957 */ 958 bufferp = kmem_alloc(len, KM_SLEEP); 959 while (written < len) { 960 available = elemp->mnte_text_size - ieoffset; 961 copylen = MIN(len - written, available); 962 bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen); 963 written += copylen; 964 if (copylen == available) { 965 elemp = mntfs_get_next_elem(snapp, elemp); 966 ASSERT(elemp != NULL || written == len); 967 ieoffset = 0; 968 } else { 969 ieoffset += copylen; 970 } 971 } 972 rw_exit(dblockp); 973 974 /* 975 * Write the populated buffer, update the snapshot's state if 976 * successful and then advertise our read. 
977 */ 978 error = uiomove(bufferp, len, UIO_READ, uio); 979 if (error == 0) { 980 snapp->mnts_next = elemp; 981 snapp->mnts_foffset = off + len; 982 snapp->mnts_ieoffset = ieoffset; 983 } 984 vfs_mnttab_readop(); 985 rw_exit(&mnp->mnt_contents); 986 987 /* Clean up. */ 988 kmem_free(bufferp, len); 989 return (error); 990 } 991 992 static int 993 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 994 caller_context_t *ct) 995 { 996 int mask = vap->va_mask; 997 int error; 998 mntnode_t *mnp = VTOM(vp); 999 timespec_t mtime, old_mtime; 1000 size_t size, old_size; 1001 mntdata_t *mntdata = MTOD(VTOM(vp)); 1002 mntsnap_t *rsnapp, *isnapp; 1003 extern timespec_t vfs_mnttab_ctime; 1004 1005 1006 /* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */ 1007 if (mask & AT_MODE|AT_UID|AT_GID) { 1008 if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct)) 1009 return (error); 1010 } 1011 1012 /* 1013 * There are some minor subtleties in the determination of 1014 * /etc/mnttab's size and mtime. We wish to avoid any condition in 1015 * which, in the vicinity of a change to the in-kernel mnttab, we 1016 * return an old value for one but a new value for the other. We cannot 1017 * simply hold vfslist for the entire calculation because we might need 1018 * to call mntfs_snapshot(), which calls vfs_list_read_lock(). 
1019 */ 1020 if (mask & AT_SIZE|AT_NBLOCKS) { 1021 rw_enter(&mnp->mnt_contents, RW_WRITER); 1022 1023 vfs_list_read_lock(); 1024 vfs_mnttab_modtime(&mtime); 1025 if (mnp->mnt_flags & MNT_SHOWHIDDEN) { 1026 old_mtime = mntdata->mnt_hidden_mtime; 1027 old_size = mntdata->mnt_hidden_size; 1028 } else { 1029 old_mtime = mntdata->mnt_mtime; 1030 old_size = mntdata->mnt_size; 1031 } 1032 vfs_list_unlock(); 1033 1034 rsnapp = &mnp->mnt_read; 1035 isnapp = &mnp->mnt_ioctl; 1036 if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) { 1037 /* 1038 * The mntnode already has at least one snapshot from 1039 * which to take the size; the user will understand from 1040 * mnttab(4) that the current size of the in-kernel 1041 * mnttab is irrelevant. 1042 */ 1043 size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size : 1044 isnapp->mnts_text_size; 1045 } else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) { 1046 /* 1047 * There is no existing valid snapshot but the in-kernel 1048 * mnttab has not changed since the time that the last 1049 * one was generated. Use the old file size; note that 1050 * it is guaranteed to be consistent with mtime, which 1051 * may be returned to the user later. 1052 */ 1053 size = old_size; 1054 } else { 1055 /* 1056 * There is no snapshot and the in-kernel mnttab has 1057 * changed since the last one was created. We generate a 1058 * new snapshot which we use for not only the size but 1059 * also the mtime, thereby ensuring that the two are 1060 * consistent. 1061 */ 1062 mntfs_snapshot(mnp, rsnapp); 1063 size = rsnapp->mnts_text_size; 1064 mtime = rsnapp->mnts_last_mtime; 1065 mntfs_freesnap(mnp, rsnapp); 1066 } 1067 1068 rw_exit(&mnp->mnt_contents); 1069 } else if (mask & AT_ATIME|AT_MTIME) { 1070 vfs_list_read_lock(); 1071 vfs_mnttab_modtime(&mtime); 1072 vfs_list_unlock(); 1073 } 1074 1075 /* Always look like a regular file. */ 1076 if (mask & AT_TYPE) 1077 vap->va_type = VREG; 1078 /* Mode should basically be read only. 
*/ 1079 if (mask & AT_MODE) 1080 vap->va_mode &= 07444; 1081 if (mask & AT_FSID) 1082 vap->va_fsid = vp->v_vfsp->vfs_dev; 1083 /* Nodeid is always ROOTINO. */ 1084 if (mask & AT_NODEID) 1085 vap->va_nodeid = (ino64_t)MNTROOTINO; 1086 /* 1087 * Set nlink to the number of open vnodes for mnttab info 1088 * plus one for existing. 1089 */ 1090 if (mask & AT_NLINK) 1091 vap->va_nlink = mntdata->mnt_nopen + 1; 1092 if (mask & AT_SIZE) 1093 vap->va_size = size; 1094 if (mask & AT_ATIME) 1095 vap->va_atime = mtime; 1096 if (mask & AT_MTIME) 1097 vap->va_mtime = mtime; 1098 if (mask & AT_CTIME) 1099 vap->va_ctime = vfs_mnttab_ctime; 1100 if (mask & AT_RDEV) 1101 vap->va_rdev = 0; 1102 if (mask & AT_BLKSIZE) 1103 vap->va_blksize = DEV_BSIZE; 1104 if (mask & AT_NBLOCKS) 1105 vap->va_nblocks = btod(size); 1106 if (mask & AT_SEQ) 1107 vap->va_seq = 0; 1108 1109 return (0); 1110 } 1111 1112 static int 1113 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, 1114 caller_context_t *ct) 1115 { 1116 mntnode_t *mnp = VTOM(vp); 1117 1118 if (mode & (VWRITE|VEXEC)) 1119 return (EROFS); 1120 1121 /* 1122 * Do access check on the underlying directory vnode. 1123 */ 1124 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); 1125 } 1126 1127 1128 /* 1129 * New /mntfs vnode required; allocate it and fill in most of the fields. 1130 */ 1131 static mntnode_t * 1132 mntgetnode(vnode_t *dp) 1133 { 1134 mntnode_t *mnp; 1135 vnode_t *vp; 1136 1137 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 1138 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 1139 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 1140 rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL); 1141 vp = MTOV(mnp); 1142 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 1143 vn_setops(vp, mntvnodeops); 1144 vp->v_vfsp = dp->v_vfsp; 1145 vp->v_type = VREG; 1146 vp->v_data = (caddr_t)mnp; 1147 1148 return (mnp); 1149 } 1150 1151 /* 1152 * Free the storage obtained from mntgetnode(). 
1153 */ 1154 static void 1155 mntfreenode(mntnode_t *mnp) 1156 { 1157 vnode_t *vp = MTOV(mnp); 1158 1159 rw_destroy(&mnp->mnt_contents); 1160 vn_invalid(vp); 1161 vn_free(vp); 1162 kmem_free(mnp, sizeof (*mnp)); 1163 } 1164 1165 1166 /* ARGSUSED */ 1167 static int 1168 mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 1169 { 1170 return (0); 1171 } 1172 1173 /* ARGSUSED */ 1174 static void 1175 mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 1176 { 1177 mntnode_t *mnp = VTOM(vp); 1178 1179 mntfreenode(mnp); 1180 } 1181 1182 /* 1183 * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding 1184 * has a special meaning for /etc/mnttab: it forces mntfs to refresh the 1185 * snapshot at the next ioctl(). 1186 * 1187 * mnttab(4) explains that "the snapshot...is taken any time a read(2) is 1188 * performed at offset 0". We therefore ignore the read snapshot here. 1189 */ 1190 /* ARGSUSED */ 1191 static int 1192 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1193 { 1194 mntnode_t *mnp = VTOM(vp); 1195 1196 if (*noffp == 0) { 1197 rw_enter(&mnp->mnt_contents, RW_WRITER); 1198 mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND; 1199 rw_exit(&mnp->mnt_contents); 1200 } 1201 1202 return (0); 1203 } 1204 1205 /* 1206 * Return the answer requested to poll(). 1207 * POLLRDBAND will return when the mtime of the mnttab 1208 * information is newer than the latest one read for this open. 
1209 */ 1210 /* ARGSUSED */ 1211 static int 1212 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp, 1213 caller_context_t *ct) 1214 { 1215 mntnode_t *mnp = VTOM(vp); 1216 mntsnap_t *snapp; 1217 1218 rw_enter(&mnp->mnt_contents, RW_READER); 1219 if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime, 1220 &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST) 1221 snapp = &mnp->mnt_ioctl; 1222 else 1223 snapp = &mnp->mnt_read; 1224 1225 *revp = 0; 1226 *phpp = (pollhead_t *)NULL; 1227 if (ev & POLLIN) 1228 *revp |= POLLIN; 1229 1230 if (ev & POLLRDNORM) 1231 *revp |= POLLRDNORM; 1232 1233 if (ev & POLLRDBAND) { 1234 vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp); 1235 if (*phpp == (pollhead_t *)NULL) 1236 *revp |= POLLRDBAND; 1237 } 1238 rw_exit(&mnp->mnt_contents); 1239 1240 if (*revp || *phpp != NULL || any) { 1241 return (0); 1242 } 1243 /* 1244 * If someone is polling an unsupported poll events (e.g. 1245 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 1246 * That way we will ensure that we don't return a 0 1247 * revents with a NULL pollhead pointer. 1248 */ 1249 *revp = POLLERR; 1250 return (0); 1251 } 1252 1253 /* 1254 * mntfs_same_word() returns 1 if two words are the same in the context of 1255 * MNTIOC_GETMNTANY and 0 otherwise. 1256 * 1257 * worda is a memory address that lies somewhere in the buffer bufa; it cannot 1258 * be NULL since this is used to indicate to getmntany(3C) that the user does 1259 * not wish to match a particular field. The text to which worda points is 1260 * supplied by the user; if it is not null-terminated then it cannot match. 1261 * 1262 * Buffer bufb contains a line from /etc/mnttab, in which the fields are 1263 * delimited by tab or new-line characters. offb is the offset of the second 1264 * word within this buffer. 1265 * 1266 * mntfs_same_word() returns 1 if the words are the same and 0 otherwise. 
1267 */ 1268 int 1269 mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb, 1270 size_t sizeb) 1271 { 1272 char *wordb = bufb + offb; 1273 int bytes_remaining; 1274 1275 ASSERT(worda != NULL); 1276 1277 bytes_remaining = MIN(((bufa + sizea) - worda), 1278 ((bufb + sizeb) - wordb)); 1279 while (bytes_remaining && *worda == *wordb) { 1280 worda++; 1281 wordb++; 1282 bytes_remaining--; 1283 } 1284 if (bytes_remaining && 1285 *worda == '\0' && (*wordb == '\t' || *wordb == '\n')) 1286 return (1); 1287 else 1288 return (0); 1289 } 1290 1291 /* 1292 * mntfs_special_info_string() returns which, if either, of VBLK or VCHR 1293 * corresponds to a supplied path. If the path is a special device then the 1294 * function optionally sets the major and minor numbers. 1295 */ 1296 vtype_t 1297 mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr) 1298 { 1299 vattr_t vattr; 1300 vnode_t *vp; 1301 vtype_t type; 1302 int error; 1303 1304 if (path == NULL || *path != '/' || 1305 lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) 1306 return (0); 1307 1308 vattr.va_mask = AT_TYPE | AT_RDEV; 1309 error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL); 1310 VN_RELE(vp); 1311 1312 if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) { 1313 if (major && minor) { 1314 *major = getmajor(vattr.va_rdev); 1315 *minor = getminor(vattr.va_rdev); 1316 } 1317 return (type); 1318 } else { 1319 return (0); 1320 } 1321 } 1322 1323 /* 1324 * mntfs_special_info_element() extracts the name of the mounted resource 1325 * for a given element and copies it into a null-terminated string, which it 1326 * then passes to mntfs_special_info_string(). 
1327 */ 1328 vtype_t 1329 mntfs_special_info_element(mntelem_t *elemp, cred_t *cr) 1330 { 1331 char *newpath; 1332 vtype_t type; 1333 1334 newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP); 1335 bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp)); 1336 *(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0'; 1337 type = mntfs_special_info_string(newpath, NULL, NULL, cr); 1338 kmem_free(newpath, elemp->mnte_text_size); 1339 1340 return (type); 1341 } 1342 1343 /* 1344 * Convert an address that points to a byte within a user buffer into an 1345 * address that points to the corresponding offset within a kernel buffer. If 1346 * the user address is NULL then make no conversion. If the address does not 1347 * lie within the buffer then reset it to NULL. 1348 */ 1349 char * 1350 mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize) 1351 { 1352 if (uaddr < ubufp || uaddr >= ubufp + bufsize) 1353 return (NULL); 1354 else 1355 return (kbufp + (uaddr - ubufp)); 1356 } 1357 1358 /* 1359 * These 32-bit versions are to support STRUCT_DECL(9F) etc. in 1360 * mntfs_copyout_element() and mntioctl(). 1361 */ 1362 #ifdef _SYSCALL32_IMPL 1363 typedef struct extmnttab32 { 1364 uint32_t mnt_special; 1365 uint32_t mnt_mountp; 1366 uint32_t mnt_fstype; 1367 uint32_t mnt_mntopts; 1368 uint32_t mnt_time; 1369 uint_t mnt_major; 1370 uint_t mnt_minor; 1371 } extmnttab32_t; 1372 1373 typedef struct mnttab32 { 1374 uint32_t mnt_special; 1375 uint32_t mnt_mountp; 1376 uint32_t mnt_fstype; 1377 uint32_t mnt_mntopts; 1378 uint32_t mnt_time; 1379 } mnttab32_t; 1380 1381 struct mntentbuf32 { 1382 uint32_t mbuf_emp; 1383 uint_t mbuf_bufsize; 1384 uint32_t mbuf_buf; 1385 }; 1386 #endif 1387 1388 /* 1389 * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT, 1390 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. 
Having identified the 1391 * database element desired by the user, this function copies out the text and 1392 * the pointers to the relevant userland addresses. It returns 0 on success 1393 * and non-zero otherwise. * * uemp and ubufp are the user addresses of the struct and of the text buffer. * cmd == MNTIOC_GETEXTMNTENT selects the extended struct, including mnt_major * and mnt_minor; any other cmd copies out a plain struct mnttab. EFAULT is * returned if either copyout() fails. 1394 */ 1395 int 1396 mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp, 1397 char *ubufp, int cmd, int datamodel) 1398 { 1399 STRUCT_DECL(extmnttab, ktab); 1400 char *dbbufp = elemp->mnte_text; 1401 size_t dbbufsize = elemp->mnte_text_size; 1402 struct extmnttab *dbtabp = &elemp->mnte_tab; 1403 size_t ssize; 1404 char *kbufp; 1405 int error = 0; 1406 1407 1408 /* 1409 * We create a struct extmnttab within the kernel of the size 1410 * determined by the user's data model. We then populate its 1411 * fields by combining the start address of the text buffer 1412 * supplied by the user, ubufp, with the offsets stored for 1413 * this database element within dbtabp, a pointer to a struct 1414 * extmnttab. 1415 * 1416 * Note that if the corresponding field is "-" this signifies 1417 * no real content, and we set the address to NULL. This does 1418 * not apply to mnt_time. 1419 */ 1420 STRUCT_INIT(ktab, datamodel); 1421 STRUCT_FSETP(ktab, mnt_special, 1422 MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL); 1423 STRUCT_FSETP(ktab, mnt_mountp, 1424 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ? 1425 ubufp + (off_t)dbtabp->mnt_mountp : NULL); 1426 STRUCT_FSETP(ktab, mnt_fstype, 1427 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ? 1428 ubufp + (off_t)dbtabp->mnt_fstype : NULL); 1429 STRUCT_FSETP(ktab, mnt_mntopts, 1430 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ?
1431 ubufp + (off_t)dbtabp->mnt_mntopts : NULL); 1432 STRUCT_FSETP(ktab, mnt_time, 1433 ubufp + (off_t)dbtabp->mnt_time); 1434 if (cmd == MNTIOC_GETEXTMNTENT) { 1435 STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major); 1436 STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor); 1437 ssize = SIZEOF_STRUCT(extmnttab, datamodel); 1438 } else { 1439 ssize = SIZEOF_STRUCT(mnttab, datamodel); 1440 } 1441 if (copyout(STRUCT_BUF(ktab), uemp, ssize)) 1442 return (EFAULT); 1443 1444 /* 1445 * We create a text buffer in the kernel into which we copy the 1446 * /etc/mnttab entry for this element. We change the tab and 1447 * new-line delimiters to null bytes before copying out the 1448 * buffer. 1449 */ 1450 kbufp = kmem_alloc(dbbufsize, KM_SLEEP); 1451 bcopy(elemp->mnte_text, kbufp, dbbufsize); 1452 *(kbufp + (off_t)dbtabp->mnt_mountp - 1) = 1453 *(kbufp + (off_t)dbtabp->mnt_fstype - 1) = 1454 *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) = 1455 *(kbufp + (off_t)dbtabp->mnt_time - 1) = 1456 *(kbufp + dbbufsize - 1) = '\0'; 1457 if (copyout(kbufp, ubufp, dbbufsize)) 1458 error = EFAULT; 1459 1460 kmem_free(kbufp, dbbufsize); 1461 return (error); 1462 } 1463 /* * mntioctl() implements the ioctl() commands for /etc/mnttab. All of the * commands that enumerate mounted resources operate on the mntnode's ioctl * snapshot, mnt_ioctl, which is (re)generated under mnt_contents held as a * writer whenever it is empty or has been marked MNTS_REWIND. The database * lock, zone_mntfs_db_lock, is taken as a reader only while walking elements. * Unrecognised commands return EINVAL. */ 1464 /* ARGSUSED */ 1465 static int 1466 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr, 1467 int *rvalp, caller_context_t *ct) 1468 { 1469 uint_t *up = (uint_t *)arg; 1470 mntnode_t *mnp = VTOM(vp); 1471 mntsnap_t *snapp = &mnp->mnt_ioctl; 1472 int error = 0; 1473 zone_t *zonep = MTOD(mnp)->mnt_zone; 1474 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 1475 model_t datamodel = flag & DATAMODEL_MASK; 1476 1477 switch (cmd) { 1478 1479 case MNTIOC_NMNTS: /* get no.
of mounted resources */ 1480 { 1481 rw_enter(&mnp->mnt_contents, RW_READER); 1482 if (snapp->mnts_nmnts == 0 || 1483 (snapp->mnts_flags & MNTS_REWIND)) { 1484 if (!rw_tryupgrade(&mnp->mnt_contents)) { /* Upgrade failed: drop the lock, re-take it as a writer and re-check. */ 1485 rw_exit(&mnp->mnt_contents); 1486 rw_enter(&mnp->mnt_contents, RW_WRITER); 1487 } 1488 if (snapp->mnts_nmnts == 0 || 1489 (snapp->mnts_flags & MNTS_REWIND)) 1490 mntfs_snapshot(mnp, snapp); 1491 } 1492 rw_exit(&mnp->mnt_contents); 1493 1494 if (suword32(up, snapp->mnts_nmnts) != 0) 1495 error = EFAULT; 1496 break; 1497 } 1498 1499 case MNTIOC_GETDEVLIST: /* get mounted device major/minor nos */ 1500 { 1501 size_t len; 1502 uint_t *devlist; 1503 mntelem_t *elemp; 1504 int i = 0; 1505 1506 rw_enter(&mnp->mnt_contents, RW_READER); 1507 if (snapp->mnts_nmnts == 0 || 1508 (snapp->mnts_flags & MNTS_REWIND)) { 1509 if (!rw_tryupgrade(&mnp->mnt_contents)) { /* Upgrade failed: drop the lock, re-take it as a writer and re-check. */ 1510 rw_exit(&mnp->mnt_contents); 1511 rw_enter(&mnp->mnt_contents, RW_WRITER); 1512 } 1513 if (snapp->mnts_nmnts == 0 || 1514 (snapp->mnts_flags & MNTS_REWIND)) 1515 mntfs_snapshot(mnp, snapp); /* Keep the snapshot stable, as a reader, while it is walked below. */ 1516 rw_downgrade(&mnp->mnt_contents); 1517 } 1518 1519 /* Create a local buffer to hold the device numbers. */ 1520 len = 2 * snapp->mnts_nmnts * sizeof (uint_t); 1521 devlist = kmem_alloc(len, KM_SLEEP); 1522 1523 /* 1524 * Walk the database elements for this snapshot and add their 1525 * major and minor numbers.
1526 */ 1527 rw_enter(dblockp, RW_READER); 1528 for (elemp = snapp->mnts_first; elemp; 1529 elemp = mntfs_get_next_elem(snapp, elemp)) { 1530 devlist[2 * i] = elemp->mnte_tab.mnt_major; 1531 devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor; 1532 i++; 1533 } 1534 rw_exit(dblockp); 1535 ASSERT(i == snapp->mnts_nmnts); 1536 rw_exit(&mnp->mnt_contents); 1537 1538 error = xcopyout(devlist, up, len); 1539 kmem_free(devlist, len); 1540 break; 1541 } 1542 1543 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1544 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1545 { 1546 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1547 STRUCT_DECL(mnttagdesc, tagdesc); 1548 char *cptr; 1549 uint32_t major, minor; 1550 char tagbuf[MAX_MNTOPT_TAG]; 1551 char *pbuf; 1552 size_t len; 1553 uint_t start = 0; 1554 mntdata_t *mntdata = MTOD(mnp); 1555 zone_t *zone = mntdata->mnt_zone; 1556 1557 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1558 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1559 error = EFAULT; 1560 break; 1561 } 1562 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); /* In a non-global zone the user's mount point is appended to the zone's root path. */ 1563 if (zone != global_zone) { 1564 (void) strcpy(pbuf, zone->zone_rootpath); 1565 /* truncate "/" and nul */ 1566 start = zone->zone_rootpathlen - 2; 1567 ASSERT(pbuf[start] == '/'); 1568 } 1569 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1570 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1571 if (error) { 1572 kmem_free(pbuf, MAXPATHLEN); 1573 break; 1574 } /* A mount point supplied from within a zone must be absolute. */ 1575 if (start != 0 && pbuf[start] != '/') { 1576 kmem_free(pbuf, MAXPATHLEN); 1577 error = EINVAL; 1578 break; 1579 } 1580 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1581 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1582 kmem_free(pbuf, MAXPATHLEN); 1583 break; 1584 } 1585 major = STRUCT_FGET(tagdesc, mtd_major); 1586 minor = STRUCT_FGET(tagdesc, mtd_minor); 1587 if (cmd == MNTIOC_SETTAG) 1588 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1589 else 1590 error = vfs_clrtag(major, minor, pbuf,
tagbuf, cr); 1591 kmem_free(pbuf, MAXPATHLEN); 1592 break; 1593 } 1594 1595 case MNTIOC_SHOWHIDDEN: 1596 { 1597 rw_enter(&mnp->mnt_contents, RW_WRITER); 1598 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1599 rw_exit(&mnp->mnt_contents); 1600 break; 1601 } 1602 1603 case MNTIOC_GETMNTANY: 1604 { 1605 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 1606 STRUCT_DECL(extmnttab, ktab); /* Our copy of user's emp */ 1607 struct extmnttab *uemp; /* uaddr of user's emp */ 1608 char *ubufp; /* uaddr of user's text buf */ 1609 size_t ubufsize; /* size of the above */ 1610 struct extmnttab preftab; /* our version of user's emp */ 1611 char *prefbuf; /* our copy of user's text */ 1612 mntelem_t *elemp; /* a database element */ 1613 struct extmnttab *dbtabp; /* element's extmnttab */ 1614 char *dbbufp; /* element's text buf */ 1615 size_t dbbufsize; /* size of the above */ 1616 vtype_t type; /* type, if any, of special */ 1617 1618 1619 /* 1620 * embuf is a struct embuf within the kernel. We copy into it 1621 * the struct embuf supplied by the user. 1622 */ 1623 STRUCT_INIT(embuf, datamodel); 1624 if (copyin((void *) arg, STRUCT_BUF(embuf), 1625 STRUCT_SIZE(embuf))) { 1626 error = EFAULT; 1627 break; 1628 } 1629 uemp = STRUCT_FGETP(embuf, mbuf_emp); 1630 ubufp = STRUCT_FGETP(embuf, mbuf_buf); 1631 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 1632 1633 /* 1634 * Check that the text buffer offered by the user is the 1635 * agreed size. 1636 */ 1637 if (ubufsize != MNT_LINE_MAX) { 1638 error = EINVAL; 1639 break; 1640 } 1641 1642 /* Copy the user-supplied entry into a local buffer. */ 1643 prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP); 1644 if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) { 1645 kmem_free(prefbuf, MNT_LINE_MAX); 1646 error = EFAULT; 1647 break; 1648 } 1649 1650 /* Ensure that any string within it is null-terminated.
*/ 1651 *(prefbuf + MNT_LINE_MAX - 1) = 0; 1652 1653 /* Copy in the user-supplied mpref */ 1654 STRUCT_INIT(ktab, datamodel); 1655 if (copyin(uemp, STRUCT_BUF(ktab), 1656 SIZEOF_STRUCT(mnttab, datamodel))) { 1657 kmem_free(prefbuf, MNT_LINE_MAX); 1658 error = EFAULT; 1659 break; 1660 } 1661 1662 /* 1663 * Copy the members of the user's pref struct into a local 1664 * struct. The pointers need to be offset and verified to 1665 * ensure that they lie within the bounds of the buffer. 1666 */ 1667 preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab, 1668 mnt_special), ubufp, prefbuf, MNT_LINE_MAX); 1669 preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab, 1670 mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX); 1671 preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab, 1672 mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX); 1673 preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab, 1674 mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX); 1675 preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab, 1676 mnt_time), ubufp, prefbuf, MNT_LINE_MAX); 1677 1678 /* 1679 * If the user specifies a mounted resource that is a special 1680 * device then we capture its mode and major and minor numbers; 1681 * cf. the block comment below. 1682 */ 1683 type = mntfs_special_info_string(preftab.mnt_special, 1684 &preftab.mnt_major, &preftab.mnt_minor, cr); 1685 1686 rw_enter(&mnp->mnt_contents, RW_WRITER); 1687 if (snapp->mnts_nmnts == 0 || 1688 (snapp->mnts_flags & MNTS_REWIND)) 1689 mntfs_snapshot(mnp, snapp); 1690 1691 /* 1692 * This is the core functionality that implements getmntany(). 1693 * We walk through the mntfs database until we find an element 1694 * matching the user's preferences that are contained in 1695 * preftab. Typically, this means checking that the text 1696 * matches.
However, the mounted resource is special: if the 1697 * user is looking for a special device then we must find a 1698 * database element with the same major and minor numbers and 1699 * the same type, i.e. VBLK or VCHR. The type is not recorded 1700 * in the element because it cannot be inferred from the vfs_t. 1701 * We therefore check the type of suitable candidates via 1702 * mntfs_special_info_element(); since this calls into the 1703 * underlying file system we make sure to drop the database lock 1704 * first. 1705 */ 1706 elemp = snapp->mnts_next; 1707 rw_enter(dblockp, RW_READER); 1708 for (;;) { 1709 for (; elemp; elemp = mntfs_get_next_elem(snapp, 1710 elemp)) { 1711 dbtabp = &elemp->mnte_tab; 1712 dbbufp = elemp->mnte_text; 1713 dbbufsize = elemp->mnte_text_size; 1714 1715 if (((type && 1716 dbtabp->mnt_major == preftab.mnt_major && 1717 dbtabp->mnt_minor == preftab.mnt_minor && 1718 MNTFS_REAL_FIELD(dbbufp)) || 1719 (!type && (!preftab.mnt_special || 1720 mntfs_same_word(preftab.mnt_special, 1721 prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp, 1722 dbbufsize)))) && 1723 1724 (!preftab.mnt_mountp || mntfs_same_word( 1725 preftab.mnt_mountp, prefbuf, MNT_LINE_MAX, 1726 (off_t)dbtabp->mnt_mountp, dbbufp, 1727 dbbufsize)) && 1728 1729 (!preftab.mnt_fstype || mntfs_same_word( 1730 preftab.mnt_fstype, prefbuf, MNT_LINE_MAX, 1731 (off_t)dbtabp->mnt_fstype, dbbufp, 1732 dbbufsize)) && 1733 1734 (!preftab.mnt_mntopts || mntfs_same_word( 1735 preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX, 1736 (off_t)dbtabp->mnt_mntopts, dbbufp, 1737 dbbufsize)) && 1738 1739 (!preftab.mnt_time || mntfs_same_word( 1740 preftab.mnt_time, prefbuf, MNT_LINE_MAX, 1741 (off_t)dbtabp->mnt_time, dbbufp, 1742 dbbufsize))) 1743 break; 1744 } 1745 rw_exit(dblockp); 1746 /* Stop on exhaustion, on a text-only match, or when the candidate's device type agrees. */ 1747 if (elemp == NULL || type == 0 || 1748 type == mntfs_special_info_element(elemp, cr)) 1749 break; 1750 /* Wrong device type: re-take the database lock and resume after this candidate. */ 1751 rw_enter(dblockp, RW_READER); 1752 elemp = mntfs_get_next_elem(snapp, elemp); 1753 } 1754 1755 kmem_free(prefbuf,
MNT_LINE_MAX); 1756 1757 /* If we failed to find a match then return EOF. */ 1758 if (elemp == NULL) { 1759 rw_exit(&mnp->mnt_contents); 1760 *rvalp = MNTFS_EOF; 1761 break; 1762 } 1763 1764 /* 1765 * Check that the text buffer offered by the user will be large 1766 * enough to accommodate the text for this entry. 1767 */ 1768 if (elemp->mnte_text_size > MNT_LINE_MAX) { 1769 rw_exit(&mnp->mnt_contents); 1770 *rvalp = MNTFS_TOOLONG; 1771 break; 1772 } 1773 1774 /* 1775 * Populate the user's struct mnttab and text buffer using the 1776 * element's contents. 1777 */ 1778 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 1779 error = EFAULT; 1780 } else { /* Copyout succeeded: advance the snapshot's cursor past this element. */ 1781 rw_enter(dblockp, RW_READER); 1782 elemp = mntfs_get_next_elem(snapp, elemp); 1783 rw_exit(dblockp); 1784 snapp->mnts_next = elemp; 1785 } 1786 rw_exit(&mnp->mnt_contents); 1787 break; 1788 } 1789 1790 case MNTIOC_GETMNTENT: 1791 case MNTIOC_GETEXTMNTENT: 1792 { 1793 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 1794 struct extmnttab *uemp; /* uaddr of user's emp */ 1795 char *ubufp; /* uaddr of user's text buf */ 1796 size_t ubufsize; /* size of the above */ 1797 mntelem_t *elemp; /* a database element */ 1798 1799 1800 rw_enter(&mnp->mnt_contents, RW_WRITER); 1801 if (snapp->mnts_nmnts == 0 || 1802 (snapp->mnts_flags & MNTS_REWIND)) 1803 mntfs_snapshot(mnp, snapp); /* A NULL cursor means that the snapshot has been fully consumed. */ 1804 if ((elemp = snapp->mnts_next) == NULL) { 1805 rw_exit(&mnp->mnt_contents); 1806 *rvalp = MNTFS_EOF; 1807 break; 1808 } 1809 1810 /* 1811 * embuf is a struct embuf within the kernel. We copy into it 1812 * the struct embuf supplied by the user.
1813 */ 1814 STRUCT_INIT(embuf, datamodel); 1815 if (copyin((void *) arg, STRUCT_BUF(embuf), 1816 STRUCT_SIZE(embuf))) { 1817 rw_exit(&mnp->mnt_contents); 1818 error = EFAULT; 1819 break; 1820 } 1821 uemp = STRUCT_FGETP(embuf, mbuf_emp); 1822 ubufp = STRUCT_FGETP(embuf, mbuf_buf); 1823 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 1824 1825 /* 1826 * Check that the text buffer offered by the user will be large 1827 * enough to accommodate the text for this entry. 1828 */ 1829 if (elemp->mnte_text_size > ubufsize) { 1830 rw_exit(&mnp->mnt_contents); 1831 *rvalp = MNTFS_TOOLONG; 1832 break; 1833 } 1834 1835 /* 1836 * Populate the user's struct mnttab and text buffer using the 1837 * element's contents. 1838 */ 1839 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 1840 error = EFAULT; 1841 } else { /* Copyout succeeded: advance the snapshot's cursor past this element. */ 1842 rw_enter(dblockp, RW_READER); 1843 elemp = mntfs_get_next_elem(snapp, elemp); 1844 rw_exit(dblockp); 1845 snapp->mnts_next = elemp; 1846 } 1847 rw_exit(&mnp->mnt_contents); 1848 break; 1849 } 1850 1851 default: 1852 error = EINVAL; 1853 break; 1854 } 1855 1856 return (error); 1857 } 1858 1859 /* 1860 * mntfs provides a new vnode for each open(2). Two vnodes will represent the 1861 * same instance of /etc/mnttab if they share the same (zone-specific) vfs.
1862 */ 1863 /* ARGSUSED */ 1864 int 1865 mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 1866 { 1867 return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp); 1868 } 1869 1870 /* 1871 * /mntfs vnode operations vector 1872 */ 1873 const fs_operation_def_t mnt_vnodeops_template[] = { 1874 VOPNAME_OPEN, { .vop_open = mntopen }, 1875 VOPNAME_CLOSE, { .vop_close = mntclose }, 1876 VOPNAME_READ, { .vop_read = mntread }, 1877 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1878 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1879 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1880 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1881 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1882 VOPNAME_SEEK, { .vop_seek = mntseek }, 1883 VOPNAME_POLL, { .vop_poll = mntpoll }, 1884 VOPNAME_CMP, { .vop_cmp = mntcmp }, 1885 VOPNAME_DISPOSE, { .error = fs_error }, 1886 VOPNAME_SHRLOCK, { .error = fs_error }, 1887 NULL, NULL 1888 }; 1889