1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/file.h> 26 #include <sys/stat.h> 27 #include <sys/atomic.h> 28 #include <sys/mntio.h> 29 #include <sys/mnttab.h> 30 #include <sys/mount.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/systm.h> 34 #include <sys/vfs.h> 35 #include <sys/vfs_opreg.h> 36 #include <sys/fs/mntdata.h> 37 #include <fs/fs_subr.h> 38 #include <sys/vmsystm.h> 39 #include <vm/seg_vn.h> 40 #include <sys/time.h> 41 #include <sys/ksynch.h> 42 #include <sys/sdt.h> 43 44 #define MNTROOTINO 2 45 46 static mntnode_t *mntgetnode(vnode_t *); 47 48 vnodeops_t *mntvnodeops; 49 extern void vfs_mnttab_readop(void); 50 51 /* 52 * Design of kernel mnttab accounting. 53 * 54 * mntfs provides two methods of reading the in-kernel mnttab, i.e. the state of 55 * the mounted resources: the read-only file /etc/mnttab, and a collection of 56 * ioctl() commands. Most of these interfaces are public and are described in 57 * mnttab(4). Three private ioctl() commands, MNTIOC_GETMNTENT, 58 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY, provide for the getmntent(3C) 59 * family of functions, allowing them to support white space in mount names. 60 * 61 * A significant feature of mntfs is that it provides a file descriptor with a 62 * snapshot once it begins to consume mnttab data. Thus, as the process 63 * continues to consume data, its view of the in-kernel mnttab does not change 64 * even if resources are mounted or unmounted. The intent is to ensure that 65 * processes are guaranteed to read self-consistent data even as the system 66 * changes. 67 * 68 * The snapshot is implemented by a "database", unique to each zone, that 69 * comprises a linked list of mntelem_ts. The database is identified by 70 * zone_mntfs_db and is protected by zone_mntfs_db_lock. Each element contains 71 * the text entry in /etc/mnttab for a mounted resource, i.e. a vfs_t, and is 72 * marked with its time of "birth", i.e. creation. An element is "killed", and 73 * marked with its time of death, when it is found to be out of date, e.g. when 74 * the corresponding resource has been unmounted. 75 * 76 * When a process performs the first read() or ioctl() for a file descriptor for 77 * /etc/mnttab, the database is updated by a call to mntfs_snapshot() to ensure 78 * that an element exists for each currently mounted resource. Following this, 79 * the current time is written into a snapshot structure, a mntsnap_t, embedded 80 * in the descriptor's mntnode_t. 81 * 82 * mntfs is able to enumerate the /etc/mnttab entries corresponding to a 83 * particular file descriptor by searching the database for entries that were 84 * born before the appropriate snapshot and that either are still alive or died 85 * after the snapshot was created. Consumers use the iterator function 86 * mntfs_get_next_elem() to identify the next suitable element in the database. 87 * 88 * Each snapshot has a hold on its corresponding database elements, effected by 89 * a per-element reference count. At last close(), a snapshot is destroyed in 90 * mntfs_freesnap() by releasing all of its holds; an element is destroyed if 91 * its reference count becomes zero. Therefore the database never exists unless 92 * there is at least one active consumer of /etc/mnttab. 93 * 94 * getmntent(3C) et al. "do not open, close or rewind the file." This implies 95 * that getmntent() and read() must be able to operate without interaction on 96 * the same file descriptor; this is accomplished by the use of separate 97 * mntsnap_ts for both read() and ioctl(). 98 * 99 * mntfs observes the following lock-ordering: 100 * 101 * mnp->mnt_contents -> vfslist -> zonep->zone_mntfs_db_lock 102 * 103 * NOTE: The following variable enables the generation of the "dev=xxx" 104 * in the option string for a mounted file system. Really this should 105 * be gotten rid of altogether, but for the sake of backwards compatibility 106 * we had to leave it in. It is defined as a 32-bit device number. This 107 * means that when 64-bit device numbers are in use, if either the major or 108 * minor part of the device number will not fit in a 16 bit quantity, the 109 * "dev=" will be set to NODEV (0x7fffffff). See PSARC 1999/566 and 110 * 1999/131 for details. The cmpldev() function used to generate the 32-bit 111 * device number handles this check and assigns the proper value. 112 */ 113 int mntfs_enabledev = 1; /* enable old "dev=xxx" option */ 114 115 extern void vfs_mono_time(timespec_t *); 116 enum { MNTFS_FIRST, MNTFS_SECOND, MNTFS_NEITHER }; 117 118 /* 119 * Determine whether a field within a line from /etc/mnttab contains actual 120 * content or simply the marker string "-". This never applies to the time, 121 * therefore the delimiter must be a tab. 122 */ 123 #define MNTFS_REAL_FIELD(x) (*(x) != '-' || *((x) + 1) != '\t') 124 125 static int 126 mntfs_devsize(struct vfs *vfsp) 127 { 128 dev32_t odev; 129 130 (void) cmpldev(&odev, vfsp->vfs_dev); 131 return (snprintf(NULL, 0, "dev=%x", odev)); 132 } 133 134 static int 135 mntfs_devprint(struct vfs *vfsp, char *buf) 136 { 137 dev32_t odev; 138 139 (void) cmpldev(&odev, vfsp->vfs_dev); 140 return (snprintf(buf, MAX_MNTOPT_STR, "dev=%x", odev)); 141 } 142 143 /* Identify which, if either, of two supplied timespec structs is newer. */ 144 static int 145 mntfs_newest(timespec_t *a, timespec_t *b) 146 { 147 if (a->tv_sec == b->tv_sec && 148 a->tv_nsec == b->tv_nsec) { 149 return (MNTFS_NEITHER); 150 } else if (b->tv_sec > a->tv_sec || 151 (b->tv_sec == a->tv_sec && 152 b->tv_nsec > a->tv_nsec)) { 153 return (MNTFS_SECOND); 154 } else { 155 return (MNTFS_FIRST); 156 } 157 } 158 159 static int 160 mntfs_optsize(struct vfs *vfsp) 161 { 162 int i, size = 0; 163 mntopt_t *mop; 164 165 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 166 mop = &vfsp->vfs_mntopts.mo_list[i]; 167 if (mop->mo_flags & MO_NODISPLAY) 168 continue; 169 if (mop->mo_flags & MO_SET) { 170 if (size) 171 size++; /* space for comma */ 172 size += strlen(mop->mo_name); 173 /* 174 * count option value if there is one 175 */ 176 if (mop->mo_arg != NULL) { 177 size += strlen(mop->mo_arg) + 1; 178 } 179 } 180 } 181 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 182 /* 183 * Add space for "zone=<zone_name>" if required. 184 */ 185 if (size) 186 size++; /* space for comma */ 187 size += sizeof ("zone=") - 1; 188 size += strlen(vfsp->vfs_zone->zone_name); 189 } 190 if (mntfs_enabledev) { 191 if (size != 0) 192 size++; /* space for comma */ 193 size += mntfs_devsize(vfsp); 194 } 195 if (size == 0) 196 size = strlen("-"); 197 return (size); 198 } 199 200 static int 201 mntfs_optprint(struct vfs *vfsp, char *buf) 202 { 203 int i, optinbuf = 0; 204 mntopt_t *mop; 205 char *origbuf = buf; 206 207 for (i = 0; i < vfsp->vfs_mntopts.mo_count; i++) { 208 mop = &vfsp->vfs_mntopts.mo_list[i]; 209 if (mop->mo_flags & MO_NODISPLAY) 210 continue; 211 if (mop->mo_flags & MO_SET) { 212 if (optinbuf) 213 *buf++ = ','; 214 else 215 optinbuf = 1; 216 buf += snprintf(buf, MAX_MNTOPT_STR, 217 "%s", mop->mo_name); 218 /* 219 * print option value if there is one 220 */ 221 if (mop->mo_arg != NULL) { 222 buf += snprintf(buf, MAX_MNTOPT_STR, "=%s", 223 mop->mo_arg); 224 } 225 } 226 } 227 if (vfsp->vfs_zone != NULL && vfsp->vfs_zone != global_zone) { 228 if (optinbuf) 229 *buf++ = ','; 230 else 231 optinbuf = 1; 232 buf += snprintf(buf, MAX_MNTOPT_STR, "zone=%s", 233 vfsp->vfs_zone->zone_name); 234 } 235 if (mntfs_enabledev) { 236 if (optinbuf++) 237 *buf++ = ','; 238 buf += mntfs_devprint(vfsp, buf); 239 } 240 if (!optinbuf) { 241 buf += snprintf(buf, MAX_MNTOPT_STR, "-"); 242 } 243 return (buf - origbuf); 244 } 245 246 void 247 mntfs_populate_text(vfs_t *vfsp, zone_t *zonep, mntelem_t *elemp) 248 { 249 struct extmnttab *tabp = &elemp->mnte_tab; 250 const char *resource, *mntpt; 251 char *cp = elemp->mnte_text; 252 mntpt = refstr_value(vfsp->vfs_mntpt); 253 resource = refstr_value(vfsp->vfs_resource); 254 255 tabp->mnt_special = 0; 256 if (resource != NULL && resource[0] != '\0') { 257 if (resource[0] != '/') { 258 cp += snprintf(cp, MAXPATHLEN, "%s\t", resource); 259 } else if (!ZONE_PATH_VISIBLE(resource, zonep)) { 260 /* 261 * Use the mount point as the resource. 262 */ 263 cp += snprintf(cp, MAXPATHLEN, "%s\t", 264 ZONE_PATH_TRANSLATE(mntpt, zonep)); 265 } else { 266 cp += snprintf(cp, MAXPATHLEN, "%s\t", 267 ZONE_PATH_TRANSLATE(resource, zonep)); 268 } 269 } else { 270 cp += snprintf(cp, MAXPATHLEN, "-\t"); 271 } 272 273 tabp->mnt_mountp = (char *)(cp - elemp->mnte_text); 274 if (mntpt != NULL && mntpt[0] != '\0') { 275 /* 276 * We know the mount point is visible from within the zone, 277 * otherwise it wouldn't be on the zone's vfs list. 278 */ 279 cp += snprintf(cp, MAXPATHLEN, "%s\t", 280 ZONE_PATH_TRANSLATE(mntpt, zonep)); 281 } else { 282 cp += snprintf(cp, MAXPATHLEN, "-\t"); 283 } 284 285 tabp->mnt_fstype = (char *)(cp - elemp->mnte_text); 286 cp += snprintf(cp, MAXPATHLEN, "%s\t", 287 vfssw[vfsp->vfs_fstype].vsw_name); 288 289 tabp->mnt_mntopts = (char *)(cp - elemp->mnte_text); 290 cp += mntfs_optprint(vfsp, cp); 291 *cp++ = '\t'; 292 293 tabp->mnt_time = (char *)(cp - elemp->mnte_text); 294 cp += snprintf(cp, MAX_MNTOPT_STR, "%ld", vfsp->vfs_mtime); 295 *cp++ = '\n'; /* over-write snprintf's trailing null-byte */ 296 297 tabp->mnt_major = getmajor(vfsp->vfs_dev); 298 tabp->mnt_minor = getminor(vfsp->vfs_dev); 299 300 elemp->mnte_text_size = cp - elemp->mnte_text; 301 elemp->mnte_vfs_ctime = vfsp->vfs_hrctime; 302 elemp->mnte_hidden = vfsp->vfs_flag & VFS_NOMNTTAB; 303 } 304 305 /* Determine the length of the /etc/mnttab entry for this vfs_t. */ 306 static size_t 307 mntfs_text_len(vfs_t *vfsp, zone_t *zone) 308 { 309 size_t size = 0; 310 const char *resource, *mntpt; 311 size_t mntsize; 312 313 mntpt = refstr_value(vfsp->vfs_mntpt); 314 if (mntpt != NULL && mntpt[0] != '\0') { 315 mntsize = strlen(ZONE_PATH_TRANSLATE(mntpt, zone)) + 1; 316 } else { 317 mntsize = 2; /* "-\t" */ 318 } 319 size += mntsize; 320 321 resource = refstr_value(vfsp->vfs_resource); 322 if (resource != NULL && resource[0] != '\0') { 323 if (resource[0] != '/') { 324 size += strlen(resource) + 1; 325 } else if (!ZONE_PATH_VISIBLE(resource, zone)) { 326 /* 327 * Same as the zone's view of the mount point. 328 */ 329 size += mntsize; 330 } else { 331 size += strlen(ZONE_PATH_TRANSLATE(resource, zone)) + 1; 332 } 333 } else { 334 size += 2; /* "-\t" */ 335 } 336 size += strlen(vfssw[vfsp->vfs_fstype].vsw_name) + 1; 337 size += mntfs_optsize(vfsp); 338 size += snprintf(NULL, 0, "\t%ld\n", vfsp->vfs_mtime); 339 return (size); 340 } 341 342 /* Destroy the resources associated with a snapshot element. */ 343 static void 344 mntfs_destroy_elem(mntelem_t *elemp) 345 { 346 kmem_free(elemp->mnte_text, elemp->mnte_text_size); 347 kmem_free(elemp, sizeof (mntelem_t)); 348 } 349 350 /* 351 * Return 1 if the given snapshot is in the range of the given element; return 352 * 0 otherwise. 353 */ 354 static int 355 mntfs_elem_in_range(mntsnap_t *snapp, mntelem_t *elemp) 356 { 357 timespec_t *stimep = &snapp->mnts_time; 358 timespec_t *btimep = &elemp->mnte_birth; 359 timespec_t *dtimep = &elemp->mnte_death; 360 361 /* 362 * If a snapshot is in range of an element then the snapshot must have 363 * been created after the birth of the element, and either the element 364 * is still alive or it died after the snapshot was created. 365 */ 366 if (mntfs_newest(btimep, stimep) == MNTFS_SECOND && 367 (MNTFS_ELEM_IS_ALIVE(elemp) || 368 mntfs_newest(stimep, dtimep) == MNTFS_SECOND)) 369 return (1); 370 else 371 return (0); 372 } 373 374 /* 375 * Return the next valid database element, after the one provided, for a given 376 * snapshot; return NULL if none exists. The caller must hold the zone's 377 * database lock as a reader before calling this function. 378 */ 379 static mntelem_t * 380 mntfs_get_next_elem(mntsnap_t *snapp, mntelem_t *elemp) 381 { 382 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 383 384 do { 385 elemp = elemp->mnte_next; 386 } while (elemp && 387 (!mntfs_elem_in_range(snapp, elemp) || 388 (!show_hidden && elemp->mnte_hidden))); 389 return (elemp); 390 } 391 392 /* 393 * This function frees the resources associated with a mntsnap_t. It walks 394 * through the database, decrementing the reference count of any element that 395 * satisfies the snapshot. If the reference count of an element becomes zero 396 * then it is removed from the database. 397 */ 398 static void 399 mntfs_freesnap(mntnode_t *mnp, mntsnap_t *snapp) 400 { 401 zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; 402 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 403 mntelem_t **elempp = &zonep->zone_mntfs_db; 404 mntelem_t *elemp; 405 int show_hidden = snapp->mnts_flags & MNTS_SHOWHIDDEN; 406 size_t number_decremented = 0; 407 408 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 409 410 /* Ignore an uninitialised snapshot. */ 411 if (snapp->mnts_nmnts == 0) 412 return; 413 414 /* Drop the holds on any matching database elements. */ 415 rw_enter(dblockp, RW_WRITER); 416 while ((elemp = *elempp) != NULL) { 417 if (mntfs_elem_in_range(snapp, elemp) && 418 (!elemp->mnte_hidden || show_hidden) && 419 ++number_decremented && --elemp->mnte_refcnt == 0) { 420 if ((*elempp = elemp->mnte_next) != NULL) 421 (*elempp)->mnte_prev = elemp->mnte_prev; 422 mntfs_destroy_elem(elemp); 423 } else { 424 elempp = &elemp->mnte_next; 425 } 426 } 427 rw_exit(dblockp); 428 ASSERT(number_decremented == snapp->mnts_nmnts); 429 430 /* Clear the snapshot data. */ 431 bzero(snapp, sizeof (mntsnap_t)); 432 } 433 434 /* Insert the new database element newp after the existing element prevp. */ 435 static void 436 mntfs_insert_after(mntelem_t *newp, mntelem_t *prevp) 437 { 438 newp->mnte_prev = prevp; 439 newp->mnte_next = prevp->mnte_next; 440 prevp->mnte_next = newp; 441 if (newp->mnte_next != NULL) 442 newp->mnte_next->mnte_prev = newp; 443 } 444 445 /* Create and return a copy of a given database element. */ 446 static mntelem_t * 447 mntfs_copy(mntelem_t *origp) 448 { 449 mntelem_t *copyp; 450 451 copyp = kmem_zalloc(sizeof (mntelem_t), KM_SLEEP); 452 copyp->mnte_vfs_ctime = origp->mnte_vfs_ctime; 453 copyp->mnte_text_size = origp->mnte_text_size; 454 copyp->mnte_text = kmem_alloc(copyp->mnte_text_size, KM_SLEEP); 455 bcopy(origp->mnte_text, copyp->mnte_text, copyp->mnte_text_size); 456 copyp->mnte_tab = origp->mnte_tab; 457 copyp->mnte_hidden = origp->mnte_hidden; 458 459 return (copyp); 460 } 461 462 /* 463 * Compare two database elements and determine whether or not the vfs_t payload 464 * data of each are the same. Return 1 if so and 0 otherwise. 465 */ 466 static int 467 mntfs_is_same_element(mntelem_t *a, mntelem_t *b) 468 { 469 if (a->mnte_hidden == b->mnte_hidden && 470 a->mnte_text_size == b->mnte_text_size && 471 bcmp(a->mnte_text, b->mnte_text, a->mnte_text_size) == 0 && 472 bcmp(&a->mnte_tab, &b->mnte_tab, sizeof (struct extmnttab)) == 0) 473 return (1); 474 else 475 return (0); 476 } 477 478 /* 479 * mntfs_snapshot() updates the database, creating it if necessary, so that it 480 * accurately reflects the state of the in-kernel mnttab. It also increments 481 * the reference count on all database elements that correspond to currently- 482 * mounted resources. Finally, it initialises the appropriate snapshot 483 * structure. 484 * 485 * Each vfs_t is given a high-resolution time stamp, for the benefit of mntfs, 486 * when it is inserted into the in-kernel mnttab. This time stamp is copied into 487 * the corresponding database element when it is created, allowing the element 488 * and the vfs_t to be identified as a pair. It is possible that some file 489 * systems may make unadvertised changes to, for example, a resource's mount 490 * options. Therefore, in order to determine whether a database element is an 491 * up-to-date representation of a given vfs_t, it is compared with a temporary 492 * element generated for this purpose. Although less efficient, this is safer 493 * than implementing an mtime for a vfs_t. 494 * 495 * Some mounted resources are marked as "hidden" with a VFS_NOMNTTAB flag. These 496 * are considered invisible unless the user has already set the MNT_SHOWHIDDEN 497 * flag in the vnode using the MNTIOC_SHOWHIDDEN ioctl. 498 */ 499 static void 500 mntfs_snapshot(mntnode_t *mnp, mntsnap_t *snapp) 501 { 502 mntdata_t *mnd = MTOD(mnp); 503 zone_t *zonep = mnd->mnt_zone_ref.zref_zone; 504 int is_global_zone = (zonep == global_zone); 505 int show_hidden = mnp->mnt_flags & MNT_SHOWHIDDEN; 506 vfs_t *vfsp, *firstvfsp, *lastvfsp; 507 vfs_t dummyvfs; 508 vfs_t *dummyvfsp = NULL; 509 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 510 mntelem_t **headpp = &zonep->zone_mntfs_db; 511 mntelem_t *elemp; 512 mntelem_t *prevp = NULL; 513 int order; 514 mntelem_t *tempelemp; 515 mntelem_t *newp; 516 mntelem_t *firstp = NULL; 517 size_t nmnts = 0; 518 size_t total_text_size = 0; 519 size_t normal_text_size = 0; 520 int insert_before; 521 timespec_t last_mtime; 522 size_t entry_length, new_entry_length; 523 524 525 ASSERT(RW_WRITE_HELD(&mnp->mnt_contents)); 526 vfs_list_read_lock(); 527 vfs_mnttab_modtime(&last_mtime); 528 529 /* 530 * If this snapshot already exists then we must have been asked to 531 * rewind the file, i.e. discard the snapshot and create a new one in 532 * its place. In this case we first see if the in-kernel mnttab has 533 * advertised a change; if not then we simply reinitialise the metadata. 534 */ 535 if (snapp->mnts_nmnts) { 536 if (mntfs_newest(&last_mtime, &snapp->mnts_last_mtime) == 537 MNTFS_NEITHER) { 538 /* 539 * An unchanged mtime is no guarantee that the 540 * in-kernel mnttab is unchanged; for example, a 541 * concurrent remount may be between calls to 542 * vfs_setmntopt_nolock() and vfs_mnttab_modtimeupd(). 543 * It follows that the database may have changed, and 544 * in particular that some elements in this snapshot 545 * may have been killed by another call to 546 * mntfs_snapshot(). It is therefore not merely 547 * unnecessary to update the snapshot's time but in 548 * fact dangerous; it needs to be left alone. 549 */ 550 snapp->mnts_next = snapp->mnts_first; 551 snapp->mnts_flags &= ~MNTS_REWIND; 552 snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 553 vfs_list_unlock(); 554 return; 555 } else { 556 mntfs_freesnap(mnp, snapp); 557 } 558 } 559 560 /* 561 * Create a temporary database element. For each vfs_t, the temporary 562 * element will be populated with the corresponding text. If the vfs_t 563 * does not have a corresponding element within the database, or if 564 * there is such an element but it is stale, a copy of the temporary 565 * element is inserted into the database at the appropriate location. 566 */ 567 tempelemp = kmem_alloc(sizeof (mntelem_t), KM_SLEEP); 568 entry_length = MNT_LINE_MAX; 569 tempelemp->mnte_text = kmem_alloc(entry_length, KM_SLEEP); 570 571 /* Find the first and last vfs_t for the given zone. */ 572 if (is_global_zone) { 573 firstvfsp = rootvfs; 574 lastvfsp = firstvfsp->vfs_prev; 575 } else { 576 firstvfsp = zonep->zone_vfslist; 577 /* 578 * If there isn't already a vfs_t for root then we create a 579 * dummy which will be used as the head of the list (which will 580 * therefore no longer be circular). 581 */ 582 if (firstvfsp == NULL || 583 strcmp(refstr_value(firstvfsp->vfs_mntpt), 584 zonep->zone_rootpath) != 0) { 585 /* 586 * The zone's vfs_ts will have mount points relative to 587 * the zone's root path. The vfs_t for the zone's 588 * root file system would therefore have a mount point 589 * equal to the zone's root path. Since the zone's root 590 * path isn't a mount point, we copy the vfs_t of the 591 * zone's root vnode, and provide it with a fake mount 592 * and resource. However, if the zone's root is a 593 * zfs dataset, use the dataset name as the resource. 594 * 595 * Note that by cloning another vfs_t we also acquire 596 * its high-resolution ctime. This might appear to 597 * violate the requirement that the ctimes in the list 598 * of vfs_ts are unique and monotonically increasing; 599 * this is not the case. The dummy vfs_t appears in only 600 * a non-global zone's vfs_t list, where the cloned 601 * vfs_t would not ordinarily be visible; the ctimes are 602 * therefore unique. The zone's root path must be 603 * available before the zone boots, and so its root 604 * vnode's vfs_t's ctime must be lower than those of any 605 * resources subsequently mounted by the zone. The 606 * ctimes are therefore monotonically increasing. 607 */ 608 dummyvfs = *zonep->zone_rootvp->v_vfsp; 609 dummyvfs.vfs_mntpt = refstr_alloc(zonep->zone_rootpath); 610 if (strcmp(vfssw[dummyvfs.vfs_fstype].vsw_name, "zfs") 611 != 0) 612 dummyvfs.vfs_resource = dummyvfs.vfs_mntpt; 613 dummyvfsp = &dummyvfs; 614 if (firstvfsp == NULL) { 615 lastvfsp = dummyvfsp; 616 } else { 617 lastvfsp = firstvfsp->vfs_zone_prev; 618 dummyvfsp->vfs_zone_next = firstvfsp; 619 } 620 firstvfsp = dummyvfsp; 621 } else { 622 lastvfsp = firstvfsp->vfs_zone_prev; 623 } 624 } 625 626 /* 627 * Now walk through all the vfs_ts for this zone. For each one, find the 628 * corresponding database element, creating it first if necessary, and 629 * increment its reference count. 630 */ 631 rw_enter(dblockp, RW_WRITER); 632 elemp = zonep->zone_mntfs_db; 633 /* CSTYLED */ 634 for (vfsp = firstvfsp;; 635 vfsp = is_global_zone ? vfsp->vfs_next : vfsp->vfs_zone_next) { 636 DTRACE_PROBE1(new__vfs, vfs_t *, vfsp); 637 /* Consider only visible entries. */ 638 if ((vfsp->vfs_flag & VFS_NOMNTTAB) == 0 || show_hidden) { 639 /* 640 * Walk through the existing database looking for either 641 * an element that matches the current vfs_t, or for the 642 * correct place in which to insert a new element. 643 */ 644 insert_before = 0; 645 for (; elemp; prevp = elemp, elemp = elemp->mnte_next) { 646 DTRACE_PROBE1(considering__elem, mntelem_t *, 647 elemp); 648 649 /* Compare the vfs_t with the element. */ 650 order = mntfs_newest(&elemp->mnte_vfs_ctime, 651 &vfsp->vfs_hrctime); 652 653 /* 654 * If we encounter a database element newer than 655 * this vfs_t then we've stepped over a gap 656 * where the element for this vfs_t must be 657 * inserted. 658 */ 659 if (order == MNTFS_FIRST) { 660 insert_before = 1; 661 break; 662 } 663 664 /* Dead elements no longer interest us. */ 665 if (MNTFS_ELEM_IS_DEAD(elemp)) 666 continue; 667 668 /* 669 * If the time stamps are the same then the 670 * element is potential match for the vfs_t, 671 * although it may later prove to be stale. 672 */ 673 if (order == MNTFS_NEITHER) 674 break; 675 676 /* 677 * This element must be older than the vfs_t. 678 * It must, therefore, correspond to a vfs_t 679 * that has been unmounted. Since the element is 680 * still alive, we kill it if it is visible. 681 */ 682 if (!elemp->mnte_hidden || show_hidden) 683 vfs_mono_time(&elemp->mnte_death); 684 } 685 DTRACE_PROBE2(possible__match, vfs_t *, vfsp, 686 mntelem_t *, elemp); 687 688 /* Create a new database element if required. */ 689 new_entry_length = mntfs_text_len(vfsp, zonep); 690 if (new_entry_length > entry_length) { 691 kmem_free(tempelemp->mnte_text, entry_length); 692 tempelemp->mnte_text = 693 kmem_alloc(new_entry_length, KM_SLEEP); 694 entry_length = new_entry_length; 695 } 696 mntfs_populate_text(vfsp, zonep, tempelemp); 697 ASSERT(tempelemp->mnte_text_size == new_entry_length); 698 if (elemp == NULL) { 699 /* 700 * We ran off the end of the database. Insert a 701 * new element at the end. 702 */ 703 newp = mntfs_copy(tempelemp); 704 vfs_mono_time(&newp->mnte_birth); 705 if (prevp) { 706 mntfs_insert_after(newp, prevp); 707 } else { 708 newp->mnte_next = NULL; 709 newp->mnte_prev = NULL; 710 ASSERT(*headpp == NULL); 711 *headpp = newp; 712 } 713 elemp = newp; 714 } else if (insert_before) { 715 /* 716 * Insert a new element before the current one. 717 */ 718 newp = mntfs_copy(tempelemp); 719 vfs_mono_time(&newp->mnte_birth); 720 if (prevp) { 721 mntfs_insert_after(newp, prevp); 722 } else { 723 newp->mnte_next = elemp; 724 newp->mnte_prev = NULL; 725 elemp->mnte_prev = newp; 726 ASSERT(*headpp == elemp); 727 *headpp = newp; 728 } 729 elemp = newp; 730 } else if (!mntfs_is_same_element(elemp, tempelemp)) { 731 /* 732 * The element corresponds to the vfs_t, but the 733 * vfs_t has changed; it must have been 734 * remounted. Kill the old element and insert a 735 * new one after it. 736 */ 737 vfs_mono_time(&elemp->mnte_death); 738 newp = mntfs_copy(tempelemp); 739 vfs_mono_time(&newp->mnte_birth); 740 mntfs_insert_after(newp, elemp); 741 elemp = newp; 742 } 743 744 /* We've found the corresponding element. Hold it. */ 745 DTRACE_PROBE1(incrementing, mntelem_t *, elemp); 746 elemp->mnte_refcnt++; 747 748 /* 749 * Update the parameters used to initialise the 750 * snapshot. 751 */ 752 nmnts++; 753 total_text_size += elemp->mnte_text_size; 754 if (!elemp->mnte_hidden) 755 normal_text_size += elemp->mnte_text_size; 756 if (!firstp) 757 firstp = elemp; 758 759 prevp = elemp; 760 elemp = elemp->mnte_next; 761 } 762 763 if (vfsp == lastvfsp) 764 break; 765 } 766 767 /* 768 * Any remaining visible database elements that are still alive must be 769 * killed now, because their corresponding vfs_ts must have been 770 * unmounted. 771 */ 772 for (; elemp; elemp = elemp->mnte_next) { 773 if (MNTFS_ELEM_IS_ALIVE(elemp) && 774 (!elemp->mnte_hidden || show_hidden)) 775 vfs_mono_time(&elemp->mnte_death); 776 } 777 778 /* Initialise the snapshot. */ 779 vfs_mono_time(&snapp->mnts_time); 780 snapp->mnts_last_mtime = last_mtime; 781 snapp->mnts_first = snapp->mnts_next = firstp; 782 snapp->mnts_flags = show_hidden ? MNTS_SHOWHIDDEN : 0; 783 snapp->mnts_nmnts = nmnts; 784 snapp->mnts_text_size = total_text_size; 785 snapp->mnts_foffset = snapp->mnts_ieoffset = 0; 786 787 /* 788 * Record /etc/mnttab's current size and mtime for possible future use 789 * by mntgetattr(). 790 */ 791 mnd->mnt_size = normal_text_size; 792 mnd->mnt_mtime = last_mtime; 793 if (show_hidden) { 794 mnd->mnt_hidden_size = total_text_size; 795 mnd->mnt_hidden_mtime = last_mtime; 796 } 797 798 /* Clean up. */ 799 rw_exit(dblockp); 800 vfs_list_unlock(); 801 if (dummyvfsp != NULL) 802 refstr_rele(dummyvfsp->vfs_mntpt); 803 kmem_free(tempelemp->mnte_text, entry_length); 804 kmem_free(tempelemp, sizeof (mntelem_t)); 805 } 806 807 /* 808 * Public function to convert vfs_mntopts into a string. 809 * A buffer of sufficient size is allocated, which is returned via bufp, 810 * and whose length is returned via lenp. 811 */ 812 void 813 mntfs_getmntopts(struct vfs *vfsp, char **bufp, size_t *lenp) 814 { 815 size_t len; 816 char *buf; 817 818 vfs_list_read_lock(); 819 820 len = mntfs_optsize(vfsp) + 1; 821 buf = kmem_alloc(len, KM_NOSLEEP); 822 if (buf == NULL) { 823 *bufp = NULL; 824 vfs_list_unlock(); 825 return; 826 } 827 buf[len - 1] = '\0'; 828 (void) mntfs_optprint(vfsp, buf); 829 ASSERT(buf[len - 1] == '\0'); 830 831 vfs_list_unlock(); 832 *bufp = buf; 833 *lenp = len; 834 } 835 836 /* ARGSUSED */ 837 static int 838 mntopen(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) 839 { 840 vnode_t *vp = *vpp; 841 mntnode_t *nmnp; 842 843 /* 844 * Not allowed to open for writing, return error. 845 */ 846 if (flag & FWRITE) 847 return (EPERM); 848 /* 849 * Create a new mnt/vnode for each open, this will give us a handle to 850 * hang the snapshot on. 851 */ 852 nmnp = mntgetnode(vp); 853 854 *vpp = MTOV(nmnp); 855 atomic_add_32(&MTOD(nmnp)->mnt_nopen, 1); 856 VN_RELE(vp); 857 return (0); 858 } 859 860 /* ARGSUSED */ 861 static int 862 mntclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 863 caller_context_t *ct) 864 { 865 mntnode_t *mnp = VTOM(vp); 866 867 /* Clean up any locks or shares held by the current process */ 868 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 869 cleanshares(vp, ttoproc(curthread)->p_pid); 870 871 if (count > 1) 872 return (0); 873 if (vp->v_count == 1) { 874 rw_enter(&mnp->mnt_contents, RW_WRITER); 875 mntfs_freesnap(mnp, &mnp->mnt_read); 876 mntfs_freesnap(mnp, &mnp->mnt_ioctl); 877 rw_exit(&mnp->mnt_contents); 878 atomic_add_32(&MTOD(mnp)->mnt_nopen, -1); 879 } 880 return (0); 881 } 882 883 /* ARGSUSED */ 884 static int 885 mntread(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cred, caller_context_t *ct) 886 { 887 mntnode_t *mnp = VTOM(vp); 888 zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; 889 mntsnap_t *snapp = &mnp->mnt_read; 890 off_t off = uio->uio_offset; 891 size_t len = uio->uio_resid; 892 char *bufferp; 893 size_t available, copylen; 894 size_t written = 0; 895 mntelem_t *elemp; 896 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 897 int error = 0; 898 off_t ieoffset; 899 900 rw_enter(&mnp->mnt_contents, RW_WRITER); 901 if (snapp->mnts_nmnts == 0 || (off == (off_t)0)) 902 mntfs_snapshot(mnp, snapp); 903 904 if ((size_t)(off + len) > snapp->mnts_text_size) 905 len = snapp->mnts_text_size - off; 906 907 if (off < 0 || len > snapp->mnts_text_size) { 908 rw_exit(&mnp->mnt_contents); 909 return (EFAULT); 910 } 911 912 if (len == 0) { 913 rw_exit(&mnp->mnt_contents); 914 return (0); 915 } 916 917 /* 918 * For the file offset provided, locate the corresponding database 919 * element and calculate the corresponding offset within its text. If 920 * the file offset is the same as that reached during the last read(2) 921 * then use the saved element and intra-element offset. 922 */ 923 rw_enter(dblockp, RW_READER); 924 if (off == 0 || (off == snapp->mnts_foffset)) { 925 elemp = snapp->mnts_next; 926 ieoffset = snapp->mnts_ieoffset; 927 } else { 928 off_t total_off; 929 /* 930 * Find the element corresponding to the requested file offset 931 * by walking through the database and summing the text sizes 932 * of the individual elements. If the requested file offset is 933 * greater than that reached on the last visit then we can start 934 * at the last seen element; otherwise, we have to start at the 935 * beginning. 936 */ 937 if (off > snapp->mnts_foffset) { 938 elemp = snapp->mnts_next; 939 total_off = snapp->mnts_foffset - snapp->mnts_ieoffset; 940 } else { 941 elemp = snapp->mnts_first; 942 total_off = 0; 943 } 944 while (off > total_off + elemp->mnte_text_size) { 945 total_off += elemp->mnte_text_size; 946 elemp = mntfs_get_next_elem(snapp, elemp); 947 ASSERT(elemp != NULL); 948 } 949 /* Calculate the intra-element offset. */ 950 if (off > total_off) 951 ieoffset = off - total_off; 952 else 953 ieoffset = 0; 954 } 955 956 /* 957 * Create a buffer and populate it with the text from successive 958 * database elements until it is full. 959 */ 960 bufferp = kmem_alloc(len, KM_SLEEP); 961 while (written < len) { 962 available = elemp->mnte_text_size - ieoffset; 963 copylen = MIN(len - written, available); 964 bcopy(elemp->mnte_text + ieoffset, bufferp + written, copylen); 965 written += copylen; 966 if (copylen == available) { 967 elemp = mntfs_get_next_elem(snapp, elemp); 968 ASSERT(elemp != NULL || written == len); 969 ieoffset = 0; 970 } else { 971 ieoffset += copylen; 972 } 973 } 974 rw_exit(dblockp); 975 976 /* 977 * Write the populated buffer, update the snapshot's state if 978 * successful and then advertise our read. 979 */ 980 error = uiomove(bufferp, len, UIO_READ, uio); 981 if (error == 0) { 982 snapp->mnts_next = elemp; 983 snapp->mnts_foffset = off + len; 984 snapp->mnts_ieoffset = ieoffset; 985 } 986 vfs_mnttab_readop(); 987 rw_exit(&mnp->mnt_contents); 988 989 /* Clean up. */ 990 kmem_free(bufferp, len); 991 return (error); 992 } 993 994 static int 995 mntgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 996 caller_context_t *ct) 997 { 998 int mask = vap->va_mask; 999 int error; 1000 mntnode_t *mnp = VTOM(vp); 1001 timespec_t mtime, old_mtime; 1002 size_t size, old_size; 1003 mntdata_t *mntdata = MTOD(VTOM(vp)); 1004 mntsnap_t *rsnapp, *isnapp; 1005 extern timespec_t vfs_mnttab_ctime; 1006 1007 1008 /* AT_MODE, AT_UID and AT_GID are derived from the underlying file. */ 1009 if (mask & AT_MODE|AT_UID|AT_GID) { 1010 if (error = VOP_GETATTR(mnp->mnt_mountvp, vap, flags, cr, ct)) 1011 return (error); 1012 } 1013 1014 /* 1015 * There are some minor subtleties in the determination of 1016 * /etc/mnttab's size and mtime. We wish to avoid any condition in 1017 * which, in the vicinity of a change to the in-kernel mnttab, we 1018 * return an old value for one but a new value for the other. We cannot 1019 * simply hold vfslist for the entire calculation because we might need 1020 * to call mntfs_snapshot(), which calls vfs_list_read_lock(). 1021 */ 1022 if (mask & AT_SIZE|AT_NBLOCKS) { 1023 rw_enter(&mnp->mnt_contents, RW_WRITER); 1024 1025 vfs_list_read_lock(); 1026 vfs_mnttab_modtime(&mtime); 1027 if (mnp->mnt_flags & MNT_SHOWHIDDEN) { 1028 old_mtime = mntdata->mnt_hidden_mtime; 1029 old_size = mntdata->mnt_hidden_size; 1030 } else { 1031 old_mtime = mntdata->mnt_mtime; 1032 old_size = mntdata->mnt_size; 1033 } 1034 vfs_list_unlock(); 1035 1036 rsnapp = &mnp->mnt_read; 1037 isnapp = &mnp->mnt_ioctl; 1038 if (rsnapp->mnts_nmnts || isnapp->mnts_nmnts) { 1039 /* 1040 * The mntnode already has at least one snapshot from 1041 * which to take the size; the user will understand from 1042 * mnttab(4) that the current size of the in-kernel 1043 * mnttab is irrelevant. 1044 */ 1045 size = rsnapp->mnts_nmnts ? rsnapp->mnts_text_size : 1046 isnapp->mnts_text_size; 1047 } else if (mntfs_newest(&mtime, &old_mtime) == MNTFS_NEITHER) { 1048 /* 1049 * There is no existing valid snapshot but the in-kernel 1050 * mnttab has not changed since the time that the last 1051 * one was generated. Use the old file size; note that 1052 * it is guaranteed to be consistent with mtime, which 1053 * may be returned to the user later. 1054 */ 1055 size = old_size; 1056 } else { 1057 /* 1058 * There is no snapshot and the in-kernel mnttab has 1059 * changed since the last one was created. We generate a 1060 * new snapshot which we use for not only the size but 1061 * also the mtime, thereby ensuring that the two are 1062 * consistent. 1063 */ 1064 mntfs_snapshot(mnp, rsnapp); 1065 size = rsnapp->mnts_text_size; 1066 mtime = rsnapp->mnts_last_mtime; 1067 mntfs_freesnap(mnp, rsnapp); 1068 } 1069 1070 rw_exit(&mnp->mnt_contents); 1071 } else if (mask & AT_ATIME|AT_MTIME) { 1072 vfs_list_read_lock(); 1073 vfs_mnttab_modtime(&mtime); 1074 vfs_list_unlock(); 1075 } 1076 1077 /* Always look like a regular file. */ 1078 if (mask & AT_TYPE) 1079 vap->va_type = VREG; 1080 /* Mode should basically be read only. */ 1081 if (mask & AT_MODE) 1082 vap->va_mode &= 07444; 1083 if (mask & AT_FSID) 1084 vap->va_fsid = vp->v_vfsp->vfs_dev; 1085 /* Nodeid is always ROOTINO. */ 1086 if (mask & AT_NODEID) 1087 vap->va_nodeid = (ino64_t)MNTROOTINO; 1088 /* 1089 * Set nlink to the number of open vnodes for mnttab info 1090 * plus one for existing. 1091 */ 1092 if (mask & AT_NLINK) 1093 vap->va_nlink = mntdata->mnt_nopen + 1; 1094 if (mask & AT_SIZE) 1095 vap->va_size = size; 1096 if (mask & AT_ATIME) 1097 vap->va_atime = mtime; 1098 if (mask & AT_MTIME) 1099 vap->va_mtime = mtime; 1100 if (mask & AT_CTIME) 1101 vap->va_ctime = vfs_mnttab_ctime; 1102 if (mask & AT_RDEV) 1103 vap->va_rdev = 0; 1104 if (mask & AT_BLKSIZE) 1105 vap->va_blksize = DEV_BSIZE; 1106 if (mask & AT_NBLOCKS) 1107 vap->va_nblocks = btod(size); 1108 if (mask & AT_SEQ) 1109 vap->va_seq = 0; 1110 1111 return (0); 1112 } 1113 1114 static int 1115 mntaccess(vnode_t *vp, int mode, int flags, cred_t *cr, 1116 caller_context_t *ct) 1117 { 1118 mntnode_t *mnp = VTOM(vp); 1119 1120 if (mode & (VWRITE|VEXEC)) 1121 return (EROFS); 1122 1123 /* 1124 * Do access check on the underlying directory vnode. 1125 */ 1126 return (VOP_ACCESS(mnp->mnt_mountvp, mode, flags, cr, ct)); 1127 } 1128 1129 1130 /* 1131 * New /mntfs vnode required; allocate it and fill in most of the fields. 1132 */ 1133 static mntnode_t * 1134 mntgetnode(vnode_t *dp) 1135 { 1136 mntnode_t *mnp; 1137 vnode_t *vp; 1138 1139 mnp = kmem_zalloc(sizeof (mntnode_t), KM_SLEEP); 1140 mnp->mnt_vnode = vn_alloc(KM_SLEEP); 1141 mnp->mnt_mountvp = VTOM(dp)->mnt_mountvp; 1142 rw_init(&mnp->mnt_contents, NULL, RW_DEFAULT, NULL); 1143 vp = MTOV(mnp); 1144 vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT; 1145 vn_setops(vp, mntvnodeops); 1146 vp->v_vfsp = dp->v_vfsp; 1147 vp->v_type = VREG; 1148 vp->v_data = (caddr_t)mnp; 1149 1150 return (mnp); 1151 } 1152 1153 /* 1154 * Free the storage obtained from mntgetnode(). 1155 */ 1156 static void 1157 mntfreenode(mntnode_t *mnp) 1158 { 1159 vnode_t *vp = MTOV(mnp); 1160 1161 rw_destroy(&mnp->mnt_contents); 1162 vn_invalid(vp); 1163 vn_free(vp); 1164 kmem_free(mnp, sizeof (*mnp)); 1165 } 1166 1167 1168 /* ARGSUSED */ 1169 static int 1170 mntfsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct) 1171 { 1172 return (0); 1173 } 1174 1175 /* ARGSUSED */ 1176 static void 1177 mntinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 1178 { 1179 mntnode_t *mnp = VTOM(vp); 1180 1181 mntfreenode(mnp); 1182 } 1183 1184 /* 1185 * lseek(2) is supported only to rewind the file by resetmnttab(3C). Rewinding 1186 * has a special meaning for /etc/mnttab: it forces mntfs to refresh the 1187 * snapshot at the next ioctl(). 1188 * 1189 * mnttab(4) explains that "the snapshot...is taken any time a read(2) is 1190 * performed at offset 0". We therefore ignore the read snapshot here. 1191 */ 1192 /* ARGSUSED */ 1193 static int 1194 mntseek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct) 1195 { 1196 mntnode_t *mnp = VTOM(vp); 1197 1198 if (*noffp == 0) { 1199 rw_enter(&mnp->mnt_contents, RW_WRITER); 1200 mnp->mnt_ioctl.mnts_flags |= MNTS_REWIND; 1201 rw_exit(&mnp->mnt_contents); 1202 } 1203 1204 return (0); 1205 } 1206 1207 /* 1208 * Return the answer requested to poll(). 1209 * POLLRDBAND will return when the mtime of the mnttab 1210 * information is newer than the latest one read for this open. 1211 */ 1212 /* ARGSUSED */ 1213 static int 1214 mntpoll(vnode_t *vp, short ev, int any, short *revp, pollhead_t **phpp, 1215 caller_context_t *ct) 1216 { 1217 mntnode_t *mnp = VTOM(vp); 1218 mntsnap_t *snapp; 1219 1220 rw_enter(&mnp->mnt_contents, RW_READER); 1221 if (mntfs_newest(&mnp->mnt_ioctl.mnts_last_mtime, 1222 &mnp->mnt_read.mnts_last_mtime) == MNTFS_FIRST) 1223 snapp = &mnp->mnt_ioctl; 1224 else 1225 snapp = &mnp->mnt_read; 1226 1227 *revp = 0; 1228 *phpp = (pollhead_t *)NULL; 1229 if (ev & POLLIN) 1230 *revp |= POLLIN; 1231 1232 if (ev & POLLRDNORM) 1233 *revp |= POLLRDNORM; 1234 1235 if (ev & POLLRDBAND) { 1236 vfs_mnttab_poll(&snapp->mnts_last_mtime, phpp); 1237 if (*phpp == (pollhead_t *)NULL) 1238 *revp |= POLLRDBAND; 1239 } 1240 rw_exit(&mnp->mnt_contents); 1241 1242 if (*revp || *phpp != NULL || any) { 1243 return (0); 1244 } 1245 /* 1246 * If someone is polling an unsupported poll events (e.g. 1247 * POLLOUT, POLLPRI, etc.), just return POLLERR revents. 1248 * That way we will ensure that we don't return a 0 1249 * revents with a NULL pollhead pointer. 1250 */ 1251 *revp = POLLERR; 1252 return (0); 1253 } 1254 1255 /* 1256 * mntfs_same_word() returns 1 if two words are the same in the context of 1257 * MNTIOC_GETMNTANY and 0 otherwise. 1258 * 1259 * worda is a memory address that lies somewhere in the buffer bufa; it cannot 1260 * be NULL since this is used to indicate to getmntany(3C) that the user does 1261 * not wish to match a particular field. The text to which worda points is 1262 * supplied by the user; if it is not null-terminated then it cannot match. 1263 * 1264 * Buffer bufb contains a line from /etc/mnttab, in which the fields are 1265 * delimited by tab or new-line characters. offb is the offset of the second 1266 * word within this buffer. 1267 * 1268 * mntfs_same_word() returns 1 if the words are the same and 0 otherwise. 1269 */ 1270 int 1271 mntfs_same_word(char *worda, char *bufa, size_t sizea, off_t offb, char *bufb, 1272 size_t sizeb) 1273 { 1274 char *wordb = bufb + offb; 1275 int bytes_remaining; 1276 1277 ASSERT(worda != NULL); 1278 1279 bytes_remaining = MIN(((bufa + sizea) - worda), 1280 ((bufb + sizeb) - wordb)); 1281 while (bytes_remaining && *worda == *wordb) { 1282 worda++; 1283 wordb++; 1284 bytes_remaining--; 1285 } 1286 if (bytes_remaining && 1287 *worda == '\0' && (*wordb == '\t' || *wordb == '\n')) 1288 return (1); 1289 else 1290 return (0); 1291 } 1292 1293 /* 1294 * mntfs_special_info_string() returns which, if either, of VBLK or VCHR 1295 * corresponds to a supplied path. If the path is a special device then the 1296 * function optionally sets the major and minor numbers. 1297 */ 1298 vtype_t 1299 mntfs_special_info_string(char *path, uint_t *major, uint_t *minor, cred_t *cr) 1300 { 1301 vattr_t vattr; 1302 vnode_t *vp; 1303 vtype_t type; 1304 int error; 1305 1306 if (path == NULL || *path != '/' || 1307 lookupnameat(path + 1, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) 1308 return (0); 1309 1310 vattr.va_mask = AT_TYPE | AT_RDEV; 1311 error = VOP_GETATTR(vp, &vattr, ATTR_REAL, cr, NULL); 1312 VN_RELE(vp); 1313 1314 if (error == 0 && ((type = vattr.va_type) == VBLK || type == VCHR)) { 1315 if (major && minor) { 1316 *major = getmajor(vattr.va_rdev); 1317 *minor = getminor(vattr.va_rdev); 1318 } 1319 return (type); 1320 } else { 1321 return (0); 1322 } 1323 } 1324 1325 /* 1326 * mntfs_special_info_element() extracts the name of the mounted resource 1327 * for a given element and copies it into a null-terminated string, which it 1328 * then passes to mntfs_special_info_string(). 1329 */ 1330 vtype_t 1331 mntfs_special_info_element(mntelem_t *elemp, cred_t *cr) 1332 { 1333 char *newpath; 1334 vtype_t type; 1335 1336 newpath = kmem_alloc(elemp->mnte_text_size, KM_SLEEP); 1337 bcopy(elemp->mnte_text, newpath, (off_t)(elemp->mnte_tab.mnt_mountp)); 1338 *(newpath + (off_t)elemp->mnte_tab.mnt_mountp - 1) = '\0'; 1339 type = mntfs_special_info_string(newpath, NULL, NULL, cr); 1340 kmem_free(newpath, elemp->mnte_text_size); 1341 1342 return (type); 1343 } 1344 1345 /* 1346 * Convert an address that points to a byte within a user buffer into an 1347 * address that points to the corresponding offset within a kernel buffer. If 1348 * the user address is NULL then make no conversion. If the address does not 1349 * lie within the buffer then reset it to NULL. 1350 */ 1351 char * 1352 mntfs_import_addr(char *uaddr, char *ubufp, char *kbufp, size_t bufsize) 1353 { 1354 if (uaddr < ubufp || uaddr >= ubufp + bufsize) 1355 return (NULL); 1356 else 1357 return (kbufp + (uaddr - ubufp)); 1358 } 1359 1360 /* 1361 * These 32-bit versions are to support STRUCT_DECL(9F) etc. in 1362 * mntfs_copyout_element() and mntioctl(). 1363 */ 1364 #ifdef _SYSCALL32_IMPL 1365 typedef struct extmnttab32 { 1366 uint32_t mnt_special; 1367 uint32_t mnt_mountp; 1368 uint32_t mnt_fstype; 1369 uint32_t mnt_mntopts; 1370 uint32_t mnt_time; 1371 uint_t mnt_major; 1372 uint_t mnt_minor; 1373 } extmnttab32_t; 1374 1375 typedef struct mnttab32 { 1376 uint32_t mnt_special; 1377 uint32_t mnt_mountp; 1378 uint32_t mnt_fstype; 1379 uint32_t mnt_mntopts; 1380 uint32_t mnt_time; 1381 } mnttab32_t; 1382 1383 struct mntentbuf32 { 1384 uint32_t mbuf_emp; 1385 uint_t mbuf_bufsize; 1386 uint32_t mbuf_buf; 1387 }; 1388 #endif 1389 1390 /* 1391 * mntfs_copyout_element() is common code for the MNTIOC_GETMNTENT, 1392 * MNTIOC_GETEXTMNTENT and MNTIOC_GETMNTANY ioctls. Having identifed the 1393 * database element desired by the user, this function copies out the text and 1394 * the pointers to the relevant userland addresses. It returns 0 on success 1395 * and non-zero otherwise. 1396 */ 1397 int 1398 mntfs_copyout_elem(mntelem_t *elemp, struct extmnttab *uemp, 1399 char *ubufp, int cmd, int datamodel) 1400 { 1401 STRUCT_DECL(extmnttab, ktab); 1402 char *dbbufp = elemp->mnte_text; 1403 size_t dbbufsize = elemp->mnte_text_size; 1404 struct extmnttab *dbtabp = &elemp->mnte_tab; 1405 size_t ssize; 1406 char *kbufp; 1407 int error = 0; 1408 1409 1410 /* 1411 * We create a struct extmnttab within the kernel of the size 1412 * determined by the user's data model. We then populate its 1413 * fields by combining the start address of the text buffer 1414 * supplied by the user, ubufp, with the offsets stored for 1415 * this database element within dbtabp, a pointer to a struct 1416 * extmnttab. 1417 * 1418 * Note that if the corresponding field is "-" this signifies 1419 * no real content, and we set the address to NULL. This does 1420 * not apply to mnt_time. 1421 */ 1422 STRUCT_INIT(ktab, datamodel); 1423 STRUCT_FSETP(ktab, mnt_special, 1424 MNTFS_REAL_FIELD(dbbufp) ? ubufp : NULL); 1425 STRUCT_FSETP(ktab, mnt_mountp, 1426 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mountp) ? 1427 ubufp + (off_t)dbtabp->mnt_mountp : NULL); 1428 STRUCT_FSETP(ktab, mnt_fstype, 1429 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_fstype) ? 1430 ubufp + (off_t)dbtabp->mnt_fstype : NULL); 1431 STRUCT_FSETP(ktab, mnt_mntopts, 1432 MNTFS_REAL_FIELD(dbbufp + (off_t)dbtabp->mnt_mntopts) ? 1433 ubufp + (off_t)dbtabp->mnt_mntopts : NULL); 1434 STRUCT_FSETP(ktab, mnt_time, 1435 ubufp + (off_t)dbtabp->mnt_time); 1436 if (cmd == MNTIOC_GETEXTMNTENT) { 1437 STRUCT_FSETP(ktab, mnt_major, dbtabp->mnt_major); 1438 STRUCT_FSETP(ktab, mnt_minor, dbtabp->mnt_minor); 1439 ssize = SIZEOF_STRUCT(extmnttab, datamodel); 1440 } else { 1441 ssize = SIZEOF_STRUCT(mnttab, datamodel); 1442 } 1443 if (copyout(STRUCT_BUF(ktab), uemp, ssize)) 1444 return (EFAULT); 1445 1446 /* 1447 * We create a text buffer in the kernel into which we copy the 1448 * /etc/mnttab entry for this element. We change the tab and 1449 * new-line delimiters to null bytes before copying out the 1450 * buffer. 1451 */ 1452 kbufp = kmem_alloc(dbbufsize, KM_SLEEP); 1453 bcopy(elemp->mnte_text, kbufp, dbbufsize); 1454 *(kbufp + (off_t)dbtabp->mnt_mountp - 1) = 1455 *(kbufp + (off_t)dbtabp->mnt_fstype - 1) = 1456 *(kbufp + (off_t)dbtabp->mnt_mntopts - 1) = 1457 *(kbufp + (off_t)dbtabp->mnt_time - 1) = 1458 *(kbufp + dbbufsize - 1) = '\0'; 1459 if (copyout(kbufp, ubufp, dbbufsize)) 1460 error = EFAULT; 1461 1462 kmem_free(kbufp, dbbufsize); 1463 return (error); 1464 } 1465 1466 /* ARGSUSED */ 1467 static int 1468 mntioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cr, 1469 int *rvalp, caller_context_t *ct) 1470 { 1471 uint_t *up = (uint_t *)arg; 1472 mntnode_t *mnp = VTOM(vp); 1473 mntsnap_t *snapp = &mnp->mnt_ioctl; 1474 int error = 0; 1475 zone_t *zonep = MTOD(mnp)->mnt_zone_ref.zref_zone; 1476 krwlock_t *dblockp = &zonep->zone_mntfs_db_lock; 1477 model_t datamodel = flag & DATAMODEL_MASK; 1478 1479 switch (cmd) { 1480 1481 case MNTIOC_NMNTS: /* get no. of mounted resources */ 1482 { 1483 rw_enter(&mnp->mnt_contents, RW_READER); 1484 if (snapp->mnts_nmnts == 0 || 1485 (snapp->mnts_flags & MNTS_REWIND)) { 1486 if (!rw_tryupgrade(&mnp->mnt_contents)) { 1487 rw_exit(&mnp->mnt_contents); 1488 rw_enter(&mnp->mnt_contents, RW_WRITER); 1489 } 1490 if (snapp->mnts_nmnts == 0 || 1491 (snapp->mnts_flags & MNTS_REWIND)) 1492 mntfs_snapshot(mnp, snapp); 1493 } 1494 rw_exit(&mnp->mnt_contents); 1495 1496 if (suword32(up, snapp->mnts_nmnts) != 0) 1497 error = EFAULT; 1498 break; 1499 } 1500 1501 case MNTIOC_GETDEVLIST: /* get mounted device major/minor nos */ 1502 { 1503 size_t len; 1504 uint_t *devlist; 1505 mntelem_t *elemp; 1506 int i = 0; 1507 1508 rw_enter(&mnp->mnt_contents, RW_READER); 1509 if (snapp->mnts_nmnts == 0 || 1510 (snapp->mnts_flags & MNTS_REWIND)) { 1511 if (!rw_tryupgrade(&mnp->mnt_contents)) { 1512 rw_exit(&mnp->mnt_contents); 1513 rw_enter(&mnp->mnt_contents, RW_WRITER); 1514 } 1515 if (snapp->mnts_nmnts == 0 || 1516 (snapp->mnts_flags & MNTS_REWIND)) 1517 mntfs_snapshot(mnp, snapp); 1518 rw_downgrade(&mnp->mnt_contents); 1519 } 1520 1521 /* Create a local buffer to hold the device numbers. */ 1522 len = 2 * snapp->mnts_nmnts * sizeof (uint_t); 1523 devlist = kmem_alloc(len, KM_SLEEP); 1524 1525 /* 1526 * Walk the database elements for this snapshot and add their 1527 * major and minor numbers. 1528 */ 1529 rw_enter(dblockp, RW_READER); 1530 for (elemp = snapp->mnts_first; elemp; 1531 elemp = mntfs_get_next_elem(snapp, elemp)) { 1532 devlist[2 * i] = elemp->mnte_tab.mnt_major; 1533 devlist[2 * i + 1] = elemp->mnte_tab.mnt_minor; 1534 i++; 1535 } 1536 rw_exit(dblockp); 1537 ASSERT(i == snapp->mnts_nmnts); 1538 rw_exit(&mnp->mnt_contents); 1539 1540 error = xcopyout(devlist, up, len); 1541 kmem_free(devlist, len); 1542 break; 1543 } 1544 1545 case MNTIOC_SETTAG: /* set tag on mounted file system */ 1546 case MNTIOC_CLRTAG: /* clear tag on mounted file system */ 1547 { 1548 struct mnttagdesc *dp = (struct mnttagdesc *)arg; 1549 STRUCT_DECL(mnttagdesc, tagdesc); 1550 char *cptr; 1551 uint32_t major, minor; 1552 char tagbuf[MAX_MNTOPT_TAG]; 1553 char *pbuf; 1554 size_t len; 1555 uint_t start = 0; 1556 mntdata_t *mntdata = MTOD(mnp); 1557 zone_t *zone = mntdata->mnt_zone_ref.zref_zone; 1558 1559 STRUCT_INIT(tagdesc, flag & DATAMODEL_MASK); 1560 if (copyin(dp, STRUCT_BUF(tagdesc), STRUCT_SIZE(tagdesc))) { 1561 error = EFAULT; 1562 break; 1563 } 1564 pbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1565 if (zone != global_zone) { 1566 (void) strcpy(pbuf, zone->zone_rootpath); 1567 /* truncate "/" and nul */ 1568 start = zone->zone_rootpathlen - 2; 1569 ASSERT(pbuf[start] == '/'); 1570 } 1571 cptr = STRUCT_FGETP(tagdesc, mtd_mntpt); 1572 error = copyinstr(cptr, pbuf + start, MAXPATHLEN - start, &len); 1573 if (error) { 1574 kmem_free(pbuf, MAXPATHLEN); 1575 break; 1576 } 1577 if (start != 0 && pbuf[start] != '/') { 1578 kmem_free(pbuf, MAXPATHLEN); 1579 error = EINVAL; 1580 break; 1581 } 1582 cptr = STRUCT_FGETP(tagdesc, mtd_tag); 1583 if ((error = copyinstr(cptr, tagbuf, MAX_MNTOPT_TAG, &len))) { 1584 kmem_free(pbuf, MAXPATHLEN); 1585 break; 1586 } 1587 major = STRUCT_FGET(tagdesc, mtd_major); 1588 minor = STRUCT_FGET(tagdesc, mtd_minor); 1589 if (cmd == MNTIOC_SETTAG) 1590 error = vfs_settag(major, minor, pbuf, tagbuf, cr); 1591 else 1592 error = vfs_clrtag(major, minor, pbuf, tagbuf, cr); 1593 kmem_free(pbuf, MAXPATHLEN); 1594 break; 1595 } 1596 1597 case MNTIOC_SHOWHIDDEN: 1598 { 1599 rw_enter(&mnp->mnt_contents, RW_WRITER); 1600 mnp->mnt_flags |= MNT_SHOWHIDDEN; 1601 rw_exit(&mnp->mnt_contents); 1602 break; 1603 } 1604 1605 case MNTIOC_GETMNTANY: 1606 { 1607 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 1608 STRUCT_DECL(extmnttab, ktab); /* Out copy of user's emp */ 1609 struct extmnttab *uemp; /* uaddr of user's emp */ 1610 char *ubufp; /* uaddr of user's text buf */ 1611 size_t ubufsize; /* size of the above */ 1612 struct extmnttab preftab; /* our version of user's emp */ 1613 char *prefbuf; /* our copy of user's text */ 1614 mntelem_t *elemp; /* a database element */ 1615 struct extmnttab *dbtabp; /* element's extmnttab */ 1616 char *dbbufp; /* element's text buf */ 1617 size_t dbbufsize; /* size of the above */ 1618 vtype_t type; /* type, if any, of special */ 1619 1620 1621 /* 1622 * embuf is a struct embuf within the kernel. We copy into it 1623 * the struct embuf supplied by the user. 1624 */ 1625 STRUCT_INIT(embuf, datamodel); 1626 if (copyin((void *) arg, STRUCT_BUF(embuf), 1627 STRUCT_SIZE(embuf))) { 1628 error = EFAULT; 1629 break; 1630 } 1631 uemp = STRUCT_FGETP(embuf, mbuf_emp); 1632 ubufp = STRUCT_FGETP(embuf, mbuf_buf); 1633 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 1634 1635 /* 1636 * Check that the text buffer offered by the user is the 1637 * agreed size. 1638 */ 1639 if (ubufsize != MNT_LINE_MAX) { 1640 error = EINVAL; 1641 break; 1642 } 1643 1644 /* Copy the user-supplied entry into a local buffer. */ 1645 prefbuf = kmem_alloc(MNT_LINE_MAX, KM_SLEEP); 1646 if (copyin(ubufp, prefbuf, MNT_LINE_MAX)) { 1647 kmem_free(prefbuf, MNT_LINE_MAX); 1648 error = EFAULT; 1649 break; 1650 } 1651 1652 /* Ensure that any string within it is null-terminated. */ 1653 *(prefbuf + MNT_LINE_MAX - 1) = 0; 1654 1655 /* Copy in the user-supplied mpref */ 1656 STRUCT_INIT(ktab, datamodel); 1657 if (copyin(uemp, STRUCT_BUF(ktab), 1658 SIZEOF_STRUCT(mnttab, datamodel))) { 1659 kmem_free(prefbuf, MNT_LINE_MAX); 1660 error = EFAULT; 1661 break; 1662 } 1663 1664 /* 1665 * Copy the members of the user's pref struct into a local 1666 * struct. The pointers need to be offset and verified to 1667 * ensure that they lie within the bounds of the buffer. 1668 */ 1669 preftab.mnt_special = mntfs_import_addr(STRUCT_FGETP(ktab, 1670 mnt_special), ubufp, prefbuf, MNT_LINE_MAX); 1671 preftab.mnt_mountp = mntfs_import_addr(STRUCT_FGETP(ktab, 1672 mnt_mountp), ubufp, prefbuf, MNT_LINE_MAX); 1673 preftab.mnt_fstype = mntfs_import_addr(STRUCT_FGETP(ktab, 1674 mnt_fstype), ubufp, prefbuf, MNT_LINE_MAX); 1675 preftab.mnt_mntopts = mntfs_import_addr(STRUCT_FGETP(ktab, 1676 mnt_mntopts), ubufp, prefbuf, MNT_LINE_MAX); 1677 preftab.mnt_time = mntfs_import_addr(STRUCT_FGETP(ktab, 1678 mnt_time), ubufp, prefbuf, MNT_LINE_MAX); 1679 1680 /* 1681 * If the user specifies a mounted resource that is a special 1682 * device then we capture its mode and major and minor numbers; 1683 * cf. the block comment below. 1684 */ 1685 type = mntfs_special_info_string(preftab.mnt_special, 1686 &preftab.mnt_major, &preftab.mnt_minor, cr); 1687 1688 rw_enter(&mnp->mnt_contents, RW_WRITER); 1689 if (snapp->mnts_nmnts == 0 || 1690 (snapp->mnts_flags & MNTS_REWIND)) 1691 mntfs_snapshot(mnp, snapp); 1692 1693 /* 1694 * This is the core functionality that implements getmntany(). 1695 * We walk through the mntfs database until we find an element 1696 * matching the user's preferences that are contained in 1697 * preftab. Typically, this means checking that the text 1698 * matches. However, the mounted resource is special: if the 1699 * user is looking for a special device then we must find a 1700 * database element with the same major and minor numbers and 1701 * the same type, i.e. VBLK or VCHR. The type is not recorded 1702 * in the element because it cannot be inferred from the vfs_t. 1703 * We therefore check the type of suitable candidates via 1704 * mntfs_special_info_element(); since this calls into the 1705 * underlying file system we make sure to drop the database lock 1706 * first. 1707 */ 1708 elemp = snapp->mnts_next; 1709 rw_enter(dblockp, RW_READER); 1710 for (;;) { 1711 for (; elemp; elemp = mntfs_get_next_elem(snapp, 1712 elemp)) { 1713 dbtabp = &elemp->mnte_tab; 1714 dbbufp = elemp->mnte_text; 1715 dbbufsize = elemp->mnte_text_size; 1716 1717 if (((type && 1718 dbtabp->mnt_major == preftab.mnt_major && 1719 dbtabp->mnt_minor == preftab.mnt_minor && 1720 MNTFS_REAL_FIELD(dbbufp)) || 1721 (!type && (!preftab.mnt_special || 1722 mntfs_same_word(preftab.mnt_special, 1723 prefbuf, MNT_LINE_MAX, (off_t)0, dbbufp, 1724 dbbufsize)))) && 1725 1726 (!preftab.mnt_mountp || mntfs_same_word( 1727 preftab.mnt_mountp, prefbuf, MNT_LINE_MAX, 1728 (off_t)dbtabp->mnt_mountp, dbbufp, 1729 dbbufsize)) && 1730 1731 (!preftab.mnt_fstype || mntfs_same_word( 1732 preftab.mnt_fstype, prefbuf, MNT_LINE_MAX, 1733 (off_t)dbtabp->mnt_fstype, dbbufp, 1734 dbbufsize)) && 1735 1736 (!preftab.mnt_mntopts || mntfs_same_word( 1737 preftab.mnt_mntopts, prefbuf, MNT_LINE_MAX, 1738 (off_t)dbtabp->mnt_mntopts, dbbufp, 1739 dbbufsize)) && 1740 1741 (!preftab.mnt_time || mntfs_same_word( 1742 preftab.mnt_time, prefbuf, MNT_LINE_MAX, 1743 (off_t)dbtabp->mnt_time, dbbufp, 1744 dbbufsize))) 1745 break; 1746 } 1747 rw_exit(dblockp); 1748 1749 if (elemp == NULL || type == 0 || 1750 type == mntfs_special_info_element(elemp, cr)) 1751 break; 1752 1753 rw_enter(dblockp, RW_READER); 1754 elemp = mntfs_get_next_elem(snapp, elemp); 1755 } 1756 1757 kmem_free(prefbuf, MNT_LINE_MAX); 1758 1759 /* If we failed to find a match then return EOF. */ 1760 if (elemp == NULL) { 1761 rw_exit(&mnp->mnt_contents); 1762 *rvalp = MNTFS_EOF; 1763 break; 1764 } 1765 1766 /* 1767 * Check that the text buffer offered by the user will be large 1768 * enough to accommodate the text for this entry. 1769 */ 1770 if (elemp->mnte_text_size > MNT_LINE_MAX) { 1771 rw_exit(&mnp->mnt_contents); 1772 *rvalp = MNTFS_TOOLONG; 1773 break; 1774 } 1775 1776 /* 1777 * Populate the user's struct mnttab and text buffer using the 1778 * element's contents. 1779 */ 1780 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 1781 error = EFAULT; 1782 } else { 1783 rw_enter(dblockp, RW_READER); 1784 elemp = mntfs_get_next_elem(snapp, elemp); 1785 rw_exit(dblockp); 1786 snapp->mnts_next = elemp; 1787 } 1788 rw_exit(&mnp->mnt_contents); 1789 break; 1790 } 1791 1792 case MNTIOC_GETMNTENT: 1793 case MNTIOC_GETEXTMNTENT: 1794 { 1795 STRUCT_DECL(mntentbuf, embuf); /* Our copy of user's embuf */ 1796 struct extmnttab *uemp; /* uaddr of user's emp */ 1797 char *ubufp; /* uaddr of user's text buf */ 1798 size_t ubufsize; /* size of the above */ 1799 mntelem_t *elemp; /* a database element */ 1800 1801 1802 rw_enter(&mnp->mnt_contents, RW_WRITER); 1803 if (snapp->mnts_nmnts == 0 || 1804 (snapp->mnts_flags & MNTS_REWIND)) 1805 mntfs_snapshot(mnp, snapp); 1806 if ((elemp = snapp->mnts_next) == NULL) { 1807 rw_exit(&mnp->mnt_contents); 1808 *rvalp = MNTFS_EOF; 1809 break; 1810 } 1811 1812 /* 1813 * embuf is a struct embuf within the kernel. We copy into it 1814 * the struct embuf supplied by the user. 1815 */ 1816 STRUCT_INIT(embuf, datamodel); 1817 if (copyin((void *) arg, STRUCT_BUF(embuf), 1818 STRUCT_SIZE(embuf))) { 1819 rw_exit(&mnp->mnt_contents); 1820 error = EFAULT; 1821 break; 1822 } 1823 uemp = STRUCT_FGETP(embuf, mbuf_emp); 1824 ubufp = STRUCT_FGETP(embuf, mbuf_buf); 1825 ubufsize = STRUCT_FGET(embuf, mbuf_bufsize); 1826 1827 /* 1828 * Check that the text buffer offered by the user will be large 1829 * enough to accommodate the text for this entry. 1830 */ 1831 if (elemp->mnte_text_size > ubufsize) { 1832 rw_exit(&mnp->mnt_contents); 1833 *rvalp = MNTFS_TOOLONG; 1834 break; 1835 } 1836 1837 /* 1838 * Populate the user's struct mnttab and text buffer using the 1839 * element's contents. 1840 */ 1841 if (mntfs_copyout_elem(elemp, uemp, ubufp, cmd, datamodel)) { 1842 error = EFAULT; 1843 } else { 1844 rw_enter(dblockp, RW_READER); 1845 elemp = mntfs_get_next_elem(snapp, elemp); 1846 rw_exit(dblockp); 1847 snapp->mnts_next = elemp; 1848 } 1849 rw_exit(&mnp->mnt_contents); 1850 break; 1851 } 1852 1853 default: 1854 error = EINVAL; 1855 break; 1856 } 1857 1858 return (error); 1859 } 1860 1861 /* 1862 * mntfs provides a new vnode for each open(2). Two vnodes will represent the 1863 * same instance of /etc/mnttab if they share the same (zone-specific) vfs. 1864 */ 1865 /* ARGSUSED */ 1866 int 1867 mntcmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct) 1868 { 1869 return (vp1 != NULL && vp2 != NULL && vp1->v_vfsp == vp2->v_vfsp); 1870 } 1871 1872 /* 1873 * /mntfs vnode operations vector 1874 */ 1875 const fs_operation_def_t mnt_vnodeops_template[] = { 1876 VOPNAME_OPEN, { .vop_open = mntopen }, 1877 VOPNAME_CLOSE, { .vop_close = mntclose }, 1878 VOPNAME_READ, { .vop_read = mntread }, 1879 VOPNAME_IOCTL, { .vop_ioctl = mntioctl }, 1880 VOPNAME_GETATTR, { .vop_getattr = mntgetattr }, 1881 VOPNAME_ACCESS, { .vop_access = mntaccess }, 1882 VOPNAME_FSYNC, { .vop_fsync = mntfsync }, 1883 VOPNAME_INACTIVE, { .vop_inactive = mntinactive }, 1884 VOPNAME_SEEK, { .vop_seek = mntseek }, 1885 VOPNAME_POLL, { .vop_poll = mntpoll }, 1886 VOPNAME_CMP, { .vop_cmp = mntcmp }, 1887 VOPNAME_DISPOSE, { .error = fs_error }, 1888 VOPNAME_SHRLOCK, { .error = fs_error }, 1889 NULL, NULL 1890 }; 1891