1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Portions Copyright 2007 Shivakumar GN */ 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/cmn_err.h> 31 #include <sys/debug.h> 32 #include <sys/dirent.h> 33 #include <sys/kmem.h> 34 #include <sys/mman.h> 35 #include <sys/mutex.h> 36 #include <sys/sysmacros.h> 37 #include <sys/systm.h> 38 #include <sys/sunddi.h> 39 #include <sys/uio.h> 40 #include <sys/vmsystm.h> 41 #include <sys/vfs.h> 42 #include <sys/vnode.h> 43 44 #include <vm/as.h> 45 #include <vm/seg_vn.h> 46 47 #include <sys/gfs.h> 48 49 /* 50 * Generic pseudo-filesystem routines. 51 * 52 * There are significant similarities between the implementation of certain file 53 * system entry points across different filesystems. While one could attempt to 54 * "choke up on the bat" and incorporate common functionality into a VOP 55 * preamble or postamble, such an approach is limited in the benefit it can 56 * provide. 
 * In this file we instead define a toolkit of routines which can be
 * called from a filesystem (with in-kernel pseudo-filesystems being the focus
 * of the exercise) in a more component-like fashion.
 *
 * There are three basic classes of routines:
 *
 * 1) Low-level support routines
 *
 *	These routines are designed to play a support role for existing
 *	pseudo-filesystems (such as procfs).  They simplify common tasks,
 *	without forcing the filesystem to hand over management to GFS.  The
 *	routines covered are:
 *
 *		gfs_readdir_init()
 *		gfs_readdir_emit()
 *		gfs_readdir_emitn()
 *		gfs_readdir_pred()
 *		gfs_readdir_fini()
 *		gfs_lookup_dot()
 *
 * 2) Complete GFS management
 *
 *	These routines take a more active role in management of the
 *	pseudo-filesystem.  They handle the relationship between vnode private
 *	data and VFS data, as well as the relationship between vnodes in the
 *	directory hierarchy.
 *
 *	In order to use these interfaces, the first member of every private
 *	v_data must be a gfs_file_t or a gfs_dir_t.  This hands over all control
 *	to GFS.
 *
 *		gfs_file_create()
 *		gfs_dir_create()
 *		gfs_root_create()
 *
 *		gfs_file_inactive()
 *		gfs_dir_inactive()
 *		gfs_dir_lookup()
 *		gfs_dir_readdir()
 *
 *		gfs_vop_inactive()
 *		gfs_vop_lookup()
 *		gfs_vop_readdir()
 *		gfs_vop_map()
 *
 * 3) Single File pseudo-filesystems
 *
 *	This routine creates a rooted file to be overlaid on top of another
 *	file in the physical filespace.
 *
 *	Note that the parent is NULL (actually the vfs), but there is nothing
 *	technically keeping such a file from utilizing the "Complete GFS
 *	management" set of routines.
 *
 *		gfs_root_create_file()
 */

/*
 * gfs_make_opsvec: take an array of vnode type definitions and create
 * their vnodeops_t structures
 *
 * This routine takes an array of gfs_opsvec_t's.
It could 118 * alternatively take an array of gfs_opsvec_t*'s, which would allow 119 * vnode types to be completely defined in files external to the caller 120 * of gfs_make_opsvec(). As it stands, much more sharing takes place -- 121 * both the caller and the vnode type provider need to access gfsv_ops 122 * and gfsv_template, and the caller also needs to know gfsv_name. 123 */ 124 int 125 gfs_make_opsvec(gfs_opsvec_t *vec) 126 { 127 int error, i; 128 129 for (i = 0; ; i++) { 130 if (vec[i].gfsv_name == NULL) 131 return (0); 132 error = vn_make_ops(vec[i].gfsv_name, vec[i].gfsv_template, 133 vec[i].gfsv_ops); 134 if (error) 135 break; 136 } 137 138 cmn_err(CE_WARN, "gfs_make_opsvec: bad vnode ops template for '%s'", 139 vec[i].gfsv_name); 140 for (i--; i >= 0; i--) { 141 vn_freevnodeops(*vec[i].gfsv_ops); 142 *vec[i].gfsv_ops = NULL; 143 } 144 return (error); 145 } 146 147 /* 148 * Low level directory routines 149 * 150 * These routines provide some simple abstractions for reading directories. 151 * They are designed to be used by existing pseudo filesystems (namely procfs) 152 * that already have a complicated management infrastructure. 153 */ 154 155 /* 156 * gfs_get_parent_ino: used to obtain a parent inode number and the 157 * inode number of the given vnode in preparation for calling gfs_readdir_init. 
158 */ 159 int 160 gfs_get_parent_ino(vnode_t *dvp, cred_t *cr, caller_context_t *ct, 161 ino64_t *pino, ino64_t *ino) 162 { 163 vnode_t *parent; 164 gfs_dir_t *dp = dvp->v_data; 165 int error; 166 167 *ino = dp->gfsd_file.gfs_ino; 168 parent = dp->gfsd_file.gfs_parent; 169 170 if (parent == NULL) { 171 *pino = *ino; /* root of filesystem */ 172 } else if (dvp->v_flag & V_XATTRDIR) { 173 vattr_t va; 174 175 va.va_mask = AT_NODEID; 176 error = VOP_GETATTR(parent, &va, 0, cr, ct); 177 if (error) 178 return (error); 179 *pino = va.va_nodeid; 180 } else { 181 *pino = ((gfs_file_t *)(parent->v_data))->gfs_ino; 182 } 183 184 return (0); 185 } 186 187 /* 188 * gfs_readdir_init: initiate a generic readdir 189 * st - a pointer to an uninitialized gfs_readdir_state_t structure 190 * name_max - the directory's maximum file name length 191 * ureclen - the exported file-space record length (1 for non-legacy FSs) 192 * uiop - the uiop passed to readdir 193 * parent - the parent directory's inode 194 * self - this directory's inode 195 * flags - flags from VOP_READDIR 196 * 197 * Returns 0 or a non-zero errno. 198 * 199 * Typical VOP_READDIR usage of gfs_readdir_*: 200 * 201 * if ((error = gfs_readdir_init(...)) != 0) 202 * return (error); 203 * eof = 0; 204 * while ((error = gfs_readdir_pred(..., &voffset)) != 0) { 205 * if (!consumer_entry_at(voffset)) 206 * voffset = consumer_next_entry(voffset); 207 * if (consumer_eof(voffset)) { 208 * eof = 1 209 * break; 210 * } 211 * if ((error = gfs_readdir_emit(..., voffset, 212 * consumer_ino(voffset), consumer_name(voffset))) != 0) 213 * break; 214 * } 215 * return (gfs_readdir_fini(..., error, eofp, eof)); 216 * 217 * As you can see, a zero result from gfs_readdir_pred() or 218 * gfs_readdir_emit() indicates that processing should continue, 219 * whereas a non-zero result indicates that the loop should terminate. 
220 * Most consumers need do nothing more than let gfs_readdir_fini() 221 * determine what the cause of failure was and return the appropriate 222 * value. 223 */ 224 int 225 gfs_readdir_init(gfs_readdir_state_t *st, int name_max, int ureclen, 226 uio_t *uiop, ino64_t parent, ino64_t self, int flags) 227 { 228 size_t dirent_size; 229 230 if (uiop->uio_loffset < 0 || uiop->uio_resid <= 0 || 231 (uiop->uio_loffset % ureclen) != 0) 232 return (EINVAL); 233 234 st->grd_ureclen = ureclen; 235 st->grd_oresid = uiop->uio_resid; 236 st->grd_namlen = name_max; 237 if (flags & V_RDDIR_ENTFLAGS) 238 dirent_size = EDIRENT_RECLEN(st->grd_namlen); 239 else 240 dirent_size = DIRENT64_RECLEN(st->grd_namlen); 241 st->grd_dirent = kmem_zalloc(dirent_size, KM_SLEEP); 242 st->grd_parent = parent; 243 st->grd_self = self; 244 st->grd_flags = flags; 245 246 return (0); 247 } 248 249 /* 250 * gfs_readdir_emit_int: internal routine to emit directory entry 251 * 252 * st - the current readdir state, which must have d_ino/ed_ino 253 * and d_name/ed_name set 254 * uiop - caller-supplied uio pointer 255 * next - the offset of the next entry 256 */ 257 static int 258 gfs_readdir_emit_int(gfs_readdir_state_t *st, uio_t *uiop, offset_t next) 259 { 260 int reclen; 261 dirent64_t *dp; 262 edirent_t *edp; 263 264 if (st->grd_flags & V_RDDIR_ENTFLAGS) { 265 edp = st->grd_dirent; 266 reclen = EDIRENT_RECLEN(strlen(edp->ed_name)); 267 } else { 268 dp = st->grd_dirent; 269 reclen = DIRENT64_RECLEN(strlen(dp->d_name)); 270 } 271 272 if (reclen > uiop->uio_resid) { 273 /* 274 * Error if no entries were returned yet 275 */ 276 if (uiop->uio_resid == st->grd_oresid) 277 return (EINVAL); 278 return (-1); 279 } 280 281 if (st->grd_flags & V_RDDIR_ENTFLAGS) { 282 edp->ed_off = next; 283 edp->ed_reclen = (ushort_t)reclen; 284 } else { 285 dp->d_off = next; 286 dp->d_reclen = (ushort_t)reclen; 287 } 288 289 if (uiomove((caddr_t)st->grd_dirent, reclen, UIO_READ, uiop)) 290 return (EFAULT); 291 292 
uiop->uio_loffset = next; 293 294 return (0); 295 } 296 297 /* 298 * gfs_readdir_emit: emit a directory entry 299 * voff - the virtual offset (obtained from gfs_readdir_pred) 300 * ino - the entry's inode 301 * name - the entry's name 302 * eflags - value for ed_eflags (if processing edirent_t) 303 * 304 * Returns a 0 on success, a non-zero errno on failure, or -1 if the 305 * readdir loop should terminate. A non-zero result (either errno or 306 * -1) from this function is typically passed directly to 307 * gfs_readdir_fini(). 308 */ 309 int 310 gfs_readdir_emit(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff, 311 ino64_t ino, const char *name, int eflags) 312 { 313 offset_t off = (voff + 2) * st->grd_ureclen; 314 315 if (st->grd_flags & V_RDDIR_ENTFLAGS) { 316 edirent_t *edp = st->grd_dirent; 317 318 edp->ed_ino = ino; 319 (void) strncpy(edp->ed_name, name, st->grd_namlen); 320 edp->ed_eflags = eflags; 321 } else { 322 dirent64_t *dp = st->grd_dirent; 323 324 dp->d_ino = ino; 325 (void) strncpy(dp->d_name, name, st->grd_namlen); 326 } 327 328 /* 329 * Inter-entry offsets are invalid, so we assume a record size of 330 * grd_ureclen and explicitly set the offset appropriately. 331 */ 332 return (gfs_readdir_emit_int(st, uiop, off + st->grd_ureclen)); 333 } 334 335 /* 336 * gfs_readdir_emitn: like gfs_readdir_emit(), but takes an integer 337 * instead of a string for the entry's name. 338 */ 339 int 340 gfs_readdir_emitn(gfs_readdir_state_t *st, uio_t *uiop, offset_t voff, 341 ino64_t ino, unsigned long num) 342 { 343 char buf[40]; 344 345 numtos(num, buf); 346 return (gfs_readdir_emit(st, uiop, voff, ino, buf, 0)); 347 } 348 349 /* 350 * gfs_readdir_pred: readdir loop predicate 351 * voffp - a pointer in which the next virtual offset should be stored 352 * 353 * Returns a 0 on success, a non-zero errno on failure, or -1 if the 354 * readdir loop should terminate. 
A non-zero result (either errno or 355 * -1) from this function is typically passed directly to 356 * gfs_readdir_fini(). 357 */ 358 int 359 gfs_readdir_pred(gfs_readdir_state_t *st, uio_t *uiop, offset_t *voffp) 360 { 361 offset_t off, voff; 362 int error; 363 364 top: 365 if (uiop->uio_resid <= 0) 366 return (-1); 367 368 off = uiop->uio_loffset / st->grd_ureclen; 369 voff = off - 2; 370 if (off == 0) { 371 if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_self, 372 ".", 0)) == 0) 373 goto top; 374 } else if (off == 1) { 375 if ((error = gfs_readdir_emit(st, uiop, voff, st->grd_parent, 376 "..", 0)) == 0) 377 goto top; 378 } else { 379 *voffp = voff; 380 return (0); 381 } 382 383 return (error); 384 } 385 386 /* 387 * gfs_readdir_fini: generic readdir cleanup 388 * error - if positive, an error to return 389 * eofp - the eofp passed to readdir 390 * eof - the eof value 391 * 392 * Returns a 0 on success, a non-zero errno on failure. This result 393 * should be returned from readdir. 394 */ 395 int 396 gfs_readdir_fini(gfs_readdir_state_t *st, int error, int *eofp, int eof) 397 { 398 size_t dirent_size; 399 400 if (st->grd_flags & V_RDDIR_ENTFLAGS) 401 dirent_size = EDIRENT_RECLEN(st->grd_namlen); 402 else 403 dirent_size = DIRENT64_RECLEN(st->grd_namlen); 404 kmem_free(st->grd_dirent, dirent_size); 405 if (error > 0) 406 return (error); 407 if (eofp) 408 *eofp = eof; 409 return (0); 410 } 411 412 /* 413 * gfs_lookup_dot 414 * 415 * Performs a basic check for "." and ".." directory entries. 
416 */ 417 int 418 gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) 419 { 420 if (*nm == '\0' || strcmp(nm, ".") == 0) { 421 VN_HOLD(dvp); 422 *vpp = dvp; 423 return (0); 424 } else if (strcmp(nm, "..") == 0) { 425 if (pvp == NULL) { 426 ASSERT(dvp->v_flag & VROOT); 427 VN_HOLD(dvp); 428 *vpp = dvp; 429 } else { 430 VN_HOLD(pvp); 431 *vpp = pvp; 432 } 433 return (0); 434 } 435 436 return (-1); 437 } 438 439 /* 440 * gfs_file_create(): create a new GFS file 441 * 442 * size - size of private data structure (v_data) 443 * pvp - parent vnode (GFS directory) 444 * ops - vnode operations vector 445 * 446 * In order to use this interface, the parent vnode must have been created by 447 * gfs_dir_create(), and the private data stored in v_data must have a 448 * 'gfs_file_t' as its first field. 449 * 450 * Given these constraints, this routine will automatically: 451 * 452 * - Allocate v_data for the vnode 453 * - Initialize necessary fields in the vnode 454 * - Hold the parent 455 */ 456 vnode_t * 457 gfs_file_create(size_t size, vnode_t *pvp, vnodeops_t *ops) 458 { 459 gfs_file_t *fp; 460 vnode_t *vp; 461 462 /* 463 * Allocate vnode and internal data structure 464 */ 465 fp = kmem_zalloc(size, KM_SLEEP); 466 vp = vn_alloc(KM_SLEEP); 467 468 /* 469 * Set up various pointers 470 */ 471 fp->gfs_vnode = vp; 472 fp->gfs_parent = pvp; 473 vp->v_data = fp; 474 fp->gfs_size = size; 475 fp->gfs_type = GFS_FILE; 476 477 /* 478 * Initialize vnode and hold parent. 
479 */ 480 vn_setops(vp, ops); 481 if (pvp) { 482 VN_SET_VFS_TYPE_DEV(vp, pvp->v_vfsp, VREG, 0); 483 VN_HOLD(pvp); 484 } 485 486 return (vp); 487 } 488 489 /* 490 * gfs_dir_create: creates a new directory in the parent 491 * 492 * size - size of private data structure (v_data) 493 * pvp - parent vnode (GFS directory) 494 * ops - vnode operations vector 495 * entries - NULL-terminated list of static entries (if any) 496 * maxlen - maximum length of a directory entry 497 * readdir_cb - readdir callback (see gfs_dir_readdir) 498 * inode_cb - inode callback (see gfs_dir_readdir) 499 * lookup_cb - lookup callback (see gfs_dir_lookup) 500 * 501 * In order to use this function, the first member of the private vnode 502 * structure (v_data) must be a gfs_dir_t. For each directory, there are 503 * static entries, defined when the structure is initialized, and dynamic 504 * entries, retrieved through callbacks. 505 * 506 * If a directory has static entries, then it must supply a inode callback, 507 * which will compute the inode number based on the parent and the index. 508 * For a directory with dynamic entries, the caller must supply a readdir 509 * callback and a lookup callback. If a static lookup fails, we fall back to 510 * the supplied lookup callback, if any. 511 * 512 * This function also performs the same initialization as gfs_file_create(). 
513 */ 514 vnode_t * 515 gfs_dir_create(size_t struct_size, vnode_t *pvp, vnodeops_t *ops, 516 gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen, 517 gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb) 518 { 519 vnode_t *vp; 520 gfs_dir_t *dp; 521 gfs_dirent_t *de; 522 523 vp = gfs_file_create(struct_size, pvp, ops); 524 vp->v_type = VDIR; 525 526 dp = vp->v_data; 527 dp->gfsd_file.gfs_type = GFS_DIR; 528 dp->gfsd_maxlen = maxlen; 529 530 if (entries != NULL) { 531 for (de = entries; de->gfse_name != NULL; de++) 532 dp->gfsd_nstatic++; 533 534 dp->gfsd_static = kmem_alloc( 535 dp->gfsd_nstatic * sizeof (gfs_dirent_t), KM_SLEEP); 536 bcopy(entries, dp->gfsd_static, 537 dp->gfsd_nstatic * sizeof (gfs_dirent_t)); 538 } 539 540 dp->gfsd_readdir = readdir_cb; 541 dp->gfsd_lookup = lookup_cb; 542 dp->gfsd_inode = inode_cb; 543 544 mutex_init(&dp->gfsd_lock, NULL, MUTEX_DEFAULT, NULL); 545 546 return (vp); 547 } 548 549 /* 550 * gfs_root_create(): create a root vnode for a GFS filesystem 551 * 552 * Similar to gfs_dir_create(), this creates a root vnode for a filesystem. The 553 * only difference is that it takes a vfs_t instead of a vnode_t as its parent. 554 */ 555 vnode_t * 556 gfs_root_create(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino, 557 gfs_dirent_t *entries, gfs_inode_cb inode_cb, int maxlen, 558 gfs_readdir_cb readdir_cb, gfs_lookup_cb lookup_cb) 559 { 560 vnode_t *vp = gfs_dir_create(size, NULL, ops, entries, inode_cb, 561 maxlen, readdir_cb, lookup_cb); 562 563 /* Manually set the inode */ 564 ((gfs_file_t *)vp->v_data)->gfs_ino = ino; 565 566 VFS_HOLD(vfsp); 567 VN_SET_VFS_TYPE_DEV(vp, vfsp, VDIR, 0); 568 vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT; 569 570 return (vp); 571 } 572 573 /* 574 * gfs_root_create_file(): create a root vnode for a GFS file as a filesystem 575 * 576 * Similar to gfs_root_create(), this creates a root vnode for a file to 577 * be the pseudo-filesystem. 
578 */ 579 vnode_t * 580 gfs_root_create_file(size_t size, vfs_t *vfsp, vnodeops_t *ops, ino64_t ino) 581 { 582 vnode_t *vp = gfs_file_create(size, NULL, ops); 583 584 ((gfs_file_t *)vp->v_data)->gfs_ino = ino; 585 586 VFS_HOLD(vfsp); 587 VN_SET_VFS_TYPE_DEV(vp, vfsp, VREG, 0); 588 vp->v_flag |= VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT; 589 590 return (vp); 591 } 592 593 /* 594 * gfs_file_inactive() 595 * 596 * Called from the VOP_INACTIVE() routine. If necessary, this routine will 597 * remove the given vnode from the parent directory and clean up any references 598 * in the VFS layer. 599 * 600 * If the vnode was not removed (due to a race with vget), then NULL is 601 * returned. Otherwise, a pointer to the private data is returned. 602 */ 603 void * 604 gfs_file_inactive(vnode_t *vp) 605 { 606 int i; 607 gfs_dirent_t *ge = NULL; 608 gfs_file_t *fp = vp->v_data; 609 gfs_dir_t *dp = NULL; 610 void *data; 611 612 if (fp->gfs_parent == NULL || (vp->v_flag & V_XATTRDIR)) 613 goto found; 614 615 dp = fp->gfs_parent->v_data; 616 617 /* 618 * First, see if this vnode is cached in the parent. 619 */ 620 gfs_dir_lock(dp); 621 622 /* 623 * Find it in the set of static entries. 624 */ 625 for (i = 0; i < dp->gfsd_nstatic; i++) { 626 ge = &dp->gfsd_static[i]; 627 628 if (ge->gfse_vnode == vp) 629 goto found; 630 } 631 632 /* 633 * If 'ge' is NULL, then it is a dynamic entry. 634 */ 635 ge = NULL; 636 637 found: 638 if (vp->v_flag & V_XATTRDIR) { 639 mutex_enter(&fp->gfs_parent->v_lock); 640 } 641 mutex_enter(&vp->v_lock); 642 if (vp->v_count == 1) { 643 /* 644 * Really remove this vnode 645 */ 646 data = vp->v_data; 647 if (ge != NULL) { 648 /* 649 * If this was a statically cached entry, simply set the 650 * cached vnode to NULL. 
651 */ 652 ge->gfse_vnode = NULL; 653 } 654 if (vp->v_flag & V_XATTRDIR) { 655 fp->gfs_parent->v_xattrdir = NULL; 656 mutex_exit(&fp->gfs_parent->v_lock); 657 } 658 mutex_exit(&vp->v_lock); 659 660 /* 661 * Free vnode and release parent 662 */ 663 if (fp->gfs_parent) { 664 if (dp) { 665 gfs_dir_unlock(dp); 666 } 667 VN_RELE(fp->gfs_parent); 668 } else { 669 ASSERT(vp->v_vfsp != NULL); 670 VFS_RELE(vp->v_vfsp); 671 } 672 vn_free(vp); 673 } else { 674 vp->v_count--; 675 data = NULL; 676 mutex_exit(&vp->v_lock); 677 if (vp->v_flag & V_XATTRDIR) { 678 mutex_exit(&fp->gfs_parent->v_lock); 679 } 680 if (dp) 681 gfs_dir_unlock(dp); 682 } 683 684 return (data); 685 } 686 687 /* 688 * gfs_dir_inactive() 689 * 690 * Same as above, but for directories. 691 */ 692 void * 693 gfs_dir_inactive(vnode_t *vp) 694 { 695 gfs_dir_t *dp; 696 697 ASSERT(vp->v_type == VDIR); 698 699 if ((dp = gfs_file_inactive(vp)) != NULL) { 700 mutex_destroy(&dp->gfsd_lock); 701 if (dp->gfsd_nstatic) 702 kmem_free(dp->gfsd_static, 703 dp->gfsd_nstatic * sizeof (gfs_dirent_t)); 704 } 705 706 return (dp); 707 } 708 709 /* 710 * gfs_dir_lookup_dynamic() 711 * 712 * This routine looks up the provided name amongst the dynamic entries 713 * in the gfs directory and returns the corresponding vnode, if found. 714 * 715 * The gfs directory is expected to be locked by the caller prior to 716 * calling this function. The directory will be unlocked during the 717 * execution of this function, but will be locked upon return from the 718 * function. This function returns 0 on success, non-zero on error. 719 * 720 * The dynamic lookups are performed by invoking the lookup 721 * callback, which is passed to this function as the first argument. 
722 * The arguments to the callback are: 723 * 724 * int gfs_lookup_cb(vnode_t *pvp, const char *nm, vnode_t **vpp, cred_t *cr, 725 * int flags, int *deflgs, pathname_t *rpnp); 726 * 727 * pvp - parent vnode 728 * nm - name of entry 729 * vpp - pointer to resulting vnode 730 * cr - pointer to cred 731 * flags - flags value from lookup request 732 * ignored here; currently only used to request 733 * insensitive lookups 734 * direntflgs - output parameter, directory entry flags 735 * ignored here; currently only used to indicate a lookup 736 * has more than one possible match when case is not considered 737 * realpnp - output parameter, real pathname 738 * ignored here; when lookup was performed case-insensitively, 739 * this field contains the "real" name of the file. 740 * 741 * Returns 0 on success, non-zero on error. 742 */ 743 static int 744 gfs_dir_lookup_dynamic(gfs_lookup_cb callback, gfs_dir_t *dp, 745 const char *nm, vnode_t *dvp, vnode_t **vpp, cred_t *cr, int flags, 746 int *direntflags, pathname_t *realpnp) 747 { 748 gfs_file_t *fp; 749 ino64_t ino; 750 int ret; 751 752 ASSERT(GFS_DIR_LOCKED(dp)); 753 754 /* 755 * Drop the directory lock, as the lookup routine 756 * will need to allocate memory, or otherwise deadlock on this 757 * directory. 758 */ 759 gfs_dir_unlock(dp); 760 ret = callback(dvp, nm, vpp, &ino, cr, flags, direntflags, realpnp); 761 gfs_dir_lock(dp); 762 763 /* 764 * The callback for extended attributes returns a vnode 765 * with v_data from an underlying fs. 766 */ 767 if (ret == 0 && !IS_XATTRDIR(dvp)) { 768 fp = (gfs_file_t *)((*vpp)->v_data); 769 fp->gfs_index = -1; 770 fp->gfs_ino = ino; 771 } 772 773 return (ret); 774 } 775 776 /* 777 * gfs_dir_lookup_static() 778 * 779 * This routine looks up the provided name amongst the static entries 780 * in the gfs directory and returns the corresponding vnode, if found. 
781 * The first argument to the function is a pointer to the comparison 782 * function this function should use to decide if names are a match. 783 * 784 * If a match is found, and GFS_CACHE_VNODE is set and the vnode 785 * exists, we simply return the existing vnode. Otherwise, we call 786 * the static entry's callback routine, caching the result if 787 * necessary. If the idx pointer argument is non-NULL, we use it to 788 * return the index of the matching static entry. 789 * 790 * The gfs directory is expected to be locked by the caller prior to calling 791 * this function. The directory may be unlocked during the execution of 792 * this function, but will be locked upon return from the function. 793 * 794 * This function returns 0 if a match is found, ENOENT if not. 795 */ 796 static int 797 gfs_dir_lookup_static(int (*compare)(const char *, const char *), 798 gfs_dir_t *dp, const char *nm, vnode_t *dvp, int *idx, 799 vnode_t **vpp, pathname_t *rpnp) 800 { 801 gfs_dirent_t *ge; 802 vnode_t *vp = NULL; 803 int i; 804 805 ASSERT(GFS_DIR_LOCKED(dp)); 806 807 /* 808 * Search static entries. 809 */ 810 for (i = 0; i < dp->gfsd_nstatic; i++) { 811 ge = &dp->gfsd_static[i]; 812 813 if (compare(ge->gfse_name, nm) == 0) { 814 if (rpnp) 815 (void) strlcpy(rpnp->pn_buf, ge->gfse_name, 816 rpnp->pn_bufsize); 817 818 if (ge->gfse_vnode) { 819 ASSERT(ge->gfse_flags & GFS_CACHE_VNODE); 820 vp = ge->gfse_vnode; 821 VN_HOLD(vp); 822 break; 823 } 824 825 /* 826 * We drop the directory lock, as the constructor will 827 * need to do KM_SLEEP allocations. If we return from 828 * the constructor only to find that a parallel 829 * operation has completed, and GFS_CACHE_VNODE is set 830 * for this entry, we discard the result in favor of 831 * the cached vnode. 832 */ 833 gfs_dir_unlock(dp); 834 vp = ge->gfse_ctor(dvp); 835 gfs_dir_lock(dp); 836 837 ((gfs_file_t *)vp->v_data)->gfs_index = i; 838 839 /* Set the inode according to the callback. 
*/ 840 ((gfs_file_t *)vp->v_data)->gfs_ino = 841 dp->gfsd_inode(dvp, i); 842 843 if (ge->gfse_flags & GFS_CACHE_VNODE) { 844 if (ge->gfse_vnode == NULL) { 845 ge->gfse_vnode = vp; 846 } else { 847 /* 848 * A parallel constructor beat us to it; 849 * return existing vnode. We have to be 850 * careful because we can't release the 851 * current vnode while holding the 852 * directory lock; its inactive routine 853 * will try to lock this directory. 854 */ 855 vnode_t *oldvp = vp; 856 vp = ge->gfse_vnode; 857 VN_HOLD(vp); 858 859 gfs_dir_unlock(dp); 860 VN_RELE(oldvp); 861 gfs_dir_lock(dp); 862 } 863 } 864 break; 865 } 866 } 867 868 if (vp == NULL) 869 return (ENOENT); 870 else if (idx) 871 *idx = i; 872 *vpp = vp; 873 return (0); 874 } 875 876 /* 877 * gfs_dir_lookup() 878 * 879 * Looks up the given name in the directory and returns the corresponding 880 * vnode, if found. 881 * 882 * First, we search statically defined entries, if any, with a call to 883 * gfs_dir_lookup_static(). If no static entry is found, and we have 884 * a callback function we try a dynamic lookup via gfs_dir_lookup_dynamic(). 885 * 886 * This function returns 0 on success, non-zero on error. 
887 */ 888 int 889 gfs_dir_lookup(vnode_t *dvp, const char *nm, vnode_t **vpp, cred_t *cr, 890 int flags, int *direntflags, pathname_t *realpnp) 891 { 892 gfs_dir_t *dp = dvp->v_data; 893 boolean_t casecheck; 894 vnode_t *dynvp = NULL; 895 vnode_t *vp = NULL; 896 int (*compare)(const char *, const char *); 897 int error, idx; 898 899 ASSERT(dvp->v_type == VDIR); 900 901 if (gfs_lookup_dot(vpp, dvp, dp->gfsd_file.gfs_parent, nm) == 0) 902 return (0); 903 904 casecheck = (flags & FIGNORECASE) != 0 && direntflags != NULL; 905 if (vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) || 906 (flags & FIGNORECASE)) 907 compare = strcasecmp; 908 else 909 compare = strcmp; 910 911 gfs_dir_lock(dp); 912 913 error = gfs_dir_lookup_static(compare, dp, nm, dvp, &idx, &vp, realpnp); 914 915 if (vp && casecheck) { 916 gfs_dirent_t *ge; 917 int i; 918 919 for (i = idx + 1; i < dp->gfsd_nstatic; i++) { 920 ge = &dp->gfsd_static[i]; 921 922 if (strcasecmp(ge->gfse_name, nm) == 0) { 923 *direntflags |= ED_CASE_CONFLICT; 924 goto out; 925 } 926 } 927 } 928 929 if ((error || casecheck) && dp->gfsd_lookup) 930 error = gfs_dir_lookup_dynamic(dp->gfsd_lookup, dp, nm, dvp, 931 &dynvp, cr, flags, direntflags, vp ? NULL : realpnp); 932 933 if (vp && dynvp) { 934 /* static and dynamic entries are case-insensitive conflict */ 935 ASSERT(casecheck); 936 *direntflags |= ED_CASE_CONFLICT; 937 VN_RELE(dynvp); 938 } else if (vp == NULL) { 939 vp = dynvp; 940 } else if (error == ENOENT) { 941 error = 0; 942 } else if (error) { 943 VN_RELE(vp); 944 vp = NULL; 945 } 946 947 out: 948 gfs_dir_unlock(dp); 949 950 *vpp = vp; 951 return (error); 952 } 953 954 /* 955 * gfs_dir_readdir: does a readdir() on the given directory 956 * 957 * dvp - directory vnode 958 * uiop - uio structure 959 * eofp - eof pointer 960 * data - arbitrary data passed to readdir callback 961 * 962 * This routine does all the readdir() dirty work. Even so, the caller must 963 * supply two callbacks in order to get full compatibility. 
964 * 965 * If the directory contains static entries, an inode callback must be 966 * specified. This avoids having to create every vnode and call VOP_GETATTR() 967 * when reading the directory. This function has the following arguments: 968 * 969 * ino_t gfs_inode_cb(vnode_t *vp, int index); 970 * 971 * vp - vnode for the directory 972 * index - index in original gfs_dirent_t array 973 * 974 * Returns the inode number for the given entry. 975 * 976 * For directories with dynamic entries, a readdir callback must be provided. 977 * This is significantly more complex, thanks to the particulars of 978 * VOP_READDIR(). 979 * 980 * int gfs_readdir_cb(vnode_t *vp, void *dp, int *eofp, 981 * offset_t *off, offset_t *nextoff, void *data, int flags) 982 * 983 * vp - directory vnode 984 * dp - directory entry, sized according to maxlen given to 985 * gfs_dir_create(). callback must fill in d_name and 986 * d_ino (if a dirent64_t), or ed_name, ed_ino, and ed_eflags 987 * (if an edirent_t). edirent_t is used if V_RDDIR_ENTFLAGS 988 * is set in 'flags'. 989 * eofp - callback must set to 1 when EOF has been reached 990 * off - on entry, the last offset read from the directory. Callback 991 * must set to the offset of the current entry, typically left 992 * untouched. 993 * nextoff - callback must set to offset of next entry. Typically 994 * (off + 1) 995 * data - caller-supplied data 996 * flags - VOP_READDIR flags 997 * 998 * Return 0 on success, or error on failure. 
999 */ 1000 int 1001 gfs_dir_readdir(vnode_t *dvp, uio_t *uiop, int *eofp, void *data, cred_t *cr, 1002 caller_context_t *ct, int flags) 1003 { 1004 gfs_readdir_state_t gstate; 1005 int error, eof = 0; 1006 ino64_t ino, pino; 1007 offset_t off, next; 1008 gfs_dir_t *dp = dvp->v_data; 1009 1010 error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino); 1011 if (error) 1012 return (error); 1013 1014 if ((error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1, uiop, 1015 pino, ino, flags)) != 0) 1016 return (error); 1017 1018 while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 && 1019 !eof) { 1020 1021 if (off >= 0 && off < dp->gfsd_nstatic) { 1022 ino = dp->gfsd_inode(dvp, off); 1023 1024 if ((error = gfs_readdir_emit(&gstate, uiop, 1025 off, ino, dp->gfsd_static[off].gfse_name, 0)) 1026 != 0) 1027 break; 1028 1029 } else if (dp->gfsd_readdir) { 1030 off -= dp->gfsd_nstatic; 1031 1032 if ((error = dp->gfsd_readdir(dvp, 1033 gstate.grd_dirent, &eof, &off, &next, 1034 data, flags)) != 0 || eof) 1035 break; 1036 1037 off += dp->gfsd_nstatic + 2; 1038 next += dp->gfsd_nstatic + 2; 1039 1040 if ((error = gfs_readdir_emit_int(&gstate, uiop, 1041 next)) != 0) 1042 break; 1043 } else { 1044 /* 1045 * Offset is beyond the end of the static entries, and 1046 * we have no dynamic entries. Set EOF. 1047 */ 1048 eof = 1; 1049 } 1050 } 1051 1052 return (gfs_readdir_fini(&gstate, error, eofp, eof)); 1053 } 1054 1055 1056 /* 1057 * gfs_vop_lookup: VOP_LOOKUP() entry point 1058 * 1059 * For use directly in vnode ops table. Given a GFS directory, calls 1060 * gfs_dir_lookup() as necessary. 
1061 */ 1062 /* ARGSUSED */ 1063 int 1064 gfs_vop_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, 1065 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 1066 int *direntflags, pathname_t *realpnp) 1067 { 1068 return (gfs_dir_lookup(dvp, nm, vpp, cr, flags, direntflags, realpnp)); 1069 } 1070 1071 /* 1072 * gfs_vop_readdir: VOP_READDIR() entry point 1073 * 1074 * For use directly in vnode ops table. Given a GFS directory, calls 1075 * gfs_dir_readdir() as necessary. 1076 */ 1077 /* ARGSUSED */ 1078 int 1079 gfs_vop_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, 1080 caller_context_t *ct, int flags) 1081 { 1082 return (gfs_dir_readdir(vp, uiop, eofp, NULL, cr, ct, flags)); 1083 } 1084 1085 1086 /* 1087 * gfs_vop_map: VOP_MAP() entry point 1088 * 1089 * Convenient routine for handling pseudo-files that wish to allow mmap() calls. 1090 * This function only works for readonly files, and uses the read function for 1091 * the vnode to fill in the data. The mapped data is immediately faulted in and 1092 * filled with the necessary data during this call; there are no getpage() or 1093 * putpage() routines. 1094 */ 1095 /* ARGSUSED */ 1096 int 1097 gfs_vop_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, 1098 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cred, 1099 caller_context_t *ct) 1100 { 1101 int rv; 1102 ssize_t resid = len; 1103 1104 /* 1105 * Check for bad parameters 1106 */ 1107 #ifdef _ILP32 1108 if (len > MAXOFF_T) 1109 return (ENOMEM); 1110 #endif 1111 if (vp->v_flag & VNOMAP) 1112 return (ENOTSUP); 1113 if (off > MAXOFF_T) 1114 return (EFBIG); 1115 if ((long)off < 0 || (long)(off + len) < 0) 1116 return (EINVAL); 1117 if (vp->v_type != VREG) 1118 return (ENODEV); 1119 if ((prot & (PROT_EXEC | PROT_WRITE)) != 0) 1120 return (EACCES); 1121 1122 /* 1123 * Find appropriate address if needed, otherwise clear address range. 
1124 */ 1125 as_rangelock(as); 1126 rv = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 1127 if (rv != 0) { 1128 as_rangeunlock(as); 1129 return (rv); 1130 } 1131 1132 /* 1133 * Create mapping 1134 */ 1135 rv = as_map(as, *addrp, len, segvn_create, zfod_argsp); 1136 as_rangeunlock(as); 1137 if (rv != 0) 1138 return (rv); 1139 1140 /* 1141 * Fill with data from read() 1142 */ 1143 rv = vn_rdwr(UIO_READ, vp, *addrp, len, off, UIO_USERSPACE, 1144 0, (rlim64_t)0, cred, &resid); 1145 1146 if (rv == 0 && resid != 0) 1147 rv = ENXIO; 1148 1149 if (rv != 0) { 1150 as_rangelock(as); 1151 (void) as_unmap(as, *addrp, len); 1152 as_rangeunlock(as); 1153 } 1154 1155 return (rv); 1156 } 1157 1158 /* 1159 * gfs_vop_inactive: VOP_INACTIVE() entry point 1160 * 1161 * Given a vnode that is a GFS file or directory, call gfs_file_inactive() or 1162 * gfs_dir_inactive() as necessary, and kmem_free()s associated private data. 1163 */ 1164 /* ARGSUSED */ 1165 void 1166 gfs_vop_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 1167 { 1168 gfs_file_t *fp = vp->v_data; 1169 void *data; 1170 1171 if (fp->gfs_type == GFS_DIR) 1172 data = gfs_dir_inactive(vp); 1173 else 1174 data = gfs_file_inactive(vp); 1175 1176 if (data != NULL) 1177 kmem_free(data, fp->gfs_size); 1178 } 1179