1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 26 */ 27 28 /* 29 * vnode ops for the /dev filesystem 30 * 31 * - VDIR, VCHR, CBLK, and VLNK are considered must supported files 32 * - VREG and VDOOR are used for some internal implementations in 33 * the global zone, e.g. devname and devfsadm communication 34 * - other file types are unusual in this namespace and 35 * not supported for now 36 */ 37 38 /* 39 * sdev has a few basic goals: 40 * o Provide /dev for the global zone as well as various non-global zones. 41 * o Provide the basic functionality that devfsadm might need (mknod, 42 * symlinks, etc.) 43 * o Allow persistent permissions on files in /dev. 44 * o Allow for dynamic directories and nodes for use by various services (pts, 45 * zvol, net, etc.) 46 * 47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's 48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's 49 * that we generally care about, dynamic and otherwise. 
50 * 51 * Persisting Information 52 * ---------------------- 53 * 54 * When sdev is mounted, it keeps track of the underlying file system it is 55 * mounted over. In certain situations, sdev will go and create entries in that 56 * underlying file system. These underlying 'back end' nodes are used as proxies 57 * for various changes in permissions. While specific sets of nodes, such as 58 * dynamic ones, are exempt, this process stores permission changes against 59 * these back end nodes. The point of all of this is to allow for these settings 60 * to persist across host and zone reboots. As an example, consider the entry 61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon 62 * changing the permissions on c0t0d0 you'd have the following logical 63 * relationships: 64 * 65 * +------------------+ sdev_vnode +--------------+ 66 * | sdev_node_t |<---------------->| vnode_t | 67 * | /dev/dsk/c0t0d0 |<---------------->| for sdev | 68 * +------------------+ +--------------+ 69 * | 70 * | sdev_attrvp 71 * | 72 * | +---------------------+ 73 * +--->| vnode_t for UFS|ZFS | 74 * | /dev/dsk/c0t0d0 | 75 * +---------------------+ 76 * 77 * sdev is generally in memory. Therefore when a lookup happens and there is no 78 * entry already inside of a directory cache, it will next check the backing 79 * store. If the backing store exists, we will reconstitute the sdev_node based 80 * on the information that we persisted. When we create the backing store node, 81 * we use the struct vattr information that we already have in sdev_node_t. 82 * Because of this, we already know if the entry was previously a symlink, 83 * directory, or some other kind of type. Note that not all types of nodes are 84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are 85 * eligible to be persisted. 86 * 87 * When the sdev_node is created and the lookup is done, we grab a hold on the 88 * underlying vnode as part of the call to VOP_LOOKUP. 
That reference is held 89 * until the sdev_node becomes inactive. Once its reference count reaches one 90 * and the VOP_INACTIVE callback fires leading to the destruction of the node, 91 * the reference on the underlying vnode will be released. 92 * 93 * The backing store node will be deleted only when the node itself is deleted 94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. 95 * 96 * Not everything can be persisted, see The Rules section for more details. 97 * 98 * Dynamic Nodes 99 * ------------- 100 * 101 * Dynamic nodes allow for specific interactions with various kernel subsystems 102 * when looking up directory entries. This allows the lookup and readdir 103 * functions to check against the kernel subsystems for validity, e.g. does a 104 * zvol or nic still exist. 105 * 106 * More specifically, when we create various directories we check if the 107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c). 108 * If it does, we swap out the vnode operations into a new set which combine the 109 * normal sdev vnode operations with the dynamic set here. 110 * 111 * In addition, various dynamic nodes implement a verification entry point. This 112 * verification entry is used as a part of lookup and readdir. The goal for 113 * these dynamic nodes is to allow them to check with the underlying subsystems 114 * to ensure that these devices are still present, or if they have gone away, to 115 * remove them from the results. This is indicated by using the SDEV_VTOR flag 116 * in vtab[]. 117 * 118 * Dynamic nodes have additional restrictions placed upon them. They may only 119 * appear at the top level directory of the file system. In addition, users 120 * cannot create dirents below any level of a dynamic node aside from its special 121 * vnops. 122 * 123 * Profiles 124 * -------- 125 * 126 * Profiles exist for the purpose of non-global zones. 
They work with the zone 127 * brands and zoneadmd to set up a filter of allowed devices that can appear in 128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a 129 * modctl system call. Specifically it allows one to add patterns of device 130 * paths to include and exclude. It allows for a collection of symlinks to be 131 * added and it allows for remapping names. 132 * 133 * When operating in a non-global zone, several of the sdev vnops are redirected 134 * to the profile versions. These impose additional restrictions such as 135 * enforcing that a non-global zone's /dev is read only. 136 * 137 * sdev_node_t States 138 * ------------------ 139 * 140 * A given sdev_node_t has a field called the sdev_state which describes where 141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT, 142 * SDEV_READY, and SDEV_ZOMBIE. 143 * 144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node 145 * is allocated, initialized and added to the directory's 146 * sdev_node cache. A node at this state will also 147 * have the SDEV_LOOKUP flag set. 148 * 149 * Other threads that are trying to look up a node at 150 * this state will be blocked until the SDEV_LOOKUP flag 151 * is cleared. 152 * 153 * When the SDEV_LOOKUP flag is cleared, the node may 154 * transition into the SDEV_READY state for a successful 155 * lookup or the node is removed from the directory cache 156 * and destroyed if the named node can not be found. 157 * An ENOENT error is returned for the second case. 158 * 159 * SDEV_READY: A /dev file has been successfully looked up and 160 * associated with a vnode. The /dev file is available 161 * for the supported /dev file system operations. 162 * 163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued 164 * to an SDEV_READY node. The node is transitioned into 165 * the SDEV_ZOMBIE state if the vnode reference count 166 * is still held. 
A SDEV_ZOMBIE node does not support 167 * any of the /dev file system operations. A SDEV_ZOMBIE 168 * node is immediately removed from the directory cache 169 * and destroyed once the reference count reaches zero. 170 * 171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the 172 * underlying directory caches. This has been the source of numerous bugs and 173 * thus to better mimic what happens on a real file system, it is no longer the 174 * case. 175 * 176 * The following state machine describes the life cycle of a given node and its 177 * associated states: 178 * 179 * node is . . . . . 180 * allocated via . +-------------+ . . . . . . . vnode_t refcount 181 * sdev_nodeinit() . | Unallocated | . reaches zero and 182 * +--------*-----| Memory |<--------*---+ sdev_inactive is 183 * | +-------------+ | called. 184 * | +------------^ | called. 185 * v | | 186 * +-----------+ * . . sdev_nodeready() +-------------+ 187 * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | 188 * +-----------+ | failure +-------------+ 189 * | | ^ 190 * | | +------------+ | 191 * +-*----------->| SDEV_READY |--------*-----+ 192 * . +------------+ . The node is no longer 193 * . . node successfully . . . . . valid or we've been 194 * inserted into the asked to remove it. 195 * directory cache This happens via 196 * and sdev_nodeready() sdev_dirdelete(). 197 * call successful. 198 * 199 * Adding and Removing Dirents, Zombie Nodes 200 * ----------------------------------------- 201 * 202 * As part of doing a lookup, readdir, or an explicit creation operation like 203 * mkdir or create, nodes may be created. Every directory has an avl tree which 204 * contains its children, the sdev_entries tree. This is only used if the type 205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and 206 * it is managed through sdev_cache_update(). 207 * 208 * Every sdev_node_t has a field sdev_state, which describes the current state 209 * of the node. 
A node is generally speaking in the SDEV_READY state. When it is 210 * there, it can be looked up, accessed, and operations performed on it. When a 211 * node is going to be removed from the directory cache it is marked as a 212 * zombie. Once a node becomes a zombie, no other file system operations will 213 * succeed and it will continue to exist as a node until the vnode count on the 214 * node reaches zero. At that point, the node will be freed. However, once a 215 * node has been marked as a zombie, it will be removed immediately from the 216 * directory cache such that no one else may find it again. This means that 217 * someone else can insert a new entry into that directory with the same name 218 * and without a problem. 219 * 220 * To remove a node, see the section on that in The Rules. 221 * 222 * The Rules 223 * --------- 224 * These are the rules to live by when working in sdev. These are not 225 * exhaustive. 226 * 227 * - Set 1: Working with Backing Nodes 228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. 229 * o If we find a backing node when looking up an sdev_node_t for the first 230 * time, we use its attributes to build our sdev_node_t. 231 * o If there is a found backing node, or we create a backing node, that's 232 * when we grab the hold on its vnode. 233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from 234 * the underlying file system. It must not be searchable or findable. 235 * o We release our hold on the backing node vnode when we destroy the 236 * sdev_node_t. 237 * 238 * - Set 2: Locking rules for sdev (not exhaustive) 239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it 240 * for read or write if manipulating its contents appropriately. 241 * o You must lock your parent before yourself. 242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must 243 * grab the v_lock before the sdev_contents rw_lock. 
244 * o If you release a lock on the node as a part of upgrading it, you must 245 * verify that the node has not become a zombie as a part of this process. 246 * 247 * - Set 3: Zombie Status and What it Means 248 * o If you encounter a node that is a ZOMBIE, that means that it has been 249 * unlinked from the backing store. 250 * o If you release your contents lock and acquire it again (say as part of 251 * trying to grab a write lock) you must check that the node has not become 252 * a zombie. 253 * o You should VERIFY that a looked up node is not a zombie. This follows 254 * from the following logic. To mark something as a zombie means that it is 255 * removed from the parents directory cache. To do that, you must have a 256 * write lock on the parent's sdev_contents. To lookup through that 257 * directory you must have a read lock. This then becomes a simple ordering 258 * problem. If you've been granted the lock then the other operation cannot 259 * be in progress or must have already succeeded. 260 * 261 * - Set 4: Removing Directory Entries (aka making nodes Zombies) 262 * o Write lock must be held on the directory 263 * o Write lock must be held on the node 264 * o Remove the sdev_node_t from its parent cache 265 * o Remove the corresponding backing store node, if it exists, eg. use 266 * VOP_REMOVE or VOP_RMDIR. 267 * o You must NOT make any change in the vnode reference count! Nodes should 268 * only be cleaned up through VOP_INACTIVE callbacks. 269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of 270 * the backing store vnode that was grabbed during lookup. 
271 * 272 * - Set 5: What Nodes may be Persisted 273 * o The root, /dev is always persisted 274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted 275 * unless it is also marked SDEV_PERSIST 276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that 277 * along to the child as long as it does not contradict the above rules 278 */ 279 280 #include <sys/types.h> 281 #include <sys/param.h> 282 #include <sys/t_lock.h> 283 #include <sys/systm.h> 284 #include <sys/sysmacros.h> 285 #include <sys/user.h> 286 #include <sys/time.h> 287 #include <sys/vfs.h> 288 #include <sys/vnode.h> 289 #include <sys/vfs_opreg.h> 290 #include <sys/file.h> 291 #include <sys/fcntl.h> 292 #include <sys/flock.h> 293 #include <sys/kmem.h> 294 #include <sys/uio.h> 295 #include <sys/errno.h> 296 #include <sys/stat.h> 297 #include <sys/cred.h> 298 #include <sys/dirent.h> 299 #include <sys/pathname.h> 300 #include <sys/cmn_err.h> 301 #include <sys/debug.h> 302 #include <sys/policy.h> 303 #include <vm/hat.h> 304 #include <vm/seg_vn.h> 305 #include <vm/seg_map.h> 306 #include <vm/seg.h> 307 #include <vm/as.h> 308 #include <vm/page.h> 309 #include <sys/proc.h> 310 #include <sys/mode.h> 311 #include <sys/sunndi.h> 312 #include <sys/ptms.h> 313 #include <fs/fs_subr.h> 314 #include <sys/fs/dv_node.h> 315 #include <sys/fs/sdev_impl.h> 316 317 /*ARGSUSED*/ 318 static int 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 320 { 321 struct sdev_node *dv = VTOSDEV(*vpp); 322 struct sdev_node *ddv = dv->sdev_dotdot; 323 int error = 0; 324 325 if ((*vpp)->v_type == VDIR) 326 return (0); 327 328 if (!SDEV_IS_GLOBAL(dv)) 329 return (ENOTSUP); 330 331 if ((*vpp)->v_type == VLNK) 332 return (ENOENT); 333 ASSERT((*vpp)->v_type == VREG); 334 if ((*vpp)->v_type != VREG) 335 return (ENOTSUP); 336 337 ASSERT(ddv); 338 rw_enter(&ddv->sdev_contents, RW_READER); 339 if (dv->sdev_attrvp == NULL) { 340 rw_exit(&ddv->sdev_contents); 341 return (ENOENT); 
342 } 343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct); 344 rw_exit(&ddv->sdev_contents); 345 return (error); 346 } 347 348 /*ARGSUSED1*/ 349 static int 350 sdev_close(struct vnode *vp, int flag, int count, 351 offset_t offset, struct cred *cred, caller_context_t *ct) 352 { 353 struct sdev_node *dv = VTOSDEV(vp); 354 355 if (vp->v_type == VDIR) { 356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 357 cleanshares(vp, ttoproc(curthread)->p_pid); 358 return (0); 359 } 360 361 if (!SDEV_IS_GLOBAL(dv)) 362 return (ENOTSUP); 363 364 ASSERT(vp->v_type == VREG); 365 if (vp->v_type != VREG) 366 return (ENOTSUP); 367 368 ASSERT(dv->sdev_attrvp); 369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct)); 370 } 371 372 /*ARGSUSED*/ 373 static int 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 375 struct caller_context *ct) 376 { 377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp); 378 int error; 379 380 if (!SDEV_IS_GLOBAL(dv)) 381 return (EINVAL); 382 383 if (vp->v_type == VDIR) 384 return (EISDIR); 385 386 /* only supporting regular files in /dev */ 387 ASSERT(vp->v_type == VREG); 388 if (vp->v_type != VREG) 389 return (EINVAL); 390 391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents)); 392 ASSERT(dv->sdev_attrvp); 393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct); 394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct); 395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct); 396 return (error); 397 } 398 399 /*ARGSUSED*/ 400 static int 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 402 struct caller_context *ct) 403 { 404 struct sdev_node *dv = VTOSDEV(vp); 405 int error = 0; 406 407 if (!SDEV_IS_GLOBAL(dv)) 408 return (EINVAL); 409 410 if (vp->v_type == VDIR) 411 return (EISDIR); 412 413 /* only supporting regular files in /dev */ 414 ASSERT(vp->v_type == VREG); 415 if (vp->v_type != VREG) 416 return (EINVAL); 417 418 ASSERT(dv->sdev_attrvp); 419 420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct); 
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct); 422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct); 423 if (error == 0) { 424 sdev_update_timestamps(dv->sdev_attrvp, kcred, 425 AT_MTIME); 426 } 427 return (error); 428 } 429 430 /*ARGSUSED*/ 431 static int 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 433 struct cred *cred, int *rvalp, caller_context_t *ct) 434 { 435 struct sdev_node *dv = VTOSDEV(vp); 436 437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR)) 438 return (ENOTTY); 439 440 ASSERT(vp->v_type == VREG); 441 if (vp->v_type != VREG) 442 return (EINVAL); 443 444 ASSERT(dv->sdev_attrvp); 445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct)); 446 } 447 448 static int 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, 450 struct cred *cr, caller_context_t *ct) 451 { 452 int error = 0; 453 struct sdev_node *dv = VTOSDEV(vp); 454 struct sdev_node *parent = dv->sdev_dotdot; 455 456 ASSERT(parent); 457 458 rw_enter(&parent->sdev_contents, RW_READER); 459 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 460 461 /* 462 * search order: 463 * - for persistent nodes (SDEV_PERSIST): backstore 464 * - for non-persistent nodes: module ops if global, then memory 465 */ 466 if (dv->sdev_attrvp) { 467 rw_exit(&parent->sdev_contents); 468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct); 469 sdev_vattr_merge(dv, vap); 470 } else { 471 ASSERT(dv->sdev_attr); 472 *vap = *dv->sdev_attr; 473 sdev_vattr_merge(dv, vap); 474 rw_exit(&parent->sdev_contents); 475 } 476 477 return (error); 478 } 479 480 /*ARGSUSED4*/ 481 static int 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, 483 struct cred *cred, caller_context_t *ctp) 484 { 485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0)); 486 } 487 488 static int 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 490 struct cred *cr, caller_context_t *ct) 491 { 492 int error; 493 struct sdev_node *dv = VTOSDEV(vp); 494 struct vnode 
*avp = dv->sdev_attrvp; 495 496 if (avp == NULL) { 497 /* return fs_fab_acl() if flavor matches, else do nothing */ 498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED && 499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) || 500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED && 501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE)))) 502 return (fs_fab_acl(vp, vsap, flags, cr, ct)); 503 504 return (ENOSYS); 505 } 506 507 (void) VOP_RWLOCK(avp, 1, ct); 508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 509 VOP_RWUNLOCK(avp, 1, ct); 510 return (error); 511 } 512 513 static int 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 515 struct cred *cr, caller_context_t *ct) 516 { 517 int error; 518 struct sdev_node *dv = VTOSDEV(vp); 519 struct vnode *avp = dv->sdev_attrvp; 520 521 if (dv->sdev_state == SDEV_ZOMBIE) 522 return (0); 523 524 if (avp == NULL) { 525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv)) 526 return (fs_nosys()); 527 ASSERT(dv->sdev_attr); 528 /* 529 * if coming in directly, the acl system call will 530 * have held the read-write lock via VOP_RWLOCK() 531 * If coming in via specfs, specfs will have 532 * held the rw lock on the realvp i.e. us. 533 */ 534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 535 sdev_vattr_merge(dv, dv->sdev_attr); 536 error = sdev_shadow_node(dv, cr); 537 if (error) { 538 return (fs_nosys()); 539 } 540 541 ASSERT(dv->sdev_attrvp); 542 /* clean out the memory copy if any */ 543 if (dv->sdev_attr) { 544 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 545 dv->sdev_attr = NULL; 546 } 547 avp = dv->sdev_attrvp; 548 } 549 ASSERT(avp); 550 551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct); 552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct); 554 return (error); 555 } 556 557 /* 558 * There are two different unlocked routines. This one is not static as it is 559 * used as part of the secpolicy_vnode_setattr calls in sdev_subr.c. 
Because it 560 * is used in that function it has to have a specific signature. 561 */ 562 int 563 sdev_unlocked_access(void *vdv, int mode, struct cred *cr) 564 { 565 struct sdev_node *dv = vdv; 566 int shift = 0; 567 uid_t owner = dv->sdev_attr->va_uid; 568 569 if (crgetuid(cr) != owner) { 570 shift += 3; 571 if (groupmember(dv->sdev_attr->va_gid, cr) == 0) 572 shift += 3; 573 } 574 575 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner, 576 dv->sdev_attr->va_mode << shift, mode)); 577 } 578 579 static int 580 sdev_self_access(sdev_node_t *dv, int mode, int flags, struct cred *cr, 581 caller_context_t *ct) 582 { 583 int ret; 584 585 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 586 if (dv->sdev_attrvp) { 587 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct); 588 } else if (dv->sdev_attr) { 589 ret = sdev_unlocked_access(dv, mode, cr); 590 if (ret) 591 ret = EACCES; 592 } 593 594 return (ret); 595 } 596 597 static int 598 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr, 599 caller_context_t *ct) 600 { 601 struct sdev_node *dv = VTOSDEV(vp); 602 int ret; 603 604 rw_enter(&dv->sdev_contents, RW_READER); 605 ret = sdev_self_access(dv, mode, flags, cr, ct); 606 rw_exit(&dv->sdev_contents); 607 608 return (ret); 609 } 610 611 /* 612 * Lookup 613 */ 614 /*ARGSUSED3*/ 615 static int 616 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 617 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 618 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 619 { 620 struct sdev_node *parent; 621 int error; 622 623 parent = VTOSDEV(dvp); 624 ASSERT(parent); 625 626 /* execute access is required to search the directory */ 627 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 628 return (error); 629 630 if (!SDEV_IS_GLOBAL(parent)) 631 return (prof_lookup(dvp, nm, vpp, cred)); 632 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); 633 } 634 635 /*ARGSUSED2*/ 636 static int 637 sdev_create(struct vnode 
*dvp, char *nm, struct vattr *vap, vcexcl_t excl, 638 int mode, struct vnode **vpp, struct cred *cred, int flag, 639 caller_context_t *ct, vsecattr_t *vsecp) 640 { 641 struct vnode *vp = NULL; 642 struct vnode *avp; 643 struct sdev_node *parent; 644 struct sdev_node *self = NULL; 645 int error = 0; 646 vtype_t type = vap->va_type; 647 648 ASSERT(type != VNON && type != VBAD); 649 650 if ((type == VFIFO) || (type == VSOCK) || 651 (type == VPROC) || (type == VPORT)) 652 return (ENOTSUP); 653 654 parent = VTOSDEV(dvp); 655 ASSERT(parent); 656 657 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 658 if (parent->sdev_state == SDEV_ZOMBIE) { 659 rw_exit(&parent->sdev_dotdot->sdev_contents); 660 return (ENOENT); 661 } 662 663 /* 664 * Nodes cannot be created in NGZ context. 665 */ 666 if (!SDEV_IS_GLOBAL(parent)) { 667 rw_exit(&parent->sdev_dotdot->sdev_contents); 668 error = prof_lookup(dvp, nm, vpp, cred); 669 670 /* 671 * In this case, we can't create a vnode but we can 672 * open an existing one. However, we still want to 673 * enforce the open(2) error semantics as if this was 674 * a regular sdev_create() in GZ context. Since we 675 * know the vnode already exists (error == 0) we a) 676 * return EEXIST if exclusive access was requested, or 677 * b) return EISDIR if write access was requested on a 678 * directory. Otherwise, we return the value from 679 * prof_lookup() as is. 
680 */ 681 if (error == 0) { 682 if (excl == EXCL) { 683 error = EEXIST; 684 } else if (((*vpp)->v_type == VDIR) && 685 (mode & VWRITE)) { 686 error = EISDIR; 687 } 688 689 if (error != 0) 690 VN_RELE(*vpp); 691 } 692 693 694 return (error); 695 } 696 rw_exit(&parent->sdev_dotdot->sdev_contents); 697 698 /* execute access is required to search the directory */ 699 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 700 return (error); 701 702 /* check existing name */ 703 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 704 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 705 706 /* name found */ 707 if (error == 0) { 708 ASSERT(vp); 709 if (excl == EXCL) { 710 error = EEXIST; 711 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { 712 /* allowing create/read-only an existing directory */ 713 error = EISDIR; 714 } else { 715 error = VOP_ACCESS(vp, mode, 0, cred, ct); 716 } 717 718 if (error) { 719 VN_RELE(vp); 720 return (error); 721 } 722 723 /* truncation first */ 724 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && 725 (vap->va_size == 0)) { 726 ASSERT(parent->sdev_attrvp); 727 error = VOP_CREATE(parent->sdev_attrvp, 728 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp); 729 730 if (error) { 731 VN_RELE(vp); 732 return (error); 733 } 734 } 735 736 sdev_update_timestamps(vp, kcred, 737 AT_CTIME|AT_MTIME|AT_ATIME); 738 *vpp = vp; 739 return (0); 740 } 741 742 /* bail out early */ 743 if (error != ENOENT) 744 return (error); 745 746 /* verify write access - compliance specifies ENXIO */ 747 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) { 748 if (error == EACCES) 749 error = ENXIO; 750 return (error); 751 } 752 753 /* 754 * For memory-based (ROFS) directory: 755 * - either disallow node creation; 756 * - or implement VOP_CREATE of its own 757 */ 758 rw_enter(&parent->sdev_contents, RW_WRITER); 759 if (!SDEV_IS_PERSIST(parent)) { 760 rw_exit(&parent->sdev_contents); 761 return (ENOTSUP); 762 } 763 
ASSERT(parent->sdev_attrvp); 764 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, 765 cred, SDEV_READY); 766 if (error) { 767 rw_exit(&parent->sdev_contents); 768 if (self) 769 SDEV_RELE(self); 770 return (error); 771 } 772 rw_exit(&parent->sdev_contents); 773 774 ASSERT(self); 775 /* take care the timestamps for the node and its parent */ 776 sdev_update_timestamps(SDEVTOV(self), kcred, 777 AT_CTIME|AT_MTIME|AT_ATIME); 778 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 779 if (SDEV_IS_GLOBAL(parent)) 780 atomic_inc_ulong(&parent->sdev_gdir_gen); 781 782 /* wake up other threads blocked on looking up this node */ 783 mutex_enter(&self->sdev_lookup_lock); 784 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 785 mutex_exit(&self->sdev_lookup_lock); 786 error = sdev_to_vp(self, vpp); 787 return (error); 788 } 789 790 static int 791 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, 792 caller_context_t *ct, int flags) 793 { 794 int error; 795 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 796 struct vnode *vp = NULL; 797 struct sdev_node *dv = NULL; 798 int len; 799 int bkstore; 800 801 /* bail out early */ 802 len = strlen(nm); 803 if (nm[0] == '.') { 804 if (len == 1) { 805 return (EINVAL); 806 } else if (len == 2 && nm[1] == '.') { 807 return (EEXIST); 808 } 809 } 810 811 ASSERT(parent); 812 rw_enter(&parent->sdev_contents, RW_READER); 813 if (!SDEV_IS_GLOBAL(parent)) { 814 rw_exit(&parent->sdev_contents); 815 return (ENOTSUP); 816 } 817 818 /* execute access is required to search the directory */ 819 if ((error = sdev_self_access(parent, VEXEC, 0, cred, ct)) != 0) { 820 rw_exit(&parent->sdev_contents); 821 return (error); 822 } 823 824 /* check existence first */ 825 dv = sdev_cache_lookup(parent, nm); 826 if (dv == NULL) { 827 rw_exit(&parent->sdev_contents); 828 return (ENOENT); 829 } 830 831 vp = SDEVTOV(dv); 832 if ((dv->sdev_state == SDEV_INIT) || 833 (dv->sdev_state == SDEV_ZOMBIE)) { 834 rw_exit(&parent->sdev_contents); 835 
VN_RELE(vp); 836 return (ENOENT); 837 } 838 839 /* write access is required to remove an entry */ 840 if ((error = sdev_self_access(parent, VWRITE, 0, cred, ct)) != 0) { 841 rw_exit(&parent->sdev_contents); 842 VN_RELE(vp); 843 return (error); 844 } 845 846 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 847 if (!rw_tryupgrade(&parent->sdev_contents)) { 848 rw_exit(&parent->sdev_contents); 849 rw_enter(&parent->sdev_contents, RW_WRITER); 850 /* Make sure we didn't become a zombie */ 851 if (parent->sdev_state == SDEV_ZOMBIE) { 852 rw_exit(&parent->sdev_contents); 853 VN_RELE(vp); 854 return (ENOENT); 855 } 856 } 857 858 /* we do not support unlinking a non-empty directory */ 859 if (vp->v_type == VDIR && dv->sdev_nlink > 2) { 860 rw_exit(&parent->sdev_contents); 861 VN_RELE(vp); 862 return (EBUSY); 863 } 864 865 /* 866 * sdev_dirdelete does the real job of: 867 * - make sure no open ref count 868 * - destroying the sdev_node 869 * - releasing the hold on attrvp 870 */ 871 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); 872 VN_RELE(vp); 873 rw_exit(&parent->sdev_contents); 874 875 /* 876 * best efforts clean up the backing store 877 */ 878 if (bkstore) { 879 ASSERT(parent->sdev_attrvp); 880 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, 881 ct, flags); 882 /* 883 * do not report BUSY error 884 * because the backing store ref count is released 885 * when the last ref count on the sdev_node is 886 * released. 887 */ 888 if (error == EBUSY) { 889 sdcmn_err2(("sdev_remove: device %s is still on" 890 "disk %s\n", nm, parent->sdev_path)); 891 error = 0; 892 } 893 } 894 895 return (error); 896 } 897 898 /* 899 * Some restrictions for this file system: 900 * - both oldnm and newnm are in the scope of /dev file system, 901 * to simply the namespace management model. 
902 */ 903 /*ARGSUSED6*/ 904 static int 905 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, 906 struct cred *cred, caller_context_t *ct, int flags) 907 { 908 struct sdev_node *fromparent = NULL; 909 struct vattr vattr; 910 struct sdev_node *toparent; 911 struct sdev_node *fromdv = NULL; /* source node */ 912 struct vnode *ovp = NULL; /* source vnode */ 913 struct sdev_node *todv = NULL; /* destination node */ 914 struct vnode *nvp = NULL; /* destination vnode */ 915 int samedir = 0; /* set if odvp == ndvp */ 916 struct vnode *realvp; 917 int error = 0; 918 dev_t fsid; 919 int bkstore = 0; 920 vtype_t type; 921 922 /* prevent modifying "." and ".." */ 923 if ((onm[0] == '.' && 924 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 925 (nnm[0] == '.' && 926 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) { 927 return (EINVAL); 928 } 929 930 fromparent = VTOSDEV(odvp); 931 toparent = VTOSDEV(ndvp); 932 933 /* ZOMBIE parent doesn't allow new node creation */ 934 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER); 935 if (fromparent->sdev_state == SDEV_ZOMBIE) { 936 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 937 return (ENOENT); 938 } 939 940 /* renaming only supported for global device nodes */ 941 if (!SDEV_IS_GLOBAL(fromparent)) { 942 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 943 return (ENOTSUP); 944 } 945 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 946 947 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER); 948 if (toparent->sdev_state == SDEV_ZOMBIE) { 949 rw_exit(&toparent->sdev_dotdot->sdev_contents); 950 return (ENOENT); 951 } 952 rw_exit(&toparent->sdev_dotdot->sdev_contents); 953 954 /* 955 * acquire the global lock to prevent 956 * mount/unmount/other rename activities. 
957 */ 958 mutex_enter(&sdev_lock); 959 960 /* check existence of the source node */ 961 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 962 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct, 963 NULL, NULL); 964 if (error) { 965 sdcmn_err2(("sdev_rename: the source node %s exists\n", 966 onm)); 967 mutex_exit(&sdev_lock); 968 return (error); 969 } 970 971 if (VOP_REALVP(ovp, &realvp, ct) == 0) { 972 VN_HOLD(realvp); 973 VN_RELE(ovp); 974 ovp = realvp; 975 } 976 977 /* check existence of destination */ 978 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 979 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct, 980 NULL, NULL); 981 if (error && (error != ENOENT)) { 982 mutex_exit(&sdev_lock); 983 VN_RELE(ovp); 984 return (error); 985 } 986 987 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) { 988 VN_HOLD(realvp); 989 VN_RELE(nvp); 990 nvp = realvp; 991 } 992 993 /* 994 * make sure the source and the destination are 995 * in the same dev filesystem 996 */ 997 if (odvp != ndvp) { 998 vattr.va_mask = AT_FSID; 999 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { 1000 mutex_exit(&sdev_lock); 1001 VN_RELE(ovp); 1002 if (nvp != NULL) 1003 VN_RELE(nvp); 1004 return (error); 1005 } 1006 fsid = vattr.va_fsid; 1007 vattr.va_mask = AT_FSID; 1008 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { 1009 mutex_exit(&sdev_lock); 1010 VN_RELE(ovp); 1011 if (nvp != NULL) 1012 VN_RELE(nvp); 1013 return (error); 1014 } 1015 if (fsid != vattr.va_fsid) { 1016 mutex_exit(&sdev_lock); 1017 VN_RELE(ovp); 1018 if (nvp != NULL) 1019 VN_RELE(nvp); 1020 return (EXDEV); 1021 } 1022 } 1023 1024 /* make sure the old entry can be deleted */ 1025 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct); 1026 if (error) { 1027 mutex_exit(&sdev_lock); 1028 VN_RELE(ovp); 1029 if (nvp != NULL) 1030 VN_RELE(nvp); 1031 return (error); 1032 } 1033 1034 /* make sure the destination allows creation */ 1035 samedir = (fromparent == toparent); 1036 if 
(!samedir) { 1037 error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred, ct); 1038 if (error) { 1039 mutex_exit(&sdev_lock); 1040 VN_RELE(ovp); 1041 if (nvp != NULL) 1042 VN_RELE(nvp); 1043 return (error); 1044 } 1045 } 1046 1047 fromdv = VTOSDEV(ovp); 1048 ASSERT(fromdv); 1049 1050 /* destination file exists */ 1051 if (nvp != NULL) { 1052 todv = VTOSDEV(nvp); 1053 ASSERT(todv); 1054 } 1055 1056 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || 1057 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { 1058 mutex_exit(&sdev_lock); 1059 if (nvp != NULL) 1060 VN_RELE(nvp); 1061 VN_RELE(ovp); 1062 return (EACCES); 1063 } 1064 1065 /* 1066 * link source to new target in the memory. Regardless of failure, we 1067 * must rele our hold on nvp. 1068 */ 1069 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); 1070 if (nvp != NULL) 1071 VN_RELE(nvp); 1072 if (error) { 1073 sdcmn_err2(("sdev_rename: renaming %s to %s failed " 1074 " with error %d\n", onm, nnm, error)); 1075 mutex_exit(&sdev_lock); 1076 VN_RELE(ovp); 1077 return (error); 1078 } 1079 1080 /* 1081 * unlink from source 1082 */ 1083 rw_enter(&fromparent->sdev_contents, RW_READER); 1084 fromdv = sdev_cache_lookup(fromparent, onm); 1085 if (fromdv == NULL) { 1086 rw_exit(&fromparent->sdev_contents); 1087 mutex_exit(&sdev_lock); 1088 VN_RELE(ovp); 1089 sdcmn_err2(("sdev_rename: the source is deleted already\n")); 1090 return (0); 1091 } 1092 1093 if (fromdv->sdev_state == SDEV_ZOMBIE) { 1094 rw_exit(&fromparent->sdev_contents); 1095 mutex_exit(&sdev_lock); 1096 VN_RELE(SDEVTOV(fromdv)); 1097 VN_RELE(ovp); 1098 sdcmn_err2(("sdev_rename: the source is being deleted\n")); 1099 return (0); 1100 } 1101 rw_exit(&fromparent->sdev_contents); 1102 ASSERT(SDEVTOV(fromdv) == ovp); 1103 VN_RELE(ovp); 1104 1105 /* clean out the directory contents before it can be removed */ 1106 type = SDEVTOV(fromdv)->v_type; 1107 if (type == VDIR) { 1108 error = sdev_cleandir(fromdv, NULL, 0); 1109 sdcmn_err2(("sdev_rename: 
cleandir finished with %d\n", 1110 error)); 1111 if (error == EBUSY) 1112 error = 0; 1113 } 1114 1115 rw_enter(&fromparent->sdev_contents, RW_WRITER); 1116 bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; 1117 sdev_cache_update(fromparent, &fromdv, onm, 1118 SDEV_CACHE_DELETE); 1119 VN_RELE(SDEVTOV(fromdv)); 1120 1121 /* best effforts clean up the backing store */ 1122 if (bkstore) { 1123 ASSERT(fromparent->sdev_attrvp); 1124 if (type != VDIR) { 1125 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */ 1126 error = VOP_REMOVE(fromparent->sdev_attrvp, 1127 onm, kcred, ct, 0); 1128 } else { 1129 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */ 1130 error = VOP_RMDIR(fromparent->sdev_attrvp, 1131 onm, fromparent->sdev_attrvp, kcred, ct, 0); 1132 } 1133 1134 if (error) { 1135 sdcmn_err2(("sdev_rename: device %s is " 1136 "still on disk %s\n", onm, 1137 fromparent->sdev_path)); 1138 error = 0; 1139 } 1140 } 1141 rw_exit(&fromparent->sdev_contents); 1142 mutex_exit(&sdev_lock); 1143 1144 /* once reached to this point, the rename is regarded successful */ 1145 return (0); 1146 } 1147 1148 /* 1149 * dev-fs version of "ln -s path dev-name" 1150 * tnm - path, e.g. /devices/... or /dev/... 
1151 * lnm - dev_name 1152 */ 1153 /*ARGSUSED6*/ 1154 static int 1155 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, 1156 char *tnm, struct cred *cred, caller_context_t *ct, int flags) 1157 { 1158 int error; 1159 struct vnode *vp = NULL; 1160 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1161 struct sdev_node *self = (struct sdev_node *)NULL; 1162 1163 ASSERT(parent); 1164 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1165 if (parent->sdev_state == SDEV_ZOMBIE) { 1166 rw_exit(&parent->sdev_dotdot->sdev_contents); 1167 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n", 1168 parent->sdev_name)); 1169 return (ENOENT); 1170 } 1171 1172 if (!SDEV_IS_GLOBAL(parent)) { 1173 rw_exit(&parent->sdev_dotdot->sdev_contents); 1174 return (ENOTSUP); 1175 } 1176 rw_exit(&parent->sdev_dotdot->sdev_contents); 1177 1178 /* execute access is required to search a directory */ 1179 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1180 return (error); 1181 1182 /* find existing name */ 1183 /* XXXci - We may need to translate the C-I flags here */ 1184 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1185 if (error == 0) { 1186 ASSERT(vp); 1187 VN_RELE(vp); 1188 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm)); 1189 return (EEXIST); 1190 } 1191 if (error != ENOENT) 1192 return (error); 1193 1194 /* write access is required to create a symlink */ 1195 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) 1196 return (error); 1197 1198 /* put it into memory cache */ 1199 rw_enter(&parent->sdev_contents, RW_WRITER); 1200 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm, 1201 cred, SDEV_READY); 1202 if (error) { 1203 rw_exit(&parent->sdev_contents); 1204 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); 1205 if (self) 1206 SDEV_RELE(self); 1207 1208 return (error); 1209 } 1210 ASSERT(self && (self->sdev_state == SDEV_READY)); 1211 rw_exit(&parent->sdev_contents); 1212 1213 /* 
take care the timestamps for the node and its parent */ 1214 sdev_update_timestamps(SDEVTOV(self), kcred, 1215 AT_CTIME|AT_MTIME|AT_ATIME); 1216 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1217 if (SDEV_IS_GLOBAL(parent)) 1218 atomic_inc_ulong(&parent->sdev_gdir_gen); 1219 1220 /* wake up other threads blocked on looking up this node */ 1221 mutex_enter(&self->sdev_lookup_lock); 1222 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1223 mutex_exit(&self->sdev_lookup_lock); 1224 SDEV_RELE(self); /* don't return with vnode held */ 1225 return (0); 1226 } 1227 1228 /*ARGSUSED6*/ 1229 static int 1230 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, 1231 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp) 1232 { 1233 int error; 1234 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1235 struct sdev_node *self = NULL; 1236 struct vnode *vp = NULL; 1237 1238 ASSERT(parent && parent->sdev_dotdot); 1239 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1240 if (parent->sdev_state == SDEV_ZOMBIE) { 1241 rw_exit(&parent->sdev_dotdot->sdev_contents); 1242 return (ENOENT); 1243 } 1244 1245 /* non-global do not allow pure directory creation */ 1246 if (!SDEV_IS_GLOBAL(parent)) { 1247 rw_exit(&parent->sdev_dotdot->sdev_contents); 1248 return (prof_lookup(dvp, nm, vpp, cred)); 1249 } 1250 rw_exit(&parent->sdev_dotdot->sdev_contents); 1251 1252 /* execute access is required to search the directory */ 1253 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 1254 return (error); 1255 } 1256 1257 /* find existing name */ 1258 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 1259 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1260 if (error == 0) { 1261 VN_RELE(vp); 1262 return (EEXIST); 1263 } 1264 if (error != ENOENT) 1265 return (error); 1266 1267 /* require write access to create a directory */ 1268 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 1269 
return (error); 1270 } 1271 1272 /* put it into memory */ 1273 rw_enter(&parent->sdev_contents, RW_WRITER); 1274 error = sdev_mknode(parent, nm, &self, 1275 va, NULL, NULL, cred, SDEV_READY); 1276 if (error) { 1277 rw_exit(&parent->sdev_contents); 1278 if (self) 1279 SDEV_RELE(self); 1280 return (error); 1281 } 1282 ASSERT(self && (self->sdev_state == SDEV_READY)); 1283 rw_exit(&parent->sdev_contents); 1284 1285 /* take care the timestamps for the node and its parent */ 1286 sdev_update_timestamps(SDEVTOV(self), kcred, 1287 AT_CTIME|AT_MTIME|AT_ATIME); 1288 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1289 if (SDEV_IS_GLOBAL(parent)) 1290 atomic_inc_ulong(&parent->sdev_gdir_gen); 1291 1292 /* wake up other threads blocked on looking up this node */ 1293 mutex_enter(&self->sdev_lookup_lock); 1294 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1295 mutex_exit(&self->sdev_lookup_lock); 1296 *vpp = SDEVTOV(self); 1297 return (0); 1298 } 1299 1300 /* 1301 * allowing removing an empty directory under /dev 1302 */ 1303 /*ARGSUSED*/ 1304 static int 1305 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, 1306 caller_context_t *ct, int flags) 1307 { 1308 int error = 0; 1309 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1310 struct sdev_node *self = NULL; 1311 struct vnode *vp = NULL; 1312 1313 /* bail out early */ 1314 if (strcmp(nm, ".") == 0) 1315 return (EINVAL); 1316 if (strcmp(nm, "..") == 0) 1317 return (EEXIST); /* should be ENOTEMPTY */ 1318 1319 /* no destruction of non-global node */ 1320 ASSERT(parent && parent->sdev_dotdot); 1321 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1322 if (!SDEV_IS_GLOBAL(parent)) { 1323 rw_exit(&parent->sdev_dotdot->sdev_contents); 1324 return (ENOTSUP); 1325 } 1326 rw_exit(&parent->sdev_dotdot->sdev_contents); 1327 1328 /* execute access is required to search the directory */ 1329 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) 1330 return (error); 1331 1332 /* 
check existing name */ 1333 rw_enter(&parent->sdev_contents, RW_WRITER); 1334 self = sdev_cache_lookup(parent, nm); 1335 if (self == NULL) { 1336 rw_exit(&parent->sdev_contents); 1337 return (ENOENT); 1338 } 1339 1340 vp = SDEVTOV(self); 1341 if ((self->sdev_state == SDEV_INIT) || 1342 (self->sdev_state == SDEV_ZOMBIE)) { 1343 rw_exit(&parent->sdev_contents); 1344 VN_RELE(vp); 1345 return (ENOENT); 1346 } 1347 1348 /* some sanity checks */ 1349 if (vp == dvp || vp == cdir) { 1350 rw_exit(&parent->sdev_contents); 1351 VN_RELE(vp); 1352 return (EINVAL); 1353 } 1354 1355 if (vp->v_type != VDIR) { 1356 rw_exit(&parent->sdev_contents); 1357 VN_RELE(vp); 1358 return (ENOTDIR); 1359 } 1360 1361 if (vn_vfswlock(vp)) { 1362 rw_exit(&parent->sdev_contents); 1363 VN_RELE(vp); 1364 return (EBUSY); 1365 } 1366 1367 if (vn_mountedvfs(vp) != NULL) { 1368 rw_exit(&parent->sdev_contents); 1369 vn_vfsunlock(vp); 1370 VN_RELE(vp); 1371 return (EBUSY); 1372 } 1373 1374 self = VTOSDEV(vp); 1375 /* bail out on a non-empty directory */ 1376 rw_enter(&self->sdev_contents, RW_READER); 1377 if (self->sdev_nlink > 2) { 1378 rw_exit(&self->sdev_contents); 1379 rw_exit(&parent->sdev_contents); 1380 vn_vfsunlock(vp); 1381 VN_RELE(vp); 1382 return (ENOTEMPTY); 1383 } 1384 rw_exit(&self->sdev_contents); 1385 1386 /* unlink it from the directory cache */ 1387 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); 1388 rw_exit(&parent->sdev_contents); 1389 vn_vfsunlock(vp); 1390 VN_RELE(vp); 1391 1392 /* best effort to clean up the backing store */ 1393 if (SDEV_IS_PERSIST(parent)) { 1394 ASSERT(parent->sdev_attrvp); 1395 error = VOP_RMDIR(parent->sdev_attrvp, nm, 1396 parent->sdev_attrvp, kcred, ct, flags); 1397 1398 if (error) 1399 sdcmn_err2(("sdev_rmdir: cleaning device %s is on" 1400 " disk error %d\n", parent->sdev_path, error)); 1401 if (error == EBUSY) 1402 error = 0; 1403 1404 } 1405 1406 return (error); 1407 } 1408 1409 /* 1410 * read the contents of a symbolic link 1411 */ 1412 static 
int 1413 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred, 1414 caller_context_t *ct) 1415 { 1416 struct sdev_node *dv; 1417 int error = 0; 1418 1419 ASSERT(vp->v_type == VLNK); 1420 1421 dv = VTOSDEV(vp); 1422 1423 if (dv->sdev_attrvp) { 1424 /* non-NULL attrvp implys a persisted node at READY state */ 1425 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct)); 1426 } else if (dv->sdev_symlink != NULL) { 1427 /* memory nodes, e.g. local nodes */ 1428 rw_enter(&dv->sdev_contents, RW_READER); 1429 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); 1430 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), 1431 UIO_READ, uiop); 1432 rw_exit(&dv->sdev_contents); 1433 return (error); 1434 } 1435 1436 return (ENOENT); 1437 } 1438 1439 /*ARGSUSED4*/ 1440 static int 1441 sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 1442 caller_context_t *ct, int flags) 1443 { 1444 struct sdev_node *parent = VTOSDEV(dvp); 1445 int error; 1446 1447 /* 1448 * We must check that we have execute access to search the directory -- 1449 * but because our sdev_contents lock is already held as a reader (the 1450 * caller must have done a VOP_RWLOCK()), we call directly into the 1451 * underlying access routine if sdev_attr is non-NULL. 
1452 */ 1453 if (parent->sdev_attr != NULL) { 1454 VERIFY(RW_READ_HELD(&parent->sdev_contents)); 1455 1456 if (sdev_unlocked_access(parent, VEXEC, cred) != 0) 1457 return (EACCES); 1458 } else { 1459 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1460 return (error); 1461 } 1462 1463 ASSERT(parent); 1464 if (!SDEV_IS_GLOBAL(parent)) 1465 prof_filldir(parent); 1466 return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE)); 1467 } 1468 1469 /*ARGSUSED1*/ 1470 static void 1471 sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1472 { 1473 devname_inactive_func(vp, cred, NULL); 1474 } 1475 1476 /*ARGSUSED2*/ 1477 static int 1478 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1479 { 1480 struct sdev_node *dv = VTOSDEV(vp); 1481 struct sdev_fid *sdev_fid; 1482 1483 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { 1484 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); 1485 return (ENOSPC); 1486 } 1487 1488 sdev_fid = (struct sdev_fid *)fidp; 1489 bzero(sdev_fid, sizeof (struct sdev_fid)); 1490 sdev_fid->sdevfid_len = 1491 (int)sizeof (struct sdev_fid) - sizeof (ushort_t); 1492 sdev_fid->sdevfid_ino = dv->sdev_ino; 1493 1494 return (0); 1495 } 1496 1497 /* 1498 * This pair of routines bracket all VOP_READ, VOP_WRITE 1499 * and VOP_READDIR requests. The contents lock stops things 1500 * moving around while we're looking at them. 1501 */ 1502 /*ARGSUSED2*/ 1503 static int 1504 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1505 { 1506 rw_enter(&VTOSDEV(vp)->sdev_contents, 1507 write_flag ? RW_WRITER : RW_READER); 1508 return (write_flag ? 
V_WRITELOCK_TRUE : V_WRITELOCK_FALSE); 1509 } 1510 1511 /*ARGSUSED1*/ 1512 static void 1513 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1514 { 1515 rw_exit(&VTOSDEV(vp)->sdev_contents); 1516 } 1517 1518 /*ARGSUSED1*/ 1519 static int 1520 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1521 caller_context_t *ct) 1522 { 1523 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; 1524 1525 ASSERT(vp->v_type != VCHR && 1526 vp->v_type != VBLK && vp->v_type != VLNK); 1527 1528 if (vp->v_type == VDIR) 1529 return (fs_seek(vp, ooff, noffp, ct)); 1530 1531 ASSERT(attrvp); 1532 return (VOP_SEEK(attrvp, ooff, noffp, ct)); 1533 } 1534 1535 /*ARGSUSED1*/ 1536 static int 1537 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 1538 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr, 1539 caller_context_t *ct) 1540 { 1541 int error; 1542 struct sdev_node *dv = VTOSDEV(vp); 1543 1544 ASSERT(dv); 1545 ASSERT(dv->sdev_attrvp); 1546 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, 1547 flk_cbp, cr, ct); 1548 1549 return (error); 1550 } 1551 1552 static int 1553 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 1554 caller_context_t *ct) 1555 { 1556 switch (cmd) { 1557 case _PC_ACL_ENABLED: 1558 *valp = SDEV_ACL_FLAVOR(vp); 1559 return (0); 1560 } 1561 1562 return (fs_pathconf(vp, cmd, valp, cr, ct)); 1563 } 1564 1565 vnodeops_t *sdev_vnodeops; 1566 1567 const fs_operation_def_t sdev_vnodeops_tbl[] = { 1568 VOPNAME_OPEN, { .vop_open = sdev_open }, 1569 VOPNAME_CLOSE, { .vop_close = sdev_close }, 1570 VOPNAME_READ, { .vop_read = sdev_read }, 1571 VOPNAME_WRITE, { .vop_write = sdev_write }, 1572 VOPNAME_IOCTL, { .vop_ioctl = sdev_ioctl }, 1573 VOPNAME_GETATTR, { .vop_getattr = sdev_getattr }, 1574 VOPNAME_SETATTR, { .vop_setattr = sdev_setattr }, 1575 VOPNAME_ACCESS, { .vop_access = sdev_access }, 1576 VOPNAME_LOOKUP, { .vop_lookup = sdev_lookup }, 1577 VOPNAME_CREATE, { .vop_create = sdev_create }, 
1578 VOPNAME_RENAME, { .vop_rename = sdev_rename }, 1579 VOPNAME_REMOVE, { .vop_remove = sdev_remove }, 1580 VOPNAME_MKDIR, { .vop_mkdir = sdev_mkdir }, 1581 VOPNAME_RMDIR, { .vop_rmdir = sdev_rmdir }, 1582 VOPNAME_READDIR, { .vop_readdir = sdev_readdir }, 1583 VOPNAME_SYMLINK, { .vop_symlink = sdev_symlink }, 1584 VOPNAME_READLINK, { .vop_readlink = sdev_readlink }, 1585 VOPNAME_INACTIVE, { .vop_inactive = sdev_inactive }, 1586 VOPNAME_FID, { .vop_fid = sdev_fid }, 1587 VOPNAME_RWLOCK, { .vop_rwlock = sdev_rwlock }, 1588 VOPNAME_RWUNLOCK, { .vop_rwunlock = sdev_rwunlock }, 1589 VOPNAME_SEEK, { .vop_seek = sdev_seek }, 1590 VOPNAME_FRLOCK, { .vop_frlock = sdev_frlock }, 1591 VOPNAME_PATHCONF, { .vop_pathconf = sdev_pathconf }, 1592 VOPNAME_SETSECATTR, { .vop_setsecattr = sdev_setsecattr }, 1593 VOPNAME_GETSECATTR, { .vop_getsecattr = sdev_getsecattr }, 1594 NULL, NULL 1595 }; 1596 1597 int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl); 1598