1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 26 */ 27 28 /* 29 * vnode ops for the /dev filesystem 30 * 31 * - VDIR, VCHR, VBLK, and VLNK are considered must-supported files 32 * - VREG and VDOOR are used for some internal implementations in 33 * the global zone, e.g. devname and devfsadm communication 34 * - other file types are unusual in this namespace and 35 * not supported for now 36 */ 37 38 /* 39 * sdev has a few basic goals: 40 * o Provide /dev for the global zone as well as various non-global zones. 41 * o Provide the basic functionality that devfsadm might need (mknod, 42 * symlinks, etc.) 43 * o Allow persistent permissions on files in /dev. 44 * o Allow for dynamic directories and nodes for use by various services (pts, 45 * zvol, net, etc.) 46 * 47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's 48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's 49 * that we generally care about, dynamic and otherwise. 
50 * 51 * Persisting Information 52 * ---------------------- 53 * 54 * When sdev is mounted, it keeps track of the underlying file system it is 55 * mounted over. In certain situations, sdev will go and create entries in that 56 * underlying file system. These underlying 'back end' nodes are used as proxies 57 * for various changes in permissions. While specific sets of nodes, such as 58 * dynamic ones, are exempt, this process stores permission changes against 59 * these back end nodes. The point of all of this is to allow for these settings 60 * to persist across host and zone reboots. As an example, consider the entry 61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon 62 * changing the permissions on c0t0d0 you'd have the following logical 63 * relationships: 64 * 65 * +------------------+ sdev_vnode +--------------+ 66 * | sdev_node_t |<---------------->| vnode_t | 67 * | /dev/dsk/c0t0d0 |<---------------->| for sdev | 68 * +------------------+ +--------------+ 69 * | 70 * | sdev_attrvp 71 * | 72 * | +---------------------+ 73 * +--->| vnode_t for UFS|ZFS | 74 * | /dev/dsk/c0t0d0 | 75 * +---------------------+ 76 * 77 * sdev is generally in memory. Therefore when a lookup happens and there is no 78 * entry already inside of a directory cache, it will next check the backing 79 * store. If the backing store exists, we will reconstitute the sdev_node based 80 * on the information that we persisted. When we create the backing store node, 81 * we use the struct vattr information that we already have in sdev_node_t. 82 * Because of this, we already know if the entry was previously a symlink, 83 * directory, or some other kind of type. Note that not all types of nodes are 84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are 85 * eligible to be persisted. 86 * 87 * When the sdev_node is created and the lookup is done, we grab a hold on the 88 * underlying vnode as part of the call to VOP_LOOKUP. 
That reference is held 89 * until the sdev_node becomes inactive. Once its reference count reaches one 90 * and the VOP_INACTIVE callback fires leading to the destruction of the node, 91 * the reference on the underlying vnode will be released. 92 * 93 * The backing store node will be deleted only when the node itself is deleted 94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. 95 * 96 * Not everything can be persisted, see The Rules section for more details. 97 * 98 * Dynamic Nodes 99 * ------------- 100 * 101 * Dynamic nodes allow for specific interactions with various kernel subsystems 102 * when looking up directory entries. This allows the lookup and readdir 103 * functions to check against the kernel subsystems for validity, e.g. does a 104 * zvol or nic still exist. 105 * 106 * More specifically, when we create various directories we check if the 107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c). 108 * If it does, we swap out the vnode operations into a new set which combine the 109 * normal sdev vnode operations with the dynamic set here. 110 * 111 * In addition, various dynamic nodes implement a verification entry point. This 112 * verification entry is used as a part of lookup and readdir. The goal for 113 * these dynamic nodes is to allow them to check with the underlying subsystems 114 * to ensure that these devices are still present, or if they have gone away, to 115 * remove them from the results. This is indicated by using the SDEV_VTOR flag 116 * in vtab[]. 117 * 118 * Dynamic nodes have additional restrictions placed upon them. They may only 119 * appear at the top level directory of the file system. In addition, users 120 * cannot create dirents below any level of a dynamic node aside from its special 121 * vnops. 122 * 123 * Profiles 124 * -------- 125 * 126 * Profiles exist for the purpose of non-global zones. 
They work with the zone 127 * brands and zoneadmd to set up a filter of allowed devices that can appear in 128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a 129 * modctl system call. Specifically it allows one to add patterns of device 130 * paths to include and exclude. It allows for a collection of symlinks to be 131 * added and it allows for remapping names. 132 * 133 * When operating in a non-global zone, several of the sdev vnops are redirected 134 * to the profile versions. These impose additional restrictions such as 135 * enforcing that a non-global zone's /dev is read only. 136 * 137 * sdev_node_t States 138 * ------------------ 139 * 140 * A given sdev_node_t has a field called the sdev_state which describes where 141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT, 142 * SDEV_READY, and SDEV_ZOMBIE. 143 * 144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node 145 * is allocated, initialized and added to the directory's 146 * sdev_node cache. A node at this state will also 147 * have the SDEV_LOOKUP flag set. 148 * 149 * Other threads that are trying to look up a node at 150 * this state will be blocked until the SDEV_LOOKUP flag 151 * is cleared. 152 * 153 * When the SDEV_LOOKUP flag is cleared, the node may 154 * transition into the SDEV_READY state for a successful 155 * lookup or the node is removed from the directory cache 156 * and destroyed if the named node can not be found. 157 * An ENOENT error is returned for the second case. 158 * 159 * SDEV_READY: A /dev file has been successfully looked up and 160 * associated with a vnode. The /dev file is available 161 * for the supported /dev file system operations. 162 * 163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued 164 * to an SDEV_READY node. The node is transitioned into 165 * the SDEV_ZOMBIE state if the vnode reference count 166 * is still held. 
A SDEV_ZOMBIE node does not support 167 * any of the /dev file system operations. A SDEV_ZOMBIE 168 * node is immediately removed from the directory cache 169 * and destroyed once the reference count reaches zero. 170 * 171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the 172 * underlying directory caches. This has been the source of numerous bugs and 173 * thus to better mimic what happens on a real file system, it is no longer the 174 * case. 175 * 176 * The following state machine describes the life cycle of a given node and its 177 * associated states: 178 * 179 * node is . . . . . 180 * allocated via . +-------------+ . . . . . . . vnode_t refcount 181 * sdev_nodeinit() . | Unallocated | . reaches zero and 182 * +--------*-----| Memory |<--------*---+ sdev_inactive is 183 * | +-------------+ | called. 184 * | +------------^ | 185 * v | | 186 * +-----------+ * . . sdev_nodeready() +-------------+ 187 * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | 188 * +-----------+ | failure +-------------+ 189 * | | ^ 190 * | | +------------+ | 191 * +-*----------->| SDEV_READY |--------*-----+ 192 * . +------------+ . The node is no longer 193 * . . node successfully . . . . . valid or we've been 194 * inserted into the asked to remove it. 195 * directory cache This happens via 196 * and sdev_nodeready() sdev_dirdelete(). 197 * call successful. 198 * 199 * Adding and Removing Dirents, Zombie Nodes 200 * ----------------------------------------- 201 * 202 * As part of doing a lookup, readdir, or an explicit creation operation like 203 * mkdir or create, nodes may be created. Every directory has an avl tree which 204 * contains its children, the sdev_entries tree. This is only used if the type 205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and 206 * it is managed through sdev_cache_update(). 207 * 208 * Every sdev_node_t has a field sdev_state, which describes the current state 209 * of the node. 
A node is generally speaking in the SDEV_READY state. When it is 210 * there, it can be looked up, accessed, and operations performed on it. When a 211 * node is going to be removed from the directory cache it is marked as a 212 * zombie. Once a node becomes a zombie, no other file system operations will 213 * succeed and it will continue to exist as a node until the vnode count on the 214 * node reaches zero. At that point, the node will be freed. However, once a 215 * node has been marked as a zombie, it will be removed immediately from the 216 * directory cache such that no one else may find it again. This means that 217 * someone else can insert a new entry into that directory with the same name 218 * and without a problem. 219 * 220 * To remove a node, see the section on that in The Rules. 221 * 222 * The Rules 223 * --------- 224 * These are the rules to live by when working in sdev. These are not 225 * exhaustive. 226 * 227 * - Set 1: Working with Backing Nodes 228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. 229 * o If we find a backing node when looking up an sdev_node_t for the first 230 * time, we use its attributes to build our sdev_node_t. 231 * o If there is a found backing node, or we create a backing node, that's 232 * when we grab the hold on its vnode. 233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from 234 * the underlying file system. It must not be searchable or findable. 235 * o We release our hold on the backing node vnode when we destroy the 236 * sdev_node_t. 237 * 238 * - Set 2: Locking rules for sdev (not exhaustive) 239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it 240 * for read or write if manipulating its contents appropriately. 241 * o You must lock your parent before yourself. 242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must 243 * grab the v_lock before the sdev_contents rw_lock. 
244 * o If you release a lock on the node as a part of upgrading it, you must 245 * verify that the node has not become a zombie as a part of this process. 246 * 247 * - Set 3: Zombie Status and What it Means 248 * o If you encounter a node that is a ZOMBIE, that means that it has been 249 * unlinked from the backing store. 250 * o If you release your contents lock and acquire it again (say as part of 251 * trying to grab a write lock) you must check that the node has not become 252 * a zombie. 253 * o You should VERIFY that a looked up node is not a zombie. This follows 254 * from the following logic. To mark something as a zombie means that it is 255 * removed from the parent's directory cache. To do that, you must have a 256 * write lock on the parent's sdev_contents. To lookup through that 257 * directory you must have a read lock. This then becomes a simple ordering 258 * problem. If you've been granted the lock then the other operation cannot 259 * be in progress or must have already succeeded. 260 * 261 * - Set 4: Removing Directory Entries (aka making nodes Zombies) 262 * o Write lock must be held on the directory 263 * o Write lock must be held on the node 264 * o Remove the sdev_node_t from its parent cache 265 * o Remove the corresponding backing store node, if it exists, eg. use 266 * VOP_REMOVE or VOP_RMDIR. 267 * o You must NOT make any change in the vnode reference count! Nodes should 268 * only be cleaned up through VOP_INACTIVE callbacks. 269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of 270 * the backing store vnode that was grabbed during lookup. 
271 * 272 * - Set 5: What Nodes may be Persisted 273 * o The root, /dev is always persisted 274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted 275 * unless it is also marked SDEV_PERSIST 276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that 277 * along to the child as long as it does not contradict the above rules 278 */ 279 280 #include <sys/types.h> 281 #include <sys/param.h> 282 #include <sys/t_lock.h> 283 #include <sys/systm.h> 284 #include <sys/sysmacros.h> 285 #include <sys/user.h> 286 #include <sys/time.h> 287 #include <sys/vfs.h> 288 #include <sys/vnode.h> 289 #include <sys/vfs_opreg.h> 290 #include <sys/file.h> 291 #include <sys/fcntl.h> 292 #include <sys/flock.h> 293 #include <sys/kmem.h> 294 #include <sys/uio.h> 295 #include <sys/errno.h> 296 #include <sys/stat.h> 297 #include <sys/cred.h> 298 #include <sys/dirent.h> 299 #include <sys/pathname.h> 300 #include <sys/cmn_err.h> 301 #include <sys/debug.h> 302 #include <sys/policy.h> 303 #include <vm/hat.h> 304 #include <vm/seg_vn.h> 305 #include <vm/seg_map.h> 306 #include <vm/seg.h> 307 #include <vm/as.h> 308 #include <vm/page.h> 309 #include <sys/proc.h> 310 #include <sys/mode.h> 311 #include <sys/sunndi.h> 312 #include <sys/ptms.h> 313 #include <fs/fs_subr.h> 314 #include <sys/fs/dv_node.h> 315 #include <sys/fs/sdev_impl.h> 316 317 /*ARGSUSED*/ 318 static int 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 320 { 321 struct sdev_node *dv = VTOSDEV(*vpp); 322 struct sdev_node *ddv = dv->sdev_dotdot; 323 int error = 0; 324 325 if ((*vpp)->v_type == VDIR) 326 return (0); 327 328 if (!SDEV_IS_GLOBAL(dv)) 329 return (ENOTSUP); 330 331 if ((*vpp)->v_type == VLNK) 332 return (ENOENT); 333 ASSERT((*vpp)->v_type == VREG); 334 if ((*vpp)->v_type != VREG) 335 return (ENOTSUP); 336 337 ASSERT(ddv); 338 rw_enter(&ddv->sdev_contents, RW_READER); 339 if (dv->sdev_attrvp == NULL) { 340 rw_exit(&ddv->sdev_contents); 341 return (ENOENT); 
342 } 343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct); 344 rw_exit(&ddv->sdev_contents); 345 return (error); 346 } 347 348 /*ARGSUSED1*/ 349 static int 350 sdev_close(struct vnode *vp, int flag, int count, 351 offset_t offset, struct cred *cred, caller_context_t *ct) 352 { 353 struct sdev_node *dv = VTOSDEV(vp); 354 355 if (vp->v_type == VDIR) { 356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 357 cleanshares(vp, ttoproc(curthread)->p_pid); 358 return (0); 359 } 360 361 if (!SDEV_IS_GLOBAL(dv)) 362 return (ENOTSUP); 363 364 ASSERT(vp->v_type == VREG); 365 if (vp->v_type != VREG) 366 return (ENOTSUP); 367 368 ASSERT(dv->sdev_attrvp); 369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct)); 370 } 371 372 /*ARGSUSED*/ 373 static int 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 375 struct caller_context *ct) 376 { 377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp); 378 int error; 379 380 if (!SDEV_IS_GLOBAL(dv)) 381 return (EINVAL); 382 383 if (vp->v_type == VDIR) 384 return (EISDIR); 385 386 /* only supporting regular files in /dev */ 387 ASSERT(vp->v_type == VREG); 388 if (vp->v_type != VREG) 389 return (EINVAL); 390 391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents)); 392 ASSERT(dv->sdev_attrvp); 393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct); 394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct); 395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct); 396 return (error); 397 } 398 399 /*ARGSUSED*/ 400 static int 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 402 struct caller_context *ct) 403 { 404 struct sdev_node *dv = VTOSDEV(vp); 405 int error = 0; 406 407 if (!SDEV_IS_GLOBAL(dv)) 408 return (EINVAL); 409 410 if (vp->v_type == VDIR) 411 return (EISDIR); 412 413 /* only supporting regular files in /dev */ 414 ASSERT(vp->v_type == VREG); 415 if (vp->v_type != VREG) 416 return (EINVAL); 417 418 ASSERT(dv->sdev_attrvp); 419 420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct); 
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct); 422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct); 423 if (error == 0) { 424 sdev_update_timestamps(dv->sdev_attrvp, kcred, 425 AT_MTIME); 426 } 427 return (error); 428 } 429 430 /*ARGSUSED*/ 431 static int 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 433 struct cred *cred, int *rvalp, caller_context_t *ct) 434 { 435 struct sdev_node *dv = VTOSDEV(vp); 436 437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR)) 438 return (ENOTTY); 439 440 ASSERT(vp->v_type == VREG); 441 if (vp->v_type != VREG) 442 return (EINVAL); 443 444 ASSERT(dv->sdev_attrvp); 445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct)); 446 } 447 448 static int 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, 450 struct cred *cr, caller_context_t *ct) 451 { 452 int error = 0; 453 struct sdev_node *dv = VTOSDEV(vp); 454 struct sdev_node *parent = dv->sdev_dotdot; 455 456 ASSERT(parent); 457 458 rw_enter(&parent->sdev_contents, RW_READER); 459 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 460 461 /* 462 * search order: 463 * - for persistent nodes (SDEV_PERSIST): backstore 464 * - for non-persistent nodes: module ops if global, then memory 465 */ 466 if (dv->sdev_attrvp) { 467 rw_exit(&parent->sdev_contents); 468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct); 469 sdev_vattr_merge(dv, vap); 470 } else { 471 ASSERT(dv->sdev_attr); 472 *vap = *dv->sdev_attr; 473 sdev_vattr_merge(dv, vap); 474 rw_exit(&parent->sdev_contents); 475 } 476 477 return (error); 478 } 479 480 /*ARGSUSED4*/ 481 static int 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, 483 struct cred *cred, caller_context_t *ctp) 484 { 485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0)); 486 } 487 488 static int 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 490 struct cred *cr, caller_context_t *ct) 491 { 492 int error; 493 struct sdev_node *dv = VTOSDEV(vp); 494 struct vnode 
*avp = dv->sdev_attrvp; 495 496 if (avp == NULL) { 497 /* return fs_fab_acl() if flavor matches, else do nothing */ 498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED && 499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) || 500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED && 501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE)))) 502 return (fs_fab_acl(vp, vsap, flags, cr, ct)); 503 504 return (ENOSYS); 505 } 506 507 (void) VOP_RWLOCK(avp, 1, ct); 508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 509 VOP_RWUNLOCK(avp, 1, ct); 510 return (error); 511 } 512 513 static int 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 515 struct cred *cr, caller_context_t *ct) 516 { 517 int error; 518 struct sdev_node *dv = VTOSDEV(vp); 519 struct vnode *avp = dv->sdev_attrvp; 520 521 if (dv->sdev_state == SDEV_ZOMBIE) 522 return (0); 523 524 if (avp == NULL) { 525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv)) 526 return (fs_nosys()); 527 ASSERT(dv->sdev_attr); 528 /* 529 * if coming in directly, the acl system call will 530 * have held the read-write lock via VOP_RWLOCK() 531 * If coming in via specfs, specfs will have 532 * held the rw lock on the realvp i.e. us. 
533 */ 534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 535 sdev_vattr_merge(dv, dv->sdev_attr); 536 error = sdev_shadow_node(dv, cr); 537 if (error) { 538 return (fs_nosys()); 539 } 540 541 ASSERT(dv->sdev_attrvp); 542 /* clean out the memory copy if any */ 543 if (dv->sdev_attr) { 544 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 545 dv->sdev_attr = NULL; 546 } 547 avp = dv->sdev_attrvp; 548 } 549 ASSERT(avp); 550 551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct); 552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct); 554 return (error); 555 } 556 557 int 558 sdev_unlocked_access(void *vdv, int mode, struct cred *cr) 559 { 560 struct sdev_node *dv = vdv; 561 int shift = 0; 562 uid_t owner = dv->sdev_attr->va_uid; 563 564 if (crgetuid(cr) != owner) { 565 shift += 3; 566 if (groupmember(dv->sdev_attr->va_gid, cr) == 0) 567 shift += 3; 568 } 569 570 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner, 571 dv->sdev_attr->va_mode << shift, mode)); 572 } 573 574 static int 575 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr, 576 caller_context_t *ct) 577 { 578 struct sdev_node *dv = VTOSDEV(vp); 579 int ret = 0; 580 581 rw_enter(&dv->sdev_contents, RW_READER); 582 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 583 if (dv->sdev_attrvp) { 584 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct); 585 } else if (dv->sdev_attr) { 586 ret = sdev_unlocked_access(dv, mode, cr); 587 if (ret) 588 ret = EACCES; 589 } 590 rw_exit(&dv->sdev_contents); 591 592 return (ret); 593 } 594 595 /* 596 * Lookup 597 */ 598 /*ARGSUSED3*/ 599 static int 600 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 601 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 602 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 603 { 604 struct sdev_node *parent; 605 int error; 606 607 parent = VTOSDEV(dvp); 608 ASSERT(parent); 609 610 /* execute access is required to search the directory */ 611 if 
((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 612 return (error); 613 614 if (!SDEV_IS_GLOBAL(parent)) 615 return (prof_lookup(dvp, nm, vpp, cred)); 616 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); 617 } 618 619 /*ARGSUSED2*/ 620 static int 621 sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 622 int mode, struct vnode **vpp, struct cred *cred, int flag, 623 caller_context_t *ct, vsecattr_t *vsecp) 624 { 625 struct vnode *vp = NULL; 626 struct vnode *avp; 627 struct sdev_node *parent; 628 struct sdev_node *self = NULL; 629 int error = 0; 630 vtype_t type = vap->va_type; 631 632 ASSERT(type != VNON && type != VBAD); 633 634 if ((type == VFIFO) || (type == VSOCK) || 635 (type == VPROC) || (type == VPORT)) 636 return (ENOTSUP); 637 638 parent = VTOSDEV(dvp); 639 ASSERT(parent); 640 641 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 642 if (parent->sdev_state == SDEV_ZOMBIE) { 643 rw_exit(&parent->sdev_dotdot->sdev_contents); 644 return (ENOENT); 645 } 646 647 /* non-global do not allow pure node creation */ 648 if (!SDEV_IS_GLOBAL(parent)) { 649 rw_exit(&parent->sdev_dotdot->sdev_contents); 650 return (prof_lookup(dvp, nm, vpp, cred)); 651 } 652 rw_exit(&parent->sdev_dotdot->sdev_contents); 653 654 /* execute access is required to search the directory */ 655 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 656 return (error); 657 658 /* check existing name */ 659 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 660 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 661 662 /* name found */ 663 if (error == 0) { 664 ASSERT(vp); 665 if (excl == EXCL) { 666 error = EEXIST; 667 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { 668 /* allowing create/read-only an existing directory */ 669 error = EISDIR; 670 } else { 671 error = VOP_ACCESS(vp, mode, 0, cred, ct); 672 } 673 674 if (error) { 675 VN_RELE(vp); 676 return (error); 677 } 678 679 /* truncation first */ 
680 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && 681 (vap->va_size == 0)) { 682 ASSERT(parent->sdev_attrvp); 683 error = VOP_CREATE(parent->sdev_attrvp, 684 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp); 685 686 if (error) { 687 VN_RELE(vp); 688 return (error); 689 } 690 } 691 692 sdev_update_timestamps(vp, kcred, 693 AT_CTIME|AT_MTIME|AT_ATIME); 694 *vpp = vp; 695 return (0); 696 } 697 698 /* bail out early */ 699 if (error != ENOENT) 700 return (error); 701 702 /* verify write access - compliance specifies ENXIO */ 703 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) { 704 if (error == EACCES) 705 error = ENXIO; 706 return (error); 707 } 708 709 /* 710 * For memory-based (ROFS) directory: 711 * - either disallow node creation; 712 * - or implement VOP_CREATE of its own 713 */ 714 rw_enter(&parent->sdev_contents, RW_WRITER); 715 if (!SDEV_IS_PERSIST(parent)) { 716 rw_exit(&parent->sdev_contents); 717 return (ENOTSUP); 718 } 719 ASSERT(parent->sdev_attrvp); 720 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, 721 cred, SDEV_READY); 722 if (error) { 723 rw_exit(&parent->sdev_contents); 724 if (self) 725 SDEV_RELE(self); 726 return (error); 727 } 728 rw_exit(&parent->sdev_contents); 729 730 ASSERT(self); 731 /* take care the timestamps for the node and its parent */ 732 sdev_update_timestamps(SDEVTOV(self), kcred, 733 AT_CTIME|AT_MTIME|AT_ATIME); 734 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 735 if (SDEV_IS_GLOBAL(parent)) 736 atomic_inc_ulong(&parent->sdev_gdir_gen); 737 738 /* wake up other threads blocked on looking up this node */ 739 mutex_enter(&self->sdev_lookup_lock); 740 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 741 mutex_exit(&self->sdev_lookup_lock); 742 error = sdev_to_vp(self, vpp); 743 return (error); 744 } 745 746 static int 747 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, 748 caller_context_t *ct, int flags) 749 { 750 int error; 751 struct sdev_node *parent = (struct sdev_node 
*)VTOSDEV(dvp); 752 struct vnode *vp = NULL; 753 struct sdev_node *dv = NULL; 754 int len; 755 int bkstore; 756 757 /* bail out early */ 758 len = strlen(nm); 759 if (nm[0] == '.') { 760 if (len == 1) { 761 return (EINVAL); 762 } else if (len == 2 && nm[1] == '.') { 763 return (EEXIST); 764 } 765 } 766 767 ASSERT(parent); 768 rw_enter(&parent->sdev_contents, RW_READER); 769 if (!SDEV_IS_GLOBAL(parent)) { 770 rw_exit(&parent->sdev_contents); 771 return (ENOTSUP); 772 } 773 774 /* execute access is required to search the directory */ 775 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 776 rw_exit(&parent->sdev_contents); 777 return (error); 778 } 779 780 /* check existence first */ 781 dv = sdev_cache_lookup(parent, nm); 782 if (dv == NULL) { 783 rw_exit(&parent->sdev_contents); 784 return (ENOENT); 785 } 786 787 vp = SDEVTOV(dv); 788 if ((dv->sdev_state == SDEV_INIT) || 789 (dv->sdev_state == SDEV_ZOMBIE)) { 790 rw_exit(&parent->sdev_contents); 791 VN_RELE(vp); 792 return (ENOENT); 793 } 794 795 /* write access is required to remove an entry */ 796 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 797 rw_exit(&parent->sdev_contents); 798 VN_RELE(vp); 799 return (error); 800 } 801 802 bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; 803 if (!rw_tryupgrade(&parent->sdev_contents)) { 804 rw_exit(&parent->sdev_contents); 805 rw_enter(&parent->sdev_contents, RW_WRITER); 806 /* Make sure we didn't become a zombie */ 807 if (parent->sdev_state == SDEV_ZOMBIE) { 808 rw_exit(&parent->sdev_contents); 809 VN_RELE(vp); 810 return (ENOENT); 811 } 812 } 813 814 /* we do not support unlinking a non-empty directory */ 815 if (vp->v_type == VDIR && dv->sdev_nlink > 2) { 816 rw_exit(&parent->sdev_contents); 817 VN_RELE(vp); 818 return (EBUSY); 819 } 820 821 /* 822 * sdev_dirdelete does the real job of: 823 * - make sure no open ref count 824 * - destroying the sdev_node 825 * - releasing the hold on attrvp 826 */ 827 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); 828 VN_RELE(vp); 829 rw_exit(&parent->sdev_contents); 830 831 /* 832 * best efforts clean up the backing store 833 */ 834 if (bkstore) { 835 ASSERT(parent->sdev_attrvp); 836 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, 837 ct, flags); 838 /* 839 * do not report BUSY error 840 * because the backing store ref count is released 841 * when the last ref count on the sdev_node is 842 * released. 843 */ 844 if (error == EBUSY) { 845 sdcmn_err2(("sdev_remove: device %s is still on" 846 "disk %s\n", nm, parent->sdev_path)); 847 error = 0; 848 } 849 } 850 851 return (error); 852 } 853 854 /* 855 * Some restrictions for this file system: 856 * - both oldnm and newnm are in the scope of /dev file system, 857 * to simply the namespace management model. 
858 */ 859 /*ARGSUSED6*/ 860 static int 861 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, 862 struct cred *cred, caller_context_t *ct, int flags) 863 { 864 struct sdev_node *fromparent = NULL; 865 struct vattr vattr; 866 struct sdev_node *toparent; 867 struct sdev_node *fromdv = NULL; /* source node */ 868 struct vnode *ovp = NULL; /* source vnode */ 869 struct sdev_node *todv = NULL; /* destination node */ 870 struct vnode *nvp = NULL; /* destination vnode */ 871 int samedir = 0; /* set if odvp == ndvp */ 872 struct vnode *realvp; 873 int error = 0; 874 dev_t fsid; 875 int bkstore = 0; 876 vtype_t type; 877 878 /* prevent modifying "." and ".." */ 879 if ((onm[0] == '.' && 880 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 881 (nnm[0] == '.' && 882 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) { 883 return (EINVAL); 884 } 885 886 fromparent = VTOSDEV(odvp); 887 toparent = VTOSDEV(ndvp); 888 889 /* ZOMBIE parent doesn't allow new node creation */ 890 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER); 891 if (fromparent->sdev_state == SDEV_ZOMBIE) { 892 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 893 return (ENOENT); 894 } 895 896 /* renaming only supported for global device nodes */ 897 if (!SDEV_IS_GLOBAL(fromparent)) { 898 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 899 return (ENOTSUP); 900 } 901 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 902 903 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER); 904 if (toparent->sdev_state == SDEV_ZOMBIE) { 905 rw_exit(&toparent->sdev_dotdot->sdev_contents); 906 return (ENOENT); 907 } 908 rw_exit(&toparent->sdev_dotdot->sdev_contents); 909 910 /* 911 * acquire the global lock to prevent 912 * mount/unmount/other rename activities. 
913 */ 914 mutex_enter(&sdev_lock); 915 916 /* check existence of the source node */ 917 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 918 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct, 919 NULL, NULL); 920 if (error) { 921 sdcmn_err2(("sdev_rename: the source node %s exists\n", 922 onm)); 923 mutex_exit(&sdev_lock); 924 return (error); 925 } 926 927 if (VOP_REALVP(ovp, &realvp, ct) == 0) { 928 VN_HOLD(realvp); 929 VN_RELE(ovp); 930 ovp = realvp; 931 } 932 933 /* check existence of destination */ 934 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 935 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct, 936 NULL, NULL); 937 if (error && (error != ENOENT)) { 938 mutex_exit(&sdev_lock); 939 VN_RELE(ovp); 940 return (error); 941 } 942 943 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) { 944 VN_HOLD(realvp); 945 VN_RELE(nvp); 946 nvp = realvp; 947 } 948 949 /* 950 * make sure the source and the destination are 951 * in the same dev filesystem 952 */ 953 if (odvp != ndvp) { 954 vattr.va_mask = AT_FSID; 955 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { 956 mutex_exit(&sdev_lock); 957 VN_RELE(ovp); 958 if (nvp != NULL) 959 VN_RELE(nvp); 960 return (error); 961 } 962 fsid = vattr.va_fsid; 963 vattr.va_mask = AT_FSID; 964 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { 965 mutex_exit(&sdev_lock); 966 VN_RELE(ovp); 967 if (nvp != NULL) 968 VN_RELE(nvp); 969 return (error); 970 } 971 if (fsid != vattr.va_fsid) { 972 mutex_exit(&sdev_lock); 973 VN_RELE(ovp); 974 if (nvp != NULL) 975 VN_RELE(nvp); 976 return (EXDEV); 977 } 978 } 979 980 /* make sure the old entry can be deleted */ 981 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct); 982 if (error) { 983 mutex_exit(&sdev_lock); 984 VN_RELE(ovp); 985 if (nvp != NULL) 986 VN_RELE(nvp); 987 return (error); 988 } 989 990 /* make sure the destination allows creation */ 991 samedir = (fromparent == toparent); 992 if (!samedir) { 993 error = VOP_ACCESS(ndvp, 
VEXEC|VWRITE, 0, cred, ct); 994 if (error) { 995 mutex_exit(&sdev_lock); 996 VN_RELE(ovp); 997 if (nvp != NULL) 998 VN_RELE(nvp); 999 return (error); 1000 } 1001 } 1002 1003 fromdv = VTOSDEV(ovp); 1004 ASSERT(fromdv); 1005 1006 /* destination file exists */ 1007 if (nvp != NULL) { 1008 todv = VTOSDEV(nvp); 1009 ASSERT(todv); 1010 } 1011 1012 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || 1013 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { 1014 mutex_exit(&sdev_lock); 1015 if (nvp != NULL) 1016 VN_RELE(nvp); 1017 VN_RELE(ovp); 1018 return (EACCES); 1019 } 1020 1021 /* 1022 * link source to new target in the memory. Regardless of failure, we 1023 * must rele our hold on nvp. 1024 */ 1025 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); 1026 if (nvp != NULL) 1027 VN_RELE(nvp); 1028 if (error) { 1029 sdcmn_err2(("sdev_rename: renaming %s to %s failed " 1030 " with error %d\n", onm, nnm, error)); 1031 mutex_exit(&sdev_lock); 1032 VN_RELE(ovp); 1033 return (error); 1034 } 1035 1036 /* 1037 * unlink from source 1038 */ 1039 rw_enter(&fromparent->sdev_contents, RW_READER); 1040 fromdv = sdev_cache_lookup(fromparent, onm); 1041 if (fromdv == NULL) { 1042 rw_exit(&fromparent->sdev_contents); 1043 mutex_exit(&sdev_lock); 1044 VN_RELE(ovp); 1045 sdcmn_err2(("sdev_rename: the source is deleted already\n")); 1046 return (0); 1047 } 1048 1049 if (fromdv->sdev_state == SDEV_ZOMBIE) { 1050 rw_exit(&fromparent->sdev_contents); 1051 mutex_exit(&sdev_lock); 1052 VN_RELE(SDEVTOV(fromdv)); 1053 VN_RELE(ovp); 1054 sdcmn_err2(("sdev_rename: the source is being deleted\n")); 1055 return (0); 1056 } 1057 rw_exit(&fromparent->sdev_contents); 1058 ASSERT(SDEVTOV(fromdv) == ovp); 1059 VN_RELE(ovp); 1060 1061 /* clean out the directory contents before it can be removed */ 1062 type = SDEVTOV(fromdv)->v_type; 1063 if (type == VDIR) { 1064 error = sdev_cleandir(fromdv, NULL, 0); 1065 sdcmn_err2(("sdev_rename: cleandir finished with %d\n", 1066 error)); 1067 if 
(error == EBUSY) 1068 error = 0; 1069 } 1070 1071 rw_enter(&fromparent->sdev_contents, RW_WRITER); 1072 bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; 1073 sdev_cache_update(fromparent, &fromdv, onm, 1074 SDEV_CACHE_DELETE); 1075 VN_RELE(SDEVTOV(fromdv)); 1076 1077 /* best effforts clean up the backing store */ 1078 if (bkstore) { 1079 ASSERT(fromparent->sdev_attrvp); 1080 if (type != VDIR) { 1081 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */ 1082 error = VOP_REMOVE(fromparent->sdev_attrvp, 1083 onm, kcred, ct, 0); 1084 } else { 1085 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */ 1086 error = VOP_RMDIR(fromparent->sdev_attrvp, 1087 onm, fromparent->sdev_attrvp, kcred, ct, 0); 1088 } 1089 1090 if (error) { 1091 sdcmn_err2(("sdev_rename: device %s is " 1092 "still on disk %s\n", onm, 1093 fromparent->sdev_path)); 1094 error = 0; 1095 } 1096 } 1097 rw_exit(&fromparent->sdev_contents); 1098 mutex_exit(&sdev_lock); 1099 1100 /* once reached to this point, the rename is regarded successful */ 1101 return (0); 1102 } 1103 1104 /* 1105 * dev-fs version of "ln -s path dev-name" 1106 * tnm - path, e.g. /devices/... or /dev/... 
1107 * lnm - dev_name 1108 */ 1109 /*ARGSUSED6*/ 1110 static int 1111 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, 1112 char *tnm, struct cred *cred, caller_context_t *ct, int flags) 1113 { 1114 int error; 1115 struct vnode *vp = NULL; 1116 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1117 struct sdev_node *self = (struct sdev_node *)NULL; 1118 1119 ASSERT(parent); 1120 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1121 if (parent->sdev_state == SDEV_ZOMBIE) { 1122 rw_exit(&parent->sdev_dotdot->sdev_contents); 1123 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n", 1124 parent->sdev_name)); 1125 return (ENOENT); 1126 } 1127 1128 if (!SDEV_IS_GLOBAL(parent)) { 1129 rw_exit(&parent->sdev_dotdot->sdev_contents); 1130 return (ENOTSUP); 1131 } 1132 rw_exit(&parent->sdev_dotdot->sdev_contents); 1133 1134 /* execute access is required to search a directory */ 1135 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1136 return (error); 1137 1138 /* find existing name */ 1139 /* XXXci - We may need to translate the C-I flags here */ 1140 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1141 if (error == 0) { 1142 ASSERT(vp); 1143 VN_RELE(vp); 1144 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm)); 1145 return (EEXIST); 1146 } 1147 if (error != ENOENT) 1148 return (error); 1149 1150 /* write access is required to create a symlink */ 1151 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) 1152 return (error); 1153 1154 /* put it into memory cache */ 1155 rw_enter(&parent->sdev_contents, RW_WRITER); 1156 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm, 1157 cred, SDEV_READY); 1158 if (error) { 1159 rw_exit(&parent->sdev_contents); 1160 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); 1161 if (self) 1162 SDEV_RELE(self); 1163 1164 return (error); 1165 } 1166 ASSERT(self && (self->sdev_state == SDEV_READY)); 1167 rw_exit(&parent->sdev_contents); 1168 1169 /* 
take care the timestamps for the node and its parent */ 1170 sdev_update_timestamps(SDEVTOV(self), kcred, 1171 AT_CTIME|AT_MTIME|AT_ATIME); 1172 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1173 if (SDEV_IS_GLOBAL(parent)) 1174 atomic_inc_ulong(&parent->sdev_gdir_gen); 1175 1176 /* wake up other threads blocked on looking up this node */ 1177 mutex_enter(&self->sdev_lookup_lock); 1178 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1179 mutex_exit(&self->sdev_lookup_lock); 1180 SDEV_RELE(self); /* don't return with vnode held */ 1181 return (0); 1182 } 1183 1184 /*ARGSUSED6*/ 1185 static int 1186 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, 1187 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp) 1188 { 1189 int error; 1190 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1191 struct sdev_node *self = NULL; 1192 struct vnode *vp = NULL; 1193 1194 ASSERT(parent && parent->sdev_dotdot); 1195 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1196 if (parent->sdev_state == SDEV_ZOMBIE) { 1197 rw_exit(&parent->sdev_dotdot->sdev_contents); 1198 return (ENOENT); 1199 } 1200 1201 /* non-global do not allow pure directory creation */ 1202 if (!SDEV_IS_GLOBAL(parent)) { 1203 rw_exit(&parent->sdev_dotdot->sdev_contents); 1204 return (prof_lookup(dvp, nm, vpp, cred)); 1205 } 1206 rw_exit(&parent->sdev_dotdot->sdev_contents); 1207 1208 /* execute access is required to search the directory */ 1209 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 1210 return (error); 1211 } 1212 1213 /* find existing name */ 1214 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 1215 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1216 if (error == 0) { 1217 VN_RELE(vp); 1218 return (EEXIST); 1219 } 1220 if (error != ENOENT) 1221 return (error); 1222 1223 /* require write access to create a directory */ 1224 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 1225 
return (error); 1226 } 1227 1228 /* put it into memory */ 1229 rw_enter(&parent->sdev_contents, RW_WRITER); 1230 error = sdev_mknode(parent, nm, &self, 1231 va, NULL, NULL, cred, SDEV_READY); 1232 if (error) { 1233 rw_exit(&parent->sdev_contents); 1234 if (self) 1235 SDEV_RELE(self); 1236 return (error); 1237 } 1238 ASSERT(self && (self->sdev_state == SDEV_READY)); 1239 rw_exit(&parent->sdev_contents); 1240 1241 /* take care the timestamps for the node and its parent */ 1242 sdev_update_timestamps(SDEVTOV(self), kcred, 1243 AT_CTIME|AT_MTIME|AT_ATIME); 1244 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1245 if (SDEV_IS_GLOBAL(parent)) 1246 atomic_inc_ulong(&parent->sdev_gdir_gen); 1247 1248 /* wake up other threads blocked on looking up this node */ 1249 mutex_enter(&self->sdev_lookup_lock); 1250 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1251 mutex_exit(&self->sdev_lookup_lock); 1252 *vpp = SDEVTOV(self); 1253 return (0); 1254 } 1255 1256 /* 1257 * allowing removing an empty directory under /dev 1258 */ 1259 /*ARGSUSED*/ 1260 static int 1261 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, 1262 caller_context_t *ct, int flags) 1263 { 1264 int error = 0; 1265 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1266 struct sdev_node *self = NULL; 1267 struct vnode *vp = NULL; 1268 1269 /* bail out early */ 1270 if (strcmp(nm, ".") == 0) 1271 return (EINVAL); 1272 if (strcmp(nm, "..") == 0) 1273 return (EEXIST); /* should be ENOTEMPTY */ 1274 1275 /* no destruction of non-global node */ 1276 ASSERT(parent && parent->sdev_dotdot); 1277 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1278 if (!SDEV_IS_GLOBAL(parent)) { 1279 rw_exit(&parent->sdev_dotdot->sdev_contents); 1280 return (ENOTSUP); 1281 } 1282 rw_exit(&parent->sdev_dotdot->sdev_contents); 1283 1284 /* execute access is required to search the directory */ 1285 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) 1286 return (error); 1287 1288 /* 
check existing name */ 1289 rw_enter(&parent->sdev_contents, RW_WRITER); 1290 self = sdev_cache_lookup(parent, nm); 1291 if (self == NULL) { 1292 rw_exit(&parent->sdev_contents); 1293 return (ENOENT); 1294 } 1295 1296 vp = SDEVTOV(self); 1297 if ((self->sdev_state == SDEV_INIT) || 1298 (self->sdev_state == SDEV_ZOMBIE)) { 1299 rw_exit(&parent->sdev_contents); 1300 VN_RELE(vp); 1301 return (ENOENT); 1302 } 1303 1304 /* some sanity checks */ 1305 if (vp == dvp || vp == cdir) { 1306 rw_exit(&parent->sdev_contents); 1307 VN_RELE(vp); 1308 return (EINVAL); 1309 } 1310 1311 if (vp->v_type != VDIR) { 1312 rw_exit(&parent->sdev_contents); 1313 VN_RELE(vp); 1314 return (ENOTDIR); 1315 } 1316 1317 if (vn_vfswlock(vp)) { 1318 rw_exit(&parent->sdev_contents); 1319 VN_RELE(vp); 1320 return (EBUSY); 1321 } 1322 1323 if (vn_mountedvfs(vp) != NULL) { 1324 rw_exit(&parent->sdev_contents); 1325 vn_vfsunlock(vp); 1326 VN_RELE(vp); 1327 return (EBUSY); 1328 } 1329 1330 self = VTOSDEV(vp); 1331 /* bail out on a non-empty directory */ 1332 rw_enter(&self->sdev_contents, RW_READER); 1333 if (self->sdev_nlink > 2) { 1334 rw_exit(&self->sdev_contents); 1335 rw_exit(&parent->sdev_contents); 1336 vn_vfsunlock(vp); 1337 VN_RELE(vp); 1338 return (ENOTEMPTY); 1339 } 1340 rw_exit(&self->sdev_contents); 1341 1342 /* unlink it from the directory cache */ 1343 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); 1344 rw_exit(&parent->sdev_contents); 1345 vn_vfsunlock(vp); 1346 VN_RELE(vp); 1347 1348 /* best effort to clean up the backing store */ 1349 if (SDEV_IS_PERSIST(parent)) { 1350 ASSERT(parent->sdev_attrvp); 1351 error = VOP_RMDIR(parent->sdev_attrvp, nm, 1352 parent->sdev_attrvp, kcred, ct, flags); 1353 1354 if (error) 1355 sdcmn_err2(("sdev_rmdir: cleaning device %s is on" 1356 " disk error %d\n", parent->sdev_path, error)); 1357 if (error == EBUSY) 1358 error = 0; 1359 1360 } 1361 1362 return (error); 1363 } 1364 1365 /* 1366 * read the contents of a symbolic link 1367 */ 1368 static 
int 1369 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred, 1370 caller_context_t *ct) 1371 { 1372 struct sdev_node *dv; 1373 int error = 0; 1374 1375 ASSERT(vp->v_type == VLNK); 1376 1377 dv = VTOSDEV(vp); 1378 1379 if (dv->sdev_attrvp) { 1380 /* non-NULL attrvp implys a persisted node at READY state */ 1381 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct)); 1382 } else if (dv->sdev_symlink != NULL) { 1383 /* memory nodes, e.g. local nodes */ 1384 rw_enter(&dv->sdev_contents, RW_READER); 1385 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); 1386 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), 1387 UIO_READ, uiop); 1388 rw_exit(&dv->sdev_contents); 1389 return (error); 1390 } 1391 1392 return (ENOENT); 1393 } 1394 1395 /*ARGSUSED4*/ 1396 static int 1397 sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 1398 caller_context_t *ct, int flags) 1399 { 1400 struct sdev_node *parent = VTOSDEV(dvp); 1401 int error; 1402 1403 /* 1404 * We must check that we have execute access to search the directory -- 1405 * but because our sdev_contents lock is already held as a reader (the 1406 * caller must have done a VOP_RWLOCK()), we call directly into the 1407 * underlying access routine if sdev_attr is non-NULL. 
1408 */ 1409 if (parent->sdev_attr != NULL) { 1410 VERIFY(RW_READ_HELD(&parent->sdev_contents)); 1411 1412 if (sdev_unlocked_access(parent, VEXEC, cred) != 0) 1413 return (EACCES); 1414 } else { 1415 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1416 return (error); 1417 } 1418 1419 ASSERT(parent); 1420 if (!SDEV_IS_GLOBAL(parent)) 1421 prof_filldir(parent); 1422 return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE)); 1423 } 1424 1425 /*ARGSUSED1*/ 1426 static void 1427 sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1428 { 1429 devname_inactive_func(vp, cred, NULL); 1430 } 1431 1432 /*ARGSUSED2*/ 1433 static int 1434 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1435 { 1436 struct sdev_node *dv = VTOSDEV(vp); 1437 struct sdev_fid *sdev_fid; 1438 1439 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { 1440 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); 1441 return (ENOSPC); 1442 } 1443 1444 sdev_fid = (struct sdev_fid *)fidp; 1445 bzero(sdev_fid, sizeof (struct sdev_fid)); 1446 sdev_fid->sdevfid_len = 1447 (int)sizeof (struct sdev_fid) - sizeof (ushort_t); 1448 sdev_fid->sdevfid_ino = dv->sdev_ino; 1449 1450 return (0); 1451 } 1452 1453 /* 1454 * This pair of routines bracket all VOP_READ, VOP_WRITE 1455 * and VOP_READDIR requests. The contents lock stops things 1456 * moving around while we're looking at them. 1457 */ 1458 /*ARGSUSED2*/ 1459 static int 1460 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1461 { 1462 rw_enter(&VTOSDEV(vp)->sdev_contents, 1463 write_flag ? RW_WRITER : RW_READER); 1464 return (write_flag ? 
V_WRITELOCK_TRUE : V_WRITELOCK_FALSE); 1465 } 1466 1467 /*ARGSUSED1*/ 1468 static void 1469 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1470 { 1471 rw_exit(&VTOSDEV(vp)->sdev_contents); 1472 } 1473 1474 /*ARGSUSED1*/ 1475 static int 1476 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1477 caller_context_t *ct) 1478 { 1479 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; 1480 1481 ASSERT(vp->v_type != VCHR && 1482 vp->v_type != VBLK && vp->v_type != VLNK); 1483 1484 if (vp->v_type == VDIR) 1485 return (fs_seek(vp, ooff, noffp, ct)); 1486 1487 ASSERT(attrvp); 1488 return (VOP_SEEK(attrvp, ooff, noffp, ct)); 1489 } 1490 1491 /*ARGSUSED1*/ 1492 static int 1493 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 1494 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr, 1495 caller_context_t *ct) 1496 { 1497 int error; 1498 struct sdev_node *dv = VTOSDEV(vp); 1499 1500 ASSERT(dv); 1501 ASSERT(dv->sdev_attrvp); 1502 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, 1503 flk_cbp, cr, ct); 1504 1505 return (error); 1506 } 1507 1508 static int 1509 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 1510 caller_context_t *ct) 1511 { 1512 switch (cmd) { 1513 case _PC_ACL_ENABLED: 1514 *valp = SDEV_ACL_FLAVOR(vp); 1515 return (0); 1516 } 1517 1518 return (fs_pathconf(vp, cmd, valp, cr, ct)); 1519 } 1520 1521 vnodeops_t *sdev_vnodeops; 1522 1523 const fs_operation_def_t sdev_vnodeops_tbl[] = { 1524 VOPNAME_OPEN, { .vop_open = sdev_open }, 1525 VOPNAME_CLOSE, { .vop_close = sdev_close }, 1526 VOPNAME_READ, { .vop_read = sdev_read }, 1527 VOPNAME_WRITE, { .vop_write = sdev_write }, 1528 VOPNAME_IOCTL, { .vop_ioctl = sdev_ioctl }, 1529 VOPNAME_GETATTR, { .vop_getattr = sdev_getattr }, 1530 VOPNAME_SETATTR, { .vop_setattr = sdev_setattr }, 1531 VOPNAME_ACCESS, { .vop_access = sdev_access }, 1532 VOPNAME_LOOKUP, { .vop_lookup = sdev_lookup }, 1533 VOPNAME_CREATE, { .vop_create = sdev_create }, 
1534 VOPNAME_RENAME, { .vop_rename = sdev_rename }, 1535 VOPNAME_REMOVE, { .vop_remove = sdev_remove }, 1536 VOPNAME_MKDIR, { .vop_mkdir = sdev_mkdir }, 1537 VOPNAME_RMDIR, { .vop_rmdir = sdev_rmdir }, 1538 VOPNAME_READDIR, { .vop_readdir = sdev_readdir }, 1539 VOPNAME_SYMLINK, { .vop_symlink = sdev_symlink }, 1540 VOPNAME_READLINK, { .vop_readlink = sdev_readlink }, 1541 VOPNAME_INACTIVE, { .vop_inactive = sdev_inactive }, 1542 VOPNAME_FID, { .vop_fid = sdev_fid }, 1543 VOPNAME_RWLOCK, { .vop_rwlock = sdev_rwlock }, 1544 VOPNAME_RWUNLOCK, { .vop_rwunlock = sdev_rwunlock }, 1545 VOPNAME_SEEK, { .vop_seek = sdev_seek }, 1546 VOPNAME_FRLOCK, { .vop_frlock = sdev_frlock }, 1547 VOPNAME_PATHCONF, { .vop_pathconf = sdev_pathconf }, 1548 VOPNAME_SETSECATTR, { .vop_setsecattr = sdev_setsecattr }, 1549 VOPNAME_GETSECATTR, { .vop_getsecattr = sdev_getsecattr }, 1550 NULL, NULL 1551 }; 1552 1553 int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl); 1554