1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 26 */ 27 28 /* 29 * vnode ops for the /dev filesystem 30 * 31 * - VDIR, VCHR, VBLK, and VLNK are considered must-support file types 32 * - VREG and VDOOR are used for some internal implementations in 33 * the global zone, e.g. devname and devfsadm communication 34 * - other file types are unusual in this namespace and 35 * not supported for now 36 */ 37 38 /* 39 * sdev has a few basic goals: 40 * o Provide /dev for the global zone as well as various non-global zones. 41 * o Provide the basic functionality that devfsadm might need (mknod, 42 * symlinks, etc.) 43 * o Allow persistent permissions on files in /dev. 44 * o Allow for dynamic directories and nodes for use by various services (pts, 45 * zvol, net, etc.) 46 * 47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's 48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's 49 * that we generally care about, dynamic and otherwise. 
50 * 51 * Persisting Information 52 * ---------------------- 53 * 54 * When sdev is mounted, it keeps track of the underlying file system it is 55 * mounted over. In certain situations, sdev will go and create entries in that 56 * underlying file system. These underlying 'back end' nodes are used as proxies 57 * for various changes in permissions. While specific sets of nodes, such as 58 * dynamic ones, are exempt, this process stores permission changes against 59 * these back end nodes. The point of all of this is to allow for these settings 60 * to persist across host and zone reboots. As an example, consider the entry 61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon 62 * changing the permissions on c0t0d0 you'd have the following logical 63 * relationships: 64 * 65 * +------------------+ sdev_vnode +--------------+ 66 * | sdev_node_t |<---------------->| vnode_t | 67 * | /dev/dsk/c0t0d0 |<---------------->| for sdev | 68 * +------------------+ +--------------+ 69 * | 70 * | sdev_attrvp 71 * | 72 * | +---------------------+ 73 * +--->| vnode_t for UFS|ZFS | 74 * | /dev/dsk/c0t0d0 | 75 * +---------------------+ 76 * 77 * sdev is generally in memory. Therefore when a lookup happens and there is no 78 * entry already inside of a directory cache, it will next check the backing 79 * store. If the backing store exists, we will reconstitute the sdev_node based 80 * on the information that we persisted. When we create the backing store node, 81 * we use the struct vattr information that we already have in sdev_node_t. 82 * Because of this, we already know if the entry was previously a symlink, 83 * directory, or some other kind of type. Note that not all types of nodes are 84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are 85 * eligible to be persisted. 86 * 87 * When the sdev_node is created and the lookup is done, we grab a hold on the 88 * underlying vnode as part of the call to VOP_LOOKUP. 
That reference is held 89 * until the sdev_node becomes inactive. Once its reference count reaches one 90 * and the VOP_INACTIVE callback fires leading to the destruction of the node, 91 * the reference on the underlying vnode will be released. 92 * 93 * The backing store node will be deleted only when the node itself is deleted 94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. 95 * 96 * Not everything can be persisted, see The Rules section for more details. 97 * 98 * Dynamic Nodes 99 * ------------- 100 * 101 * Dynamic nodes allow for specific interactions with various kernel subsystems 102 * when looking up directory entries. This allows the lookup and readdir 103 * functions to check against the kernel subsystems for validity. eg. does a 104 * zvol or nic still exist. 105 * 106 * More specifically, when we create various directories we check if the 107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c). 108 * If it does, we swap out the vnode operations into a new set which combines the 109 * normal sdev vnode operations with the dynamic set here. 110 * 111 * In addition, various dynamic nodes implement a verification entry point. This 112 * verification entry is used as a part of lookup and readdir. The goal for 113 * these dynamic nodes is to allow them to check with the underlying subsystems 114 * to ensure that these devices are still present, or if they have gone away, to 115 * remove them from the results. This is indicated by using the SDEV_VTOR flag 116 * in vtab[]. 117 * 118 * Dynamic nodes have additional restrictions placed upon them. They may only 119 * appear at the top level directory of the file system. In addition, users 120 * cannot create dirents below any level of a dynamic node aside from its special 121 * vnops. 122 * 123 * Profiles 124 * -------- 125 * 126 * Profiles exist for the purpose of non-global zones. 
They work with the zone 127 * brands and zoneadmd to set up a filter of allowed devices that can appear in 128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a 129 * modctl system call. Specifically it allows one to add patterns of device 130 * paths to include and exclude. It allows for a collection of symlinks to be 131 * added and it allows for remapping names. 132 * 133 * When operating in a non-global zone, several of the sdev vnops are redirected 134 * to the profile versions. These impose additional restrictions such as 135 * enforcing that a non-global zone's /dev is read only. 136 * 137 * sdev_node_t States 138 * ------------------ 139 * 140 * A given sdev_node_t has a field called the sdev_state which describes where 141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT, 142 * SDEV_READY, and SDEV_ZOMBIE. 143 * 144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node 145 * is allocated, initialized and added to the directory's 146 * sdev_node cache. A node at this state will also 147 * have the SDEV_LOOKUP flag set. 148 * 149 * Other threads that are trying to look up a node at 150 * this state will be blocked until the SDEV_LOOKUP flag 151 * is cleared. 152 * 153 * When the SDEV_LOOKUP flag is cleared, the node may 154 * transition into the SDEV_READY state for a successful 155 * lookup or the node is removed from the directory cache 156 * and destroyed if the named node can not be found. 157 * An ENOENT error is returned for the second case. 158 * 159 * SDEV_READY: A /dev file has been successfully looked up and 160 * associated with a vnode. The /dev file is available 161 * for the supported /dev file system operations. 162 * 163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued 164 * to an SDEV_READY node. The node is transitioned into 165 * the SDEV_ZOMBIE state if the vnode reference count 166 * is still held. 
A SDEV_ZOMBIE node does not support 167 * any of the /dev file system operations. A SDEV_ZOMBIE 168 * node is immediately removed from the directory cache 169 * and destroyed once the reference count reaches zero. 170 * 171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the 172 * underlying directory caches. This has been the source of numerous bugs and 173 * thus to better mimic what happens on a real file system, it is no longer the 174 * case. 175 * 176 * The following state machine describes the life cycle of a given node and its 177 * associated states: 178 * 179 * node is . . . . . 180 * allocated via . +-------------+ . . . . . . . vnode_t refcount 181 * sdev_nodeinit() . | Unallocated | . reaches zero and 182 * +--------*-----| Memory |<--------*---+ sdev_inactive is 183 * | +-------------+ | called. 184 * | +------------^ | called. 185 * v | | 186 * +-----------+ * . . sdev_nodeready() +-------------+ 187 * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | 188 * +-----------+ | failure +-------------+ 189 * | | ^ 190 * | | +------------+ | 191 * +-*----------->| SDEV_READY |--------*-----+ 192 * . +------------+ . The node is no longer 193 * . . node successfully . . . . . valid or we've been 194 * inserted into the asked to remove it. 195 * directory cache This happens via 196 * and sdev_nodeready() sdev_dirdelete(). 197 * call successful. 198 * 199 * Adding and Removing Dirents, Zombie Nodes 200 * ----------------------------------------- 201 * 202 * As part of doing a lookup, readdir, or an explicit creation operation like 203 * mkdir or create, nodes may be created. Every directory has an avl tree which 204 * contains its children, the sdev_entries tree. This is only used if the type 205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and 206 * it is managed through sdev_cache_update(). 207 * 208 * Every sdev_node_t has a field sdev_state, which describes the current state 209 * of the node. 
A node is generally speaking in the SDEV_READY state. When it is 210 * there, it can be looked up, accessed, and operations performed on it. When a 211 * node is going to be removed from the directory cache it is marked as a 212 * zombie. Once a node becomes a zombie, no other file system operations will 213 * succeed and it will continue to exist as a node until the vnode count on the 214 * node reaches zero. At that point, the node will be freed. However, once a 215 * node has been marked as a zombie, it will be removed immediately from the 216 * directory cache such that no one else may find it again. This means that 217 * someone else can insert a new entry into that directory with the same name 218 * and without a problem. 219 * 220 * To remove a node, see the section on that in The Rules. 221 * 222 * The Rules 223 * --------- 224 * These are the rules to live by when working in sdev. These are not 225 * exhaustive. 226 * 227 * - Set 1: Working with Backing Nodes 228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. 229 * o If we find a backing node when looking up an sdev_node_t for the first 230 * time, we use its attributes to build our sdev_node_t. 231 * o If there is a found backing node, or we create a backing node, that's 232 * when we grab the hold on its vnode. 233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from 234 * the underlying file system. It must not be searchable or findable. 235 * o We release our hold on the backing node vnode when we destroy the 236 * sdev_node_t. 237 * 238 * - Set 2: Locking rules for sdev (not exhaustive) 239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it 240 * for read or write if manipulating its contents appropriately. 241 * o You must lock your parent before yourself. 242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must 243 * grab the v_lock before the sdev_contents rw_lock. 
244 * o If you release a lock on the node as a part of upgrading it, you must 245 * verify that the node has not become a zombie as a part of this process. 246 * 247 * - Set 3: Zombie Status and What it Means 248 * o If you encounter a node that is a ZOMBIE, that means that it has been 249 * unlinked from the backing store. 250 * o If you release your contents lock and acquire it again (say as part of 251 * trying to grab a write lock) you must check that the node has not become 252 * a zombie. 253 * o You should VERIFY that a looked up node is not a zombie. This follows 254 * from the following logic. To mark something as a zombie means that it is 255 * removed from the parents directory cache. To do that, you must have a 256 * write lock on the parent's sdev_contents. To lookup through that 257 * directory you must have a read lock. This then becomes a simple ordering 258 * problem. If you've been granted the lock then the other operation cannot 259 * be in progress or must have already succeeded. 260 * 261 * - Set 4: Removing Directory Entries (aka making nodes Zombies) 262 * o Write lock must be held on the directory 263 * o Write lock must be held on the node 264 * o Remove the sdev_node_t from its parent cache 265 * o Remove the corresponding backing store node, if it exists, eg. use 266 * VOP_REMOVE or VOP_RMDIR. 267 * o You must NOT make any change in the vnode reference count! Nodes should 268 * only be cleaned up through VOP_INACTIVE callbacks. 269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of 270 * the backing store vnode that was grabbed during lookup. 
271 * 272 * - Set 5: What Nodes may be Persisted 273 * o The root, /dev is always persisted 274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted 275 * unless it is also marked SDEV_PERSIST 276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that 277 * along to the child as long as it does not contradict the above rules 278 */ 279 280 #include <sys/types.h> 281 #include <sys/param.h> 282 #include <sys/t_lock.h> 283 #include <sys/systm.h> 284 #include <sys/sysmacros.h> 285 #include <sys/user.h> 286 #include <sys/time.h> 287 #include <sys/vfs.h> 288 #include <sys/vnode.h> 289 #include <sys/vfs_opreg.h> 290 #include <sys/file.h> 291 #include <sys/fcntl.h> 292 #include <sys/flock.h> 293 #include <sys/kmem.h> 294 #include <sys/uio.h> 295 #include <sys/errno.h> 296 #include <sys/stat.h> 297 #include <sys/cred.h> 298 #include <sys/dirent.h> 299 #include <sys/pathname.h> 300 #include <sys/cmn_err.h> 301 #include <sys/debug.h> 302 #include <sys/policy.h> 303 #include <vm/hat.h> 304 #include <vm/seg_vn.h> 305 #include <vm/seg_map.h> 306 #include <vm/seg.h> 307 #include <vm/as.h> 308 #include <vm/page.h> 309 #include <sys/proc.h> 310 #include <sys/mode.h> 311 #include <sys/sunndi.h> 312 #include <sys/ptms.h> 313 #include <fs/fs_subr.h> 314 #include <sys/fs/dv_node.h> 315 #include <sys/fs/sdev_impl.h> 316 317 /*ARGSUSED*/ 318 static int 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 320 { 321 struct sdev_node *dv = VTOSDEV(*vpp); 322 struct sdev_node *ddv = dv->sdev_dotdot; 323 int error = 0; 324 325 if ((*vpp)->v_type == VDIR) 326 return (0); 327 328 if (!SDEV_IS_GLOBAL(dv)) 329 return (ENOTSUP); 330 331 if ((*vpp)->v_type == VLNK) 332 return (ENOENT); 333 ASSERT((*vpp)->v_type == VREG); 334 if ((*vpp)->v_type != VREG) 335 return (ENOTSUP); 336 337 ASSERT(ddv); 338 rw_enter(&ddv->sdev_contents, RW_READER); 339 if (dv->sdev_attrvp == NULL) { 340 rw_exit(&ddv->sdev_contents); 341 return (ENOENT); 
342 } 343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct); 344 rw_exit(&ddv->sdev_contents); 345 return (error); 346 } 347 348 /*ARGSUSED1*/ 349 static int 350 sdev_close(struct vnode *vp, int flag, int count, 351 offset_t offset, struct cred *cred, caller_context_t *ct) 352 { 353 struct sdev_node *dv = VTOSDEV(vp); 354 355 if (vp->v_type == VDIR) { 356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 357 cleanshares(vp, ttoproc(curthread)->p_pid); 358 return (0); 359 } 360 361 if (!SDEV_IS_GLOBAL(dv)) 362 return (ENOTSUP); 363 364 ASSERT(vp->v_type == VREG); 365 if (vp->v_type != VREG) 366 return (ENOTSUP); 367 368 ASSERT(dv->sdev_attrvp); 369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct)); 370 } 371 372 /*ARGSUSED*/ 373 static int 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 375 struct caller_context *ct) 376 { 377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp); 378 int error; 379 380 if (!SDEV_IS_GLOBAL(dv)) 381 return (EINVAL); 382 383 if (vp->v_type == VDIR) 384 return (EISDIR); 385 386 /* only supporting regular files in /dev */ 387 ASSERT(vp->v_type == VREG); 388 if (vp->v_type != VREG) 389 return (EINVAL); 390 391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents)); 392 ASSERT(dv->sdev_attrvp); 393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct); 394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct); 395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct); 396 return (error); 397 } 398 399 /*ARGSUSED*/ 400 static int 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 402 struct caller_context *ct) 403 { 404 struct sdev_node *dv = VTOSDEV(vp); 405 int error = 0; 406 407 if (!SDEV_IS_GLOBAL(dv)) 408 return (EINVAL); 409 410 if (vp->v_type == VDIR) 411 return (EISDIR); 412 413 /* only supporting regular files in /dev */ 414 ASSERT(vp->v_type == VREG); 415 if (vp->v_type != VREG) 416 return (EINVAL); 417 418 ASSERT(dv->sdev_attrvp); 419 420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct); 
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct); 422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct); 423 if (error == 0) { 424 sdev_update_timestamps(dv->sdev_attrvp, kcred, 425 AT_MTIME); 426 } 427 return (error); 428 } 429 430 /*ARGSUSED*/ 431 static int 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 433 struct cred *cred, int *rvalp, caller_context_t *ct) 434 { 435 struct sdev_node *dv = VTOSDEV(vp); 436 437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR)) 438 return (ENOTTY); 439 440 ASSERT(vp->v_type == VREG); 441 if (vp->v_type != VREG) 442 return (EINVAL); 443 444 ASSERT(dv->sdev_attrvp); 445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct)); 446 } 447 448 static int 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, 450 struct cred *cr, caller_context_t *ct) 451 { 452 int error = 0; 453 struct sdev_node *dv = VTOSDEV(vp); 454 struct sdev_node *parent = dv->sdev_dotdot; 455 456 ASSERT(parent); 457 458 rw_enter(&parent->sdev_contents, RW_READER); 459 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 460 461 /* 462 * search order: 463 * - for persistent nodes (SDEV_PERSIST): backstore 464 * - for non-persistent nodes: module ops if global, then memory 465 */ 466 if (dv->sdev_attrvp) { 467 rw_exit(&parent->sdev_contents); 468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct); 469 sdev_vattr_merge(dv, vap); 470 } else { 471 ASSERT(dv->sdev_attr); 472 *vap = *dv->sdev_attr; 473 sdev_vattr_merge(dv, vap); 474 rw_exit(&parent->sdev_contents); 475 } 476 477 return (error); 478 } 479 480 /*ARGSUSED4*/ 481 static int 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, 483 struct cred *cred, caller_context_t *ctp) 484 { 485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0)); 486 } 487 488 static int 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 490 struct cred *cr, caller_context_t *ct) 491 { 492 int error; 493 struct sdev_node *dv = VTOSDEV(vp); 494 struct vnode 
*avp = dv->sdev_attrvp; 495 496 if (avp == NULL) { 497 /* return fs_fab_acl() if flavor matches, else do nothing */ 498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED && 499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) || 500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED && 501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE)))) 502 return (fs_fab_acl(vp, vsap, flags, cr, ct)); 503 504 return (ENOSYS); 505 } 506 507 (void) VOP_RWLOCK(avp, 1, ct); 508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 509 VOP_RWUNLOCK(avp, 1, ct); 510 return (error); 511 } 512 513 static int 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 515 struct cred *cr, caller_context_t *ct) 516 { 517 int error; 518 struct sdev_node *dv = VTOSDEV(vp); 519 struct vnode *avp = dv->sdev_attrvp; 520 521 if (dv->sdev_state == SDEV_ZOMBIE) 522 return (0); 523 524 if (avp == NULL) { 525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv)) 526 return (fs_nosys()); 527 ASSERT(dv->sdev_attr); 528 /* 529 * if coming in directly, the acl system call will 530 * have held the read-write lock via VOP_RWLOCK() 531 * If coming in via specfs, specfs will have 532 * held the rw lock on the realvp i.e. us. 
533 */ 534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 535 sdev_vattr_merge(dv, dv->sdev_attr); 536 error = sdev_shadow_node(dv, cr); 537 if (error) { 538 return (fs_nosys()); 539 } 540 541 ASSERT(dv->sdev_attrvp); 542 /* clean out the memory copy if any */ 543 if (dv->sdev_attr) { 544 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 545 dv->sdev_attr = NULL; 546 } 547 avp = dv->sdev_attrvp; 548 } 549 ASSERT(avp); 550 551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct); 552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct); 554 return (error); 555 } 556 557 int 558 sdev_unlocked_access(void *vdv, int mode, struct cred *cr) 559 { 560 struct sdev_node *dv = vdv; 561 int shift = 0; 562 uid_t owner = dv->sdev_attr->va_uid; 563 564 if (crgetuid(cr) != owner) { 565 shift += 3; 566 if (groupmember(dv->sdev_attr->va_gid, cr) == 0) 567 shift += 3; 568 } 569 570 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner, 571 dv->sdev_attr->va_mode << shift, mode)); 572 } 573 574 static int 575 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr, 576 caller_context_t *ct) 577 { 578 struct sdev_node *dv = VTOSDEV(vp); 579 int ret = 0; 580 581 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 582 583 if (dv->sdev_attrvp) { 584 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct); 585 } else if (dv->sdev_attr) { 586 rw_enter(&dv->sdev_contents, RW_READER); 587 ret = sdev_unlocked_access(dv, mode, cr); 588 if (ret) 589 ret = EACCES; 590 rw_exit(&dv->sdev_contents); 591 } 592 593 return (ret); 594 } 595 596 /* 597 * Lookup 598 */ 599 /*ARGSUSED3*/ 600 static int 601 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 602 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 603 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 604 { 605 struct sdev_node *parent; 606 int error; 607 608 parent = VTOSDEV(dvp); 609 ASSERT(parent); 610 611 /* execute access is required to search the directory */ 612 if 
((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 613 return (error); 614 615 if (!SDEV_IS_GLOBAL(parent)) 616 return (prof_lookup(dvp, nm, vpp, cred)); 617 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); 618 } 619 620 /*ARGSUSED2*/ 621 static int 622 sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 623 int mode, struct vnode **vpp, struct cred *cred, int flag, 624 caller_context_t *ct, vsecattr_t *vsecp) 625 { 626 struct vnode *vp = NULL; 627 struct vnode *avp; 628 struct sdev_node *parent; 629 struct sdev_node *self = NULL; 630 int error = 0; 631 vtype_t type = vap->va_type; 632 633 ASSERT(type != VNON && type != VBAD); 634 635 if ((type == VFIFO) || (type == VSOCK) || 636 (type == VPROC) || (type == VPORT)) 637 return (ENOTSUP); 638 639 parent = VTOSDEV(dvp); 640 ASSERT(parent); 641 642 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 643 if (parent->sdev_state == SDEV_ZOMBIE) { 644 rw_exit(&parent->sdev_dotdot->sdev_contents); 645 return (ENOENT); 646 } 647 648 /* non-global do not allow pure node creation */ 649 if (!SDEV_IS_GLOBAL(parent)) { 650 rw_exit(&parent->sdev_dotdot->sdev_contents); 651 return (prof_lookup(dvp, nm, vpp, cred)); 652 } 653 rw_exit(&parent->sdev_dotdot->sdev_contents); 654 655 /* execute access is required to search the directory */ 656 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 657 return (error); 658 659 /* check existing name */ 660 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 661 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 662 663 /* name found */ 664 if (error == 0) { 665 ASSERT(vp); 666 if (excl == EXCL) { 667 error = EEXIST; 668 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { 669 /* allowing create/read-only an existing directory */ 670 error = EISDIR; 671 } else { 672 error = VOP_ACCESS(vp, mode, 0, cred, ct); 673 } 674 675 if (error) { 676 VN_RELE(vp); 677 return (error); 678 } 679 680 /* truncation first */ 
681 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && 682 (vap->va_size == 0)) { 683 ASSERT(parent->sdev_attrvp); 684 error = VOP_CREATE(parent->sdev_attrvp, 685 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp); 686 687 if (error) { 688 VN_RELE(vp); 689 return (error); 690 } 691 } 692 693 sdev_update_timestamps(vp, kcred, 694 AT_CTIME|AT_MTIME|AT_ATIME); 695 *vpp = vp; 696 return (0); 697 } 698 699 /* bail out early */ 700 if (error != ENOENT) 701 return (error); 702 703 /* verify write access - compliance specifies ENXIO */ 704 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) { 705 if (error == EACCES) 706 error = ENXIO; 707 return (error); 708 } 709 710 /* 711 * For memory-based (ROFS) directory: 712 * - either disallow node creation; 713 * - or implement VOP_CREATE of its own 714 */ 715 rw_enter(&parent->sdev_contents, RW_WRITER); 716 if (!SDEV_IS_PERSIST(parent)) { 717 rw_exit(&parent->sdev_contents); 718 return (ENOTSUP); 719 } 720 ASSERT(parent->sdev_attrvp); 721 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, 722 cred, SDEV_READY); 723 if (error) { 724 rw_exit(&parent->sdev_contents); 725 if (self) 726 SDEV_RELE(self); 727 return (error); 728 } 729 rw_exit(&parent->sdev_contents); 730 731 ASSERT(self); 732 /* take care the timestamps for the node and its parent */ 733 sdev_update_timestamps(SDEVTOV(self), kcred, 734 AT_CTIME|AT_MTIME|AT_ATIME); 735 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 736 if (SDEV_IS_GLOBAL(parent)) 737 atomic_inc_ulong(&parent->sdev_gdir_gen); 738 739 /* wake up other threads blocked on looking up this node */ 740 mutex_enter(&self->sdev_lookup_lock); 741 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 742 mutex_exit(&self->sdev_lookup_lock); 743 error = sdev_to_vp(self, vpp); 744 return (error); 745 } 746 747 static int 748 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, 749 caller_context_t *ct, int flags) 750 { 751 int error; 752 struct sdev_node *parent = (struct sdev_node 
*)VTOSDEV(dvp); 753 struct vnode *vp = NULL; 754 struct sdev_node *dv = NULL; 755 int len; 756 int bkstore; 757 758 /* bail out early */ 759 len = strlen(nm); 760 if (nm[0] == '.') { 761 if (len == 1) { 762 return (EINVAL); 763 } else if (len == 2 && nm[1] == '.') { 764 return (EEXIST); 765 } 766 } 767 768 ASSERT(parent); 769 rw_enter(&parent->sdev_contents, RW_READER); 770 if (!SDEV_IS_GLOBAL(parent)) { 771 rw_exit(&parent->sdev_contents); 772 return (ENOTSUP); 773 } 774 775 /* execute access is required to search the directory */ 776 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 777 rw_exit(&parent->sdev_contents); 778 return (error); 779 } 780 781 /* check existence first */ 782 dv = sdev_cache_lookup(parent, nm); 783 if (dv == NULL) { 784 rw_exit(&parent->sdev_contents); 785 return (ENOENT); 786 } 787 788 vp = SDEVTOV(dv); 789 if ((dv->sdev_state == SDEV_INIT) || 790 (dv->sdev_state == SDEV_ZOMBIE)) { 791 rw_exit(&parent->sdev_contents); 792 VN_RELE(vp); 793 return (ENOENT); 794 } 795 796 /* write access is required to remove an entry */ 797 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 798 rw_exit(&parent->sdev_contents); 799 VN_RELE(vp); 800 return (error); 801 } 802 803 bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; 804 if (!rw_tryupgrade(&parent->sdev_contents)) { 805 rw_exit(&parent->sdev_contents); 806 rw_enter(&parent->sdev_contents, RW_WRITER); 807 /* Make sure we didn't become a zombie */ 808 if (parent->sdev_state == SDEV_ZOMBIE) { 809 rw_exit(&parent->sdev_contents); 810 VN_RELE(vp); 811 return (ENOENT); 812 } 813 } 814 815 /* we do not support unlinking a non-empty directory */ 816 if (vp->v_type == VDIR && dv->sdev_nlink > 2) { 817 rw_exit(&parent->sdev_contents); 818 VN_RELE(vp); 819 return (EBUSY); 820 } 821 822 /* 823 * sdev_dirdelete does the real job of: 824 * - make sure no open ref count 825 * - destroying the sdev_node 826 * - releasing the hold on attrvp 827 */ 828 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); 829 VN_RELE(vp); 830 rw_exit(&parent->sdev_contents); 831 832 /* 833 * best efforts clean up the backing store 834 */ 835 if (bkstore) { 836 ASSERT(parent->sdev_attrvp); 837 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, 838 ct, flags); 839 /* 840 * do not report BUSY error 841 * because the backing store ref count is released 842 * when the last ref count on the sdev_node is 843 * released. 844 */ 845 if (error == EBUSY) { 846 sdcmn_err2(("sdev_remove: device %s is still on" 847 "disk %s\n", nm, parent->sdev_path)); 848 error = 0; 849 } 850 } 851 852 return (error); 853 } 854 855 /* 856 * Some restrictions for this file system: 857 * - both oldnm and newnm are in the scope of /dev file system, 858 * to simply the namespace management model. 
859 */ 860 /*ARGSUSED6*/ 861 static int 862 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, 863 struct cred *cred, caller_context_t *ct, int flags) 864 { 865 struct sdev_node *fromparent = NULL; 866 struct vattr vattr; 867 struct sdev_node *toparent; 868 struct sdev_node *fromdv = NULL; /* source node */ 869 struct vnode *ovp = NULL; /* source vnode */ 870 struct sdev_node *todv = NULL; /* destination node */ 871 struct vnode *nvp = NULL; /* destination vnode */ 872 int samedir = 0; /* set if odvp == ndvp */ 873 struct vnode *realvp; 874 int error = 0; 875 dev_t fsid; 876 int bkstore = 0; 877 vtype_t type; 878 879 /* prevent modifying "." and ".." */ 880 if ((onm[0] == '.' && 881 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 882 (nnm[0] == '.' && 883 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) { 884 return (EINVAL); 885 } 886 887 fromparent = VTOSDEV(odvp); 888 toparent = VTOSDEV(ndvp); 889 890 /* ZOMBIE parent doesn't allow new node creation */ 891 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER); 892 if (fromparent->sdev_state == SDEV_ZOMBIE) { 893 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 894 return (ENOENT); 895 } 896 897 /* renaming only supported for global device nodes */ 898 if (!SDEV_IS_GLOBAL(fromparent)) { 899 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 900 return (ENOTSUP); 901 } 902 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 903 904 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER); 905 if (toparent->sdev_state == SDEV_ZOMBIE) { 906 rw_exit(&toparent->sdev_dotdot->sdev_contents); 907 return (ENOENT); 908 } 909 rw_exit(&toparent->sdev_dotdot->sdev_contents); 910 911 /* 912 * acquire the global lock to prevent 913 * mount/unmount/other rename activities. 
914 */ 915 mutex_enter(&sdev_lock); 916 917 /* check existence of the source node */ 918 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 919 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct, 920 NULL, NULL); 921 if (error) { 922 sdcmn_err2(("sdev_rename: the source node %s exists\n", 923 onm)); 924 mutex_exit(&sdev_lock); 925 return (error); 926 } 927 928 if (VOP_REALVP(ovp, &realvp, ct) == 0) { 929 VN_HOLD(realvp); 930 VN_RELE(ovp); 931 ovp = realvp; 932 } 933 934 /* check existence of destination */ 935 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 936 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct, 937 NULL, NULL); 938 if (error && (error != ENOENT)) { 939 mutex_exit(&sdev_lock); 940 VN_RELE(ovp); 941 return (error); 942 } 943 944 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) { 945 VN_HOLD(realvp); 946 VN_RELE(nvp); 947 nvp = realvp; 948 } 949 950 /* 951 * make sure the source and the destination are 952 * in the same dev filesystem 953 */ 954 if (odvp != ndvp) { 955 vattr.va_mask = AT_FSID; 956 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { 957 mutex_exit(&sdev_lock); 958 VN_RELE(ovp); 959 if (nvp != NULL) 960 VN_RELE(nvp); 961 return (error); 962 } 963 fsid = vattr.va_fsid; 964 vattr.va_mask = AT_FSID; 965 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { 966 mutex_exit(&sdev_lock); 967 VN_RELE(ovp); 968 if (nvp != NULL) 969 VN_RELE(nvp); 970 return (error); 971 } 972 if (fsid != vattr.va_fsid) { 973 mutex_exit(&sdev_lock); 974 VN_RELE(ovp); 975 if (nvp != NULL) 976 VN_RELE(nvp); 977 return (EXDEV); 978 } 979 } 980 981 /* make sure the old entry can be deleted */ 982 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct); 983 if (error) { 984 mutex_exit(&sdev_lock); 985 VN_RELE(ovp); 986 if (nvp != NULL) 987 VN_RELE(nvp); 988 return (error); 989 } 990 991 /* make sure the destination allows creation */ 992 samedir = (fromparent == toparent); 993 if (!samedir) { 994 error = VOP_ACCESS(ndvp, 
VEXEC|VWRITE, 0, cred, ct); 995 if (error) { 996 mutex_exit(&sdev_lock); 997 VN_RELE(ovp); 998 if (nvp != NULL) 999 VN_RELE(nvp); 1000 return (error); 1001 } 1002 } 1003 1004 fromdv = VTOSDEV(ovp); 1005 ASSERT(fromdv); 1006 1007 /* destination file exists */ 1008 if (nvp != NULL) { 1009 todv = VTOSDEV(nvp); 1010 ASSERT(todv); 1011 } 1012 1013 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || 1014 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { 1015 mutex_exit(&sdev_lock); 1016 if (nvp != NULL) 1017 VN_RELE(nvp); 1018 VN_RELE(ovp); 1019 return (EACCES); 1020 } 1021 1022 /* 1023 * link source to new target in the memory. Regardless of failure, we 1024 * must rele our hold on nvp. 1025 */ 1026 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); 1027 if (nvp != NULL) 1028 VN_RELE(nvp); 1029 if (error) { 1030 sdcmn_err2(("sdev_rename: renaming %s to %s failed " 1031 " with error %d\n", onm, nnm, error)); 1032 mutex_exit(&sdev_lock); 1033 VN_RELE(ovp); 1034 return (error); 1035 } 1036 1037 /* 1038 * unlink from source 1039 */ 1040 rw_enter(&fromparent->sdev_contents, RW_READER); 1041 fromdv = sdev_cache_lookup(fromparent, onm); 1042 if (fromdv == NULL) { 1043 rw_exit(&fromparent->sdev_contents); 1044 mutex_exit(&sdev_lock); 1045 VN_RELE(ovp); 1046 sdcmn_err2(("sdev_rename: the source is deleted already\n")); 1047 return (0); 1048 } 1049 1050 if (fromdv->sdev_state == SDEV_ZOMBIE) { 1051 rw_exit(&fromparent->sdev_contents); 1052 mutex_exit(&sdev_lock); 1053 VN_RELE(SDEVTOV(fromdv)); 1054 VN_RELE(ovp); 1055 sdcmn_err2(("sdev_rename: the source is being deleted\n")); 1056 return (0); 1057 } 1058 rw_exit(&fromparent->sdev_contents); 1059 ASSERT(SDEVTOV(fromdv) == ovp); 1060 VN_RELE(ovp); 1061 1062 /* clean out the directory contents before it can be removed */ 1063 type = SDEVTOV(fromdv)->v_type; 1064 if (type == VDIR) { 1065 error = sdev_cleandir(fromdv, NULL, 0); 1066 sdcmn_err2(("sdev_rename: cleandir finished with %d\n", 1067 error)); 1068 if 
(error == EBUSY) 1069 error = 0; 1070 } 1071 1072 rw_enter(&fromparent->sdev_contents, RW_WRITER); 1073 bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; 1074 sdev_cache_update(fromparent, &fromdv, onm, 1075 SDEV_CACHE_DELETE); 1076 VN_RELE(SDEVTOV(fromdv)); 1077 1078 /* best effforts clean up the backing store */ 1079 if (bkstore) { 1080 ASSERT(fromparent->sdev_attrvp); 1081 if (type != VDIR) { 1082 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */ 1083 error = VOP_REMOVE(fromparent->sdev_attrvp, 1084 onm, kcred, ct, 0); 1085 } else { 1086 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */ 1087 error = VOP_RMDIR(fromparent->sdev_attrvp, 1088 onm, fromparent->sdev_attrvp, kcred, ct, 0); 1089 } 1090 1091 if (error) { 1092 sdcmn_err2(("sdev_rename: device %s is " 1093 "still on disk %s\n", onm, 1094 fromparent->sdev_path)); 1095 error = 0; 1096 } 1097 } 1098 rw_exit(&fromparent->sdev_contents); 1099 mutex_exit(&sdev_lock); 1100 1101 /* once reached to this point, the rename is regarded successful */ 1102 return (0); 1103 } 1104 1105 /* 1106 * dev-fs version of "ln -s path dev-name" 1107 * tnm - path, e.g. /devices/... or /dev/... 
1108 * lnm - dev_name 1109 */ 1110 /*ARGSUSED6*/ 1111 static int 1112 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, 1113 char *tnm, struct cred *cred, caller_context_t *ct, int flags) 1114 { 1115 int error; 1116 struct vnode *vp = NULL; 1117 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1118 struct sdev_node *self = (struct sdev_node *)NULL; 1119 1120 ASSERT(parent); 1121 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1122 if (parent->sdev_state == SDEV_ZOMBIE) { 1123 rw_exit(&parent->sdev_dotdot->sdev_contents); 1124 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n", 1125 parent->sdev_name)); 1126 return (ENOENT); 1127 } 1128 1129 if (!SDEV_IS_GLOBAL(parent)) { 1130 rw_exit(&parent->sdev_dotdot->sdev_contents); 1131 return (ENOTSUP); 1132 } 1133 rw_exit(&parent->sdev_dotdot->sdev_contents); 1134 1135 /* execute access is required to search a directory */ 1136 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1137 return (error); 1138 1139 /* find existing name */ 1140 /* XXXci - We may need to translate the C-I flags here */ 1141 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1142 if (error == 0) { 1143 ASSERT(vp); 1144 VN_RELE(vp); 1145 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm)); 1146 return (EEXIST); 1147 } 1148 if (error != ENOENT) 1149 return (error); 1150 1151 /* write access is required to create a symlink */ 1152 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) 1153 return (error); 1154 1155 /* put it into memory cache */ 1156 rw_enter(&parent->sdev_contents, RW_WRITER); 1157 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm, 1158 cred, SDEV_READY); 1159 if (error) { 1160 rw_exit(&parent->sdev_contents); 1161 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); 1162 if (self) 1163 SDEV_RELE(self); 1164 1165 return (error); 1166 } 1167 ASSERT(self && (self->sdev_state == SDEV_READY)); 1168 rw_exit(&parent->sdev_contents); 1169 1170 /* 
take care the timestamps for the node and its parent */ 1171 sdev_update_timestamps(SDEVTOV(self), kcred, 1172 AT_CTIME|AT_MTIME|AT_ATIME); 1173 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1174 if (SDEV_IS_GLOBAL(parent)) 1175 atomic_inc_ulong(&parent->sdev_gdir_gen); 1176 1177 /* wake up other threads blocked on looking up this node */ 1178 mutex_enter(&self->sdev_lookup_lock); 1179 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1180 mutex_exit(&self->sdev_lookup_lock); 1181 SDEV_RELE(self); /* don't return with vnode held */ 1182 return (0); 1183 } 1184 1185 /*ARGSUSED6*/ 1186 static int 1187 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, 1188 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp) 1189 { 1190 int error; 1191 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1192 struct sdev_node *self = NULL; 1193 struct vnode *vp = NULL; 1194 1195 ASSERT(parent && parent->sdev_dotdot); 1196 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1197 if (parent->sdev_state == SDEV_ZOMBIE) { 1198 rw_exit(&parent->sdev_dotdot->sdev_contents); 1199 return (ENOENT); 1200 } 1201 1202 /* non-global do not allow pure directory creation */ 1203 if (!SDEV_IS_GLOBAL(parent)) { 1204 rw_exit(&parent->sdev_dotdot->sdev_contents); 1205 return (prof_lookup(dvp, nm, vpp, cred)); 1206 } 1207 rw_exit(&parent->sdev_dotdot->sdev_contents); 1208 1209 /* execute access is required to search the directory */ 1210 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 1211 return (error); 1212 } 1213 1214 /* find existing name */ 1215 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 1216 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1217 if (error == 0) { 1218 VN_RELE(vp); 1219 return (EEXIST); 1220 } 1221 if (error != ENOENT) 1222 return (error); 1223 1224 /* require write access to create a directory */ 1225 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 1226 
return (error); 1227 } 1228 1229 /* put it into memory */ 1230 rw_enter(&parent->sdev_contents, RW_WRITER); 1231 error = sdev_mknode(parent, nm, &self, 1232 va, NULL, NULL, cred, SDEV_READY); 1233 if (error) { 1234 rw_exit(&parent->sdev_contents); 1235 if (self) 1236 SDEV_RELE(self); 1237 return (error); 1238 } 1239 ASSERT(self && (self->sdev_state == SDEV_READY)); 1240 rw_exit(&parent->sdev_contents); 1241 1242 /* take care the timestamps for the node and its parent */ 1243 sdev_update_timestamps(SDEVTOV(self), kcred, 1244 AT_CTIME|AT_MTIME|AT_ATIME); 1245 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1246 if (SDEV_IS_GLOBAL(parent)) 1247 atomic_inc_ulong(&parent->sdev_gdir_gen); 1248 1249 /* wake up other threads blocked on looking up this node */ 1250 mutex_enter(&self->sdev_lookup_lock); 1251 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1252 mutex_exit(&self->sdev_lookup_lock); 1253 *vpp = SDEVTOV(self); 1254 return (0); 1255 } 1256 1257 /* 1258 * allowing removing an empty directory under /dev 1259 */ 1260 /*ARGSUSED*/ 1261 static int 1262 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, 1263 caller_context_t *ct, int flags) 1264 { 1265 int error = 0; 1266 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1267 struct sdev_node *self = NULL; 1268 struct vnode *vp = NULL; 1269 1270 /* bail out early */ 1271 if (strcmp(nm, ".") == 0) 1272 return (EINVAL); 1273 if (strcmp(nm, "..") == 0) 1274 return (EEXIST); /* should be ENOTEMPTY */ 1275 1276 /* no destruction of non-global node */ 1277 ASSERT(parent && parent->sdev_dotdot); 1278 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1279 if (!SDEV_IS_GLOBAL(parent)) { 1280 rw_exit(&parent->sdev_dotdot->sdev_contents); 1281 return (ENOTSUP); 1282 } 1283 rw_exit(&parent->sdev_dotdot->sdev_contents); 1284 1285 /* execute access is required to search the directory */ 1286 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) 1287 return (error); 1288 1289 /* 
check existing name */ 1290 rw_enter(&parent->sdev_contents, RW_WRITER); 1291 self = sdev_cache_lookup(parent, nm); 1292 if (self == NULL) { 1293 rw_exit(&parent->sdev_contents); 1294 return (ENOENT); 1295 } 1296 1297 vp = SDEVTOV(self); 1298 if ((self->sdev_state == SDEV_INIT) || 1299 (self->sdev_state == SDEV_ZOMBIE)) { 1300 rw_exit(&parent->sdev_contents); 1301 VN_RELE(vp); 1302 return (ENOENT); 1303 } 1304 1305 /* some sanity checks */ 1306 if (vp == dvp || vp == cdir) { 1307 rw_exit(&parent->sdev_contents); 1308 VN_RELE(vp); 1309 return (EINVAL); 1310 } 1311 1312 if (vp->v_type != VDIR) { 1313 rw_exit(&parent->sdev_contents); 1314 VN_RELE(vp); 1315 return (ENOTDIR); 1316 } 1317 1318 if (vn_vfswlock(vp)) { 1319 rw_exit(&parent->sdev_contents); 1320 VN_RELE(vp); 1321 return (EBUSY); 1322 } 1323 1324 if (vn_mountedvfs(vp) != NULL) { 1325 rw_exit(&parent->sdev_contents); 1326 vn_vfsunlock(vp); 1327 VN_RELE(vp); 1328 return (EBUSY); 1329 } 1330 1331 self = VTOSDEV(vp); 1332 /* bail out on a non-empty directory */ 1333 rw_enter(&self->sdev_contents, RW_READER); 1334 if (self->sdev_nlink > 2) { 1335 rw_exit(&self->sdev_contents); 1336 rw_exit(&parent->sdev_contents); 1337 vn_vfsunlock(vp); 1338 VN_RELE(vp); 1339 return (ENOTEMPTY); 1340 } 1341 rw_exit(&self->sdev_contents); 1342 1343 /* unlink it from the directory cache */ 1344 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); 1345 rw_exit(&parent->sdev_contents); 1346 vn_vfsunlock(vp); 1347 VN_RELE(vp); 1348 1349 /* best effort to clean up the backing store */ 1350 if (SDEV_IS_PERSIST(parent)) { 1351 ASSERT(parent->sdev_attrvp); 1352 error = VOP_RMDIR(parent->sdev_attrvp, nm, 1353 parent->sdev_attrvp, kcred, ct, flags); 1354 1355 if (error) 1356 sdcmn_err2(("sdev_rmdir: cleaning device %s is on" 1357 " disk error %d\n", parent->sdev_path, error)); 1358 if (error == EBUSY) 1359 error = 0; 1360 1361 } 1362 1363 return (error); 1364 } 1365 1366 /* 1367 * read the contents of a symbolic link 1368 */ 1369 static 
int 1370 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred, 1371 caller_context_t *ct) 1372 { 1373 struct sdev_node *dv; 1374 int error = 0; 1375 1376 ASSERT(vp->v_type == VLNK); 1377 1378 dv = VTOSDEV(vp); 1379 1380 if (dv->sdev_attrvp) { 1381 /* non-NULL attrvp implys a persisted node at READY state */ 1382 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct)); 1383 } else if (dv->sdev_symlink != NULL) { 1384 /* memory nodes, e.g. local nodes */ 1385 rw_enter(&dv->sdev_contents, RW_READER); 1386 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); 1387 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), 1388 UIO_READ, uiop); 1389 rw_exit(&dv->sdev_contents); 1390 return (error); 1391 } 1392 1393 return (ENOENT); 1394 } 1395 1396 /*ARGSUSED4*/ 1397 static int 1398 sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 1399 caller_context_t *ct, int flags) 1400 { 1401 struct sdev_node *parent = VTOSDEV(dvp); 1402 int error; 1403 1404 /* 1405 * We must check that we have execute access to search the directory -- 1406 * but because our sdev_contents lock is already held as a reader (the 1407 * caller must have done a VOP_RWLOCK()), we call directly into the 1408 * underlying access routine if sdev_attr is non-NULL. 
1409 */ 1410 if (parent->sdev_attr != NULL) { 1411 VERIFY(RW_READ_HELD(&parent->sdev_contents)); 1412 1413 if (sdev_unlocked_access(parent, VEXEC, cred) != 0) 1414 return (EACCES); 1415 } else { 1416 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1417 return (error); 1418 } 1419 1420 ASSERT(parent); 1421 if (!SDEV_IS_GLOBAL(parent)) 1422 prof_filldir(parent); 1423 return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE)); 1424 } 1425 1426 /*ARGSUSED1*/ 1427 static void 1428 sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1429 { 1430 devname_inactive_func(vp, cred, NULL); 1431 } 1432 1433 /*ARGSUSED2*/ 1434 static int 1435 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1436 { 1437 struct sdev_node *dv = VTOSDEV(vp); 1438 struct sdev_fid *sdev_fid; 1439 1440 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { 1441 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); 1442 return (ENOSPC); 1443 } 1444 1445 sdev_fid = (struct sdev_fid *)fidp; 1446 bzero(sdev_fid, sizeof (struct sdev_fid)); 1447 sdev_fid->sdevfid_len = 1448 (int)sizeof (struct sdev_fid) - sizeof (ushort_t); 1449 sdev_fid->sdevfid_ino = dv->sdev_ino; 1450 1451 return (0); 1452 } 1453 1454 /* 1455 * This pair of routines bracket all VOP_READ, VOP_WRITE 1456 * and VOP_READDIR requests. The contents lock stops things 1457 * moving around while we're looking at them. 1458 */ 1459 /*ARGSUSED2*/ 1460 static int 1461 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1462 { 1463 rw_enter(&VTOSDEV(vp)->sdev_contents, 1464 write_flag ? RW_WRITER : RW_READER); 1465 return (write_flag ? 
V_WRITELOCK_TRUE : V_WRITELOCK_FALSE); 1466 } 1467 1468 /*ARGSUSED1*/ 1469 static void 1470 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1471 { 1472 rw_exit(&VTOSDEV(vp)->sdev_contents); 1473 } 1474 1475 /*ARGSUSED1*/ 1476 static int 1477 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1478 caller_context_t *ct) 1479 { 1480 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; 1481 1482 ASSERT(vp->v_type != VCHR && 1483 vp->v_type != VBLK && vp->v_type != VLNK); 1484 1485 if (vp->v_type == VDIR) 1486 return (fs_seek(vp, ooff, noffp, ct)); 1487 1488 ASSERT(attrvp); 1489 return (VOP_SEEK(attrvp, ooff, noffp, ct)); 1490 } 1491 1492 /*ARGSUSED1*/ 1493 static int 1494 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 1495 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr, 1496 caller_context_t *ct) 1497 { 1498 int error; 1499 struct sdev_node *dv = VTOSDEV(vp); 1500 1501 ASSERT(dv); 1502 ASSERT(dv->sdev_attrvp); 1503 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, 1504 flk_cbp, cr, ct); 1505 1506 return (error); 1507 } 1508 1509 static int 1510 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 1511 caller_context_t *ct) 1512 { 1513 switch (cmd) { 1514 case _PC_ACL_ENABLED: 1515 *valp = SDEV_ACL_FLAVOR(vp); 1516 return (0); 1517 } 1518 1519 return (fs_pathconf(vp, cmd, valp, cr, ct)); 1520 } 1521 1522 vnodeops_t *sdev_vnodeops; 1523 1524 const fs_operation_def_t sdev_vnodeops_tbl[] = { 1525 VOPNAME_OPEN, { .vop_open = sdev_open }, 1526 VOPNAME_CLOSE, { .vop_close = sdev_close }, 1527 VOPNAME_READ, { .vop_read = sdev_read }, 1528 VOPNAME_WRITE, { .vop_write = sdev_write }, 1529 VOPNAME_IOCTL, { .vop_ioctl = sdev_ioctl }, 1530 VOPNAME_GETATTR, { .vop_getattr = sdev_getattr }, 1531 VOPNAME_SETATTR, { .vop_setattr = sdev_setattr }, 1532 VOPNAME_ACCESS, { .vop_access = sdev_access }, 1533 VOPNAME_LOOKUP, { .vop_lookup = sdev_lookup }, 1534 VOPNAME_CREATE, { .vop_create = sdev_create }, 
1535 VOPNAME_RENAME, { .vop_rename = sdev_rename }, 1536 VOPNAME_REMOVE, { .vop_remove = sdev_remove }, 1537 VOPNAME_MKDIR, { .vop_mkdir = sdev_mkdir }, 1538 VOPNAME_RMDIR, { .vop_rmdir = sdev_rmdir }, 1539 VOPNAME_READDIR, { .vop_readdir = sdev_readdir }, 1540 VOPNAME_SYMLINK, { .vop_symlink = sdev_symlink }, 1541 VOPNAME_READLINK, { .vop_readlink = sdev_readlink }, 1542 VOPNAME_INACTIVE, { .vop_inactive = sdev_inactive }, 1543 VOPNAME_FID, { .vop_fid = sdev_fid }, 1544 VOPNAME_RWLOCK, { .vop_rwlock = sdev_rwlock }, 1545 VOPNAME_RWUNLOCK, { .vop_rwunlock = sdev_rwunlock }, 1546 VOPNAME_SEEK, { .vop_seek = sdev_seek }, 1547 VOPNAME_FRLOCK, { .vop_frlock = sdev_frlock }, 1548 VOPNAME_PATHCONF, { .vop_pathconf = sdev_pathconf }, 1549 VOPNAME_SETSECATTR, { .vop_setsecattr = sdev_setsecattr }, 1550 VOPNAME_GETSECATTR, { .vop_getsecattr = sdev_getsecattr }, 1551 NULL, NULL 1552 }; 1553 1554 int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl); 1555