1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright 2018, Joyent, Inc. 26 */ 27 28 /* 29 * vnode ops for the /dev filesystem 30 * 31 * - VDIR, VCHR, VBLK, and VLNK are considered must-support files 32 * - VREG and VDOOR are used for some internal implementations in 33 * the global zone, e.g. devname and devfsadm communication 34 * - other file types are unusual in this namespace and 35 * not supported for now 36 */ 37 38 /* 39 * sdev has a few basic goals: 40 * o Provide /dev for the global zone as well as various non-global zones. 41 * o Provide the basic functionality that devfsadm might need (mknod, 42 * symlinks, etc.) 43 * o Allow persistent permissions on files in /dev. 44 * o Allow for dynamic directories and nodes for use by various services (pts, 45 * zvol, net, etc.) 46 * 47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's 48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's 49 * that we generally care about, dynamic and otherwise. 
50 * 51 * Persisting Information 52 * ---------------------- 53 * 54 * When sdev is mounted, it keeps track of the underlying file system it is 55 * mounted over. In certain situations, sdev will go and create entries in that 56 * underlying file system. These underlying 'back end' nodes are used as proxies 57 * for various changes in permissions. While specific sets of nodes, such as 58 * dynamic ones, are exempt, this process stores permission changes against 59 * these back end nodes. The point of all of this is to allow for these settings 60 * to persist across host and zone reboots. As an example, consider the entry 61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon 62 * changing the permissions on c0t0d0 you'd have the following logical 63 * relationships: 64 * 65 * +------------------+ sdev_vnode +--------------+ 66 * | sdev_node_t |<---------------->| vnode_t | 67 * | /dev/dsk/c0t0d0 |<---------------->| for sdev | 68 * +------------------+ +--------------+ 69 * | 70 * | sdev_attrvp 71 * | 72 * | +---------------------+ 73 * +--->| vnode_t for UFS|ZFS | 74 * | /dev/dsk/c0t0d0 | 75 * +---------------------+ 76 * 77 * sdev is generally in memory. Therefore when a lookup happens and there is no 78 * entry already inside of a directory cache, it will next check the backing 79 * store. If the backing store exists, we will reconstitute the sdev_node based 80 * on the information that we persisted. When we create the backing store node, 81 * we use the struct vattr information that we already have in sdev_node_t. 82 * Because of this, we already know if the entry was previously a symlink, 83 * directory, or some other kind of type. Note that not all types of nodes are 84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are 85 * eligible to be persisted. 86 * 87 * When the sdev_node is created and the lookup is done, we grab a hold on the 88 * underlying vnode as part of the call to VOP_LOOKUP. 
That reference is held 89 * until the sdev_node becomes inactive. Once its reference count reaches one 90 * and the VOP_INACTIVE callback fires leading to the destruction of the node, 91 * the reference on the underlying vnode will be released. 92 * 93 * The backing store node will be deleted only when the node itself is deleted 94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. 95 * 96 * Not everything can be persisted, see The Rules section for more details. 97 * 98 * Dynamic Nodes 99 * ------------- 100 * 101 * Dynamic nodes allow for specific interactions with various kernel subsystems 102 * when looking up directory entries. This allows the lookup and readdir 103 * functions to check against the kernel subsystems for validity, e.g. does a 104 * zvol or nic still exist. 105 * 106 * More specifically, when we create various directories we check if the 107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c). 108 * If it does, we swap out the vnode operations into a new set which combines the 109 * normal sdev vnode operations with the dynamic set here. 110 * 111 * In addition, various dynamic nodes implement a verification entry point. This 112 * verification entry is used as a part of lookup and readdir. The goal for 113 * these dynamic nodes is to allow them to check with the underlying subsystems 114 * to ensure that these devices are still present, or if they have gone away, to 115 * remove them from the results. This is indicated by using the SDEV_VTOR flag 116 * in vtab[]. 117 * 118 * Dynamic nodes have additional restrictions placed upon them. They may only 119 * appear at the top level directory of the file system. In addition, users 120 * cannot create dirents below any level of a dynamic node aside from its special 121 * vnops. 122 * 123 * Profiles 124 * -------- 125 * 126 * Profiles exist for the purpose of non-global zones. 
They work with the zone 127 * brands and zoneadmd to set up a filter of allowed devices that can appear in 128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a 129 * modctl system call. Specifically it allows one to add patterns of device 130 * paths to include and exclude. It allows for a collection of symlinks to be 131 * added and it allows for remapping names. 132 * 133 * When operating in a non-global zone, several of the sdev vnops are redirected 134 * to the profile versions. These impose additional restrictions such as 135 * enforcing that a non-global zone's /dev is read only. 136 * 137 * sdev_node_t States 138 * ------------------ 139 * 140 * A given sdev_node_t has a field called the sdev_state which describes where 141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT, 142 * SDEV_READY, and SDEV_ZOMBIE. 143 * 144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node 145 * is allocated, initialized and added to the directory's 146 * sdev_node cache. A node at this state will also 147 * have the SDEV_LOOKUP flag set. 148 * 149 * Other threads that are trying to look up a node at 150 * this state will be blocked until the SDEV_LOOKUP flag 151 * is cleared. 152 * 153 * When the SDEV_LOOKUP flag is cleared, the node may 154 * transition into the SDEV_READY state for a successful 155 * lookup or the node is removed from the directory cache 156 * and destroyed if the named node can not be found. 157 * An ENOENT error is returned for the second case. 158 * 159 * SDEV_READY: A /dev file has been successfully looked up and 160 * associated with a vnode. The /dev file is available 161 * for the supported /dev file system operations. 162 * 163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued 164 * to an SDEV_READY node. The node is transitioned into 165 * the SDEV_ZOMBIE state if the vnode reference count 166 * is still held. 
A SDEV_ZOMBIE node does not support 167 * any of the /dev file system operations. A SDEV_ZOMBIE 168 * node is immediately removed from the directory cache 169 * and destroyed once the reference count reaches zero. 170 * 171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the 172 * underlying directory caches. This has been the source of numerous bugs and 173 * thus to better mimic what happens on a real file system, it is no longer the 174 * case. 175 * 176 * The following state machine describes the life cycle of a given node and its 177 * associated states: 178 * 179 * node is . . . . . 180 * allocated via . +-------------+ . . . . . . . vnode_t refcount 181 * sdev_nodeinit() . | Unallocated | . reaches zero and 182 * +--------*-----| Memory |<--------*---+ sdev_inactive is 183 * | +-------------+ | called. 184 * | +------------^ | called. 185 * v | | 186 * +-----------+ * . . sdev_nodeready() +-------------+ 187 * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | 188 * +-----------+ | failure +-------------+ 189 * | | ^ 190 * | | +------------+ | 191 * +-*----------->| SDEV_READY |--------*-----+ 192 * . +------------+ . The node is no longer 193 * . . node successfully . . . . . valid or we've been 194 * inserted into the asked to remove it. 195 * directory cache This happens via 196 * and sdev_nodeready() sdev_dirdelete(). 197 * call successful. 198 * 199 * Adding and Removing Dirents, Zombie Nodes 200 * ----------------------------------------- 201 * 202 * As part of doing a lookup, readdir, or an explicit creation operation like 203 * mkdir or create, nodes may be created. Every directory has an avl tree which 204 * contains its children, the sdev_entries tree. This is only used if the type 205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and 206 * it is managed through sdev_cache_update(). 207 * 208 * Every sdev_node_t has a field sdev_state, which describes the current state 209 * of the node. 
A node is generally speaking in the SDEV_READY state. When it is 210 * there, it can be looked up, accessed, and operations performed on it. When a 211 * node is going to be removed from the directory cache it is marked as a 212 * zombie. Once a node becomes a zombie, no other file system operations will 213 * succeed and it will continue to exist as a node until the vnode count on the 214 * node reaches zero. At that point, the node will be freed. However, once a 215 * node has been marked as a zombie, it will be removed immediately from the 216 * directory cache such that no one else may find it again. This means that 217 * someone else can insert a new entry into that directory with the same name 218 * and without a problem. 219 * 220 * To remove a node, see the section on that in The Rules. 221 * 222 * The Rules 223 * --------- 224 * These are the rules to live by when working in sdev. These are not 225 * exhaustive. 226 * 227 * - Set 1: Working with Backing Nodes 228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. 229 * o If we find a backing node when looking up an sdev_node_t for the first 230 * time, we use its attributes to build our sdev_node_t. 231 * o If there is a found backing node, or we create a backing node, that's 232 * when we grab the hold on its vnode. 233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from 234 * the underlying file system. It must not be searchable or findable. 235 * o We release our hold on the backing node vnode when we destroy the 236 * sdev_node_t. 237 * 238 * - Set 2: Locking rules for sdev (not exhaustive) 239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it 240 * for read or write if manipulating its contents appropriately. 241 * o You must lock your parent before yourself. 242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must 243 * grab the v_lock before the sdev_contents rw_lock. 
244 * o If you release a lock on the node as a part of upgrading it, you must 245 * verify that the node has not become a zombie as a part of this process. 246 * 247 * - Set 3: Zombie Status and What it Means 248 * o If you encounter a node that is a ZOMBIE, that means that it has been 249 * unlinked from the backing store. 250 * o If you release your contents lock and acquire it again (say as part of 251 * trying to grab a write lock) you must check that the node has not become 252 * a zombie. 253 * o You should VERIFY that a looked up node is not a zombie. This follows 254 * from the following logic. To mark something as a zombie means that it is 255 * removed from the parent's directory cache. To do that, you must have a 256 * write lock on the parent's sdev_contents. To look up through that 257 * directory you must have a read lock. This then becomes a simple ordering 258 * problem. If you've been granted the lock then the other operation cannot 259 * be in progress or must have already succeeded. 260 * 261 * - Set 4: Removing Directory Entries (aka making nodes Zombies) 262 * o Write lock must be held on the directory 263 * o Write lock must be held on the node 264 * o Remove the sdev_node_t from its parent cache 265 * o Remove the corresponding backing store node, if it exists, e.g. use 266 * VOP_REMOVE or VOP_RMDIR. 267 * o You must NOT make any change in the vnode reference count! Nodes should 268 * only be cleaned up through VOP_INACTIVE callbacks. 269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of 270 * the backing store vnode that was grabbed during lookup. 
271 * 272 * - Set 5: What Nodes may be Persisted 273 * o The root, /dev is always persisted 274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted 275 * unless it is also marked SDEV_PERSIST 276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that 277 * along to the child as long as it does not contradict the above rules 278 */ 279 280 #include <sys/types.h> 281 #include <sys/param.h> 282 #include <sys/t_lock.h> 283 #include <sys/systm.h> 284 #include <sys/sysmacros.h> 285 #include <sys/user.h> 286 #include <sys/time.h> 287 #include <sys/vfs.h> 288 #include <sys/vnode.h> 289 #include <sys/vfs_opreg.h> 290 #include <sys/file.h> 291 #include <sys/fcntl.h> 292 #include <sys/flock.h> 293 #include <sys/kmem.h> 294 #include <sys/uio.h> 295 #include <sys/errno.h> 296 #include <sys/stat.h> 297 #include <sys/cred.h> 298 #include <sys/dirent.h> 299 #include <sys/pathname.h> 300 #include <sys/cmn_err.h> 301 #include <sys/debug.h> 302 #include <sys/policy.h> 303 #include <vm/hat.h> 304 #include <vm/seg_vn.h> 305 #include <vm/seg_map.h> 306 #include <vm/seg.h> 307 #include <vm/as.h> 308 #include <vm/page.h> 309 #include <sys/proc.h> 310 #include <sys/mode.h> 311 #include <sys/sunndi.h> 312 #include <sys/ptms.h> 313 #include <fs/fs_subr.h> 314 #include <sys/fs/dv_node.h> 315 #include <sys/fs/sdev_impl.h> 316 317 /*ARGSUSED*/ 318 static int 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 320 { 321 struct sdev_node *dv = VTOSDEV(*vpp); 322 struct sdev_node *ddv = dv->sdev_dotdot; 323 int error = 0; 324 325 if ((*vpp)->v_type == VDIR) 326 return (0); 327 328 if (!SDEV_IS_GLOBAL(dv)) 329 return (ENOTSUP); 330 331 if ((*vpp)->v_type == VLNK) 332 return (ENOENT); 333 ASSERT((*vpp)->v_type == VREG); 334 if ((*vpp)->v_type != VREG) 335 return (ENOTSUP); 336 337 ASSERT(ddv); 338 rw_enter(&ddv->sdev_contents, RW_READER); 339 if (dv->sdev_attrvp == NULL) { 340 rw_exit(&ddv->sdev_contents); 341 return (ENOENT); 
342 } 343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct); 344 rw_exit(&ddv->sdev_contents); 345 return (error); 346 } 347 348 /*ARGSUSED1*/ 349 static int 350 sdev_close(struct vnode *vp, int flag, int count, 351 offset_t offset, struct cred *cred, caller_context_t *ct) 352 { 353 struct sdev_node *dv = VTOSDEV(vp); 354 355 if (vp->v_type == VDIR) { 356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 357 cleanshares(vp, ttoproc(curthread)->p_pid); 358 return (0); 359 } 360 361 if (!SDEV_IS_GLOBAL(dv)) 362 return (ENOTSUP); 363 364 ASSERT(vp->v_type == VREG); 365 if (vp->v_type != VREG) 366 return (ENOTSUP); 367 368 ASSERT(dv->sdev_attrvp); 369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct)); 370 } 371 372 /*ARGSUSED*/ 373 static int 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 375 struct caller_context *ct) 376 { 377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp); 378 int error; 379 380 if (!SDEV_IS_GLOBAL(dv)) 381 return (EINVAL); 382 383 if (vp->v_type == VDIR) 384 return (EISDIR); 385 386 /* only supporting regular files in /dev */ 387 ASSERT(vp->v_type == VREG); 388 if (vp->v_type != VREG) 389 return (EINVAL); 390 391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents)); 392 ASSERT(dv->sdev_attrvp); 393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct); 394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct); 395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct); 396 return (error); 397 } 398 399 /*ARGSUSED*/ 400 static int 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 402 struct caller_context *ct) 403 { 404 struct sdev_node *dv = VTOSDEV(vp); 405 int error = 0; 406 407 if (!SDEV_IS_GLOBAL(dv)) 408 return (EINVAL); 409 410 if (vp->v_type == VDIR) 411 return (EISDIR); 412 413 /* only supporting regular files in /dev */ 414 ASSERT(vp->v_type == VREG); 415 if (vp->v_type != VREG) 416 return (EINVAL); 417 418 ASSERT(dv->sdev_attrvp); 419 420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct); 
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct); 422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct); 423 if (error == 0) { 424 sdev_update_timestamps(dv->sdev_attrvp, kcred, 425 AT_MTIME); 426 } 427 return (error); 428 } 429 430 /*ARGSUSED*/ 431 static int 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 433 struct cred *cred, int *rvalp, caller_context_t *ct) 434 { 435 struct sdev_node *dv = VTOSDEV(vp); 436 437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR)) 438 return (ENOTTY); 439 440 ASSERT(vp->v_type == VREG); 441 if (vp->v_type != VREG) 442 return (EINVAL); 443 444 ASSERT(dv->sdev_attrvp); 445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct)); 446 } 447 448 static int 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, 450 struct cred *cr, caller_context_t *ct) 451 { 452 int error = 0; 453 struct sdev_node *dv = VTOSDEV(vp); 454 struct sdev_node *parent = dv->sdev_dotdot; 455 456 ASSERT(parent); 457 458 rw_enter(&parent->sdev_contents, RW_READER); 459 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 460 461 /* 462 * search order: 463 * - for persistent nodes (SDEV_PERSIST): backstore 464 * - for non-persistent nodes: module ops if global, then memory 465 */ 466 if (dv->sdev_attrvp) { 467 rw_exit(&parent->sdev_contents); 468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct); 469 sdev_vattr_merge(dv, vap); 470 } else { 471 ASSERT(dv->sdev_attr); 472 *vap = *dv->sdev_attr; 473 sdev_vattr_merge(dv, vap); 474 rw_exit(&parent->sdev_contents); 475 } 476 477 return (error); 478 } 479 480 /*ARGSUSED4*/ 481 static int 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, 483 struct cred *cred, caller_context_t *ctp) 484 { 485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0)); 486 } 487 488 static int 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 490 struct cred *cr, caller_context_t *ct) 491 { 492 int error; 493 struct sdev_node *dv = VTOSDEV(vp); 494 struct vnode 
*avp = dv->sdev_attrvp; 495 496 if (avp == NULL) { 497 /* return fs_fab_acl() if flavor matches, else do nothing */ 498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED && 499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) || 500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED && 501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE)))) 502 return (fs_fab_acl(vp, vsap, flags, cr, ct)); 503 504 return (ENOSYS); 505 } 506 507 (void) VOP_RWLOCK(avp, 1, ct); 508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 509 VOP_RWUNLOCK(avp, 1, ct); 510 return (error); 511 } 512 513 static int 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 515 struct cred *cr, caller_context_t *ct) 516 { 517 int error; 518 struct sdev_node *dv = VTOSDEV(vp); 519 struct vnode *avp = dv->sdev_attrvp; 520 521 if (dv->sdev_state == SDEV_ZOMBIE) 522 return (0); 523 524 if (avp == NULL) { 525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv)) 526 return (fs_nosys()); 527 ASSERT(dv->sdev_attr); 528 /* 529 * if coming in directly, the acl system call will 530 * have held the read-write lock via VOP_RWLOCK() 531 * If coming in via specfs, specfs will have 532 * held the rw lock on the realvp i.e. us. 533 */ 534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 535 sdev_vattr_merge(dv, dv->sdev_attr); 536 error = sdev_shadow_node(dv, cr); 537 if (error) { 538 return (fs_nosys()); 539 } 540 541 ASSERT(dv->sdev_attrvp); 542 /* clean out the memory copy if any */ 543 if (dv->sdev_attr) { 544 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 545 dv->sdev_attr = NULL; 546 } 547 avp = dv->sdev_attrvp; 548 } 549 ASSERT(avp); 550 551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct); 552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct); 554 return (error); 555 } 556 557 /* 558 * There are two different unlocked routines. This one is not static as it is 559 * used as part of the secpolicy_vnode_setattr calls in sdev_subr.c. 
Because it 560 * is used in that function it has to have a specific signature. 561 */ 562 int 563 sdev_unlocked_access(void *vdv, int mode, struct cred *cr) 564 { 565 struct sdev_node *dv = vdv; 566 int shift = 0; 567 uid_t owner = dv->sdev_attr->va_uid; 568 569 if (crgetuid(cr) != owner) { 570 shift += 3; 571 if (groupmember(dv->sdev_attr->va_gid, cr) == 0) 572 shift += 3; 573 } 574 575 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner, 576 dv->sdev_attr->va_mode << shift, mode)); 577 } 578 579 static int 580 sdev_self_access(sdev_node_t *dv, int mode, int flags, struct cred *cr, 581 caller_context_t *ct) 582 { 583 int ret; 584 585 ASSERT(RW_READ_HELD(&dv->sdev_contents)); 586 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 587 588 if (dv->sdev_attrvp) { 589 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct); 590 } else if (dv->sdev_attr) { 591 ret = sdev_unlocked_access(dv, mode, cr); 592 if (ret) 593 ret = EACCES; 594 } 595 596 return (ret); 597 } 598 599 static int 600 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr, 601 caller_context_t *ct) 602 { 603 struct sdev_node *dv = VTOSDEV(vp); 604 int ret; 605 606 rw_enter(&dv->sdev_contents, RW_READER); 607 ret = sdev_self_access(dv, mode, flags, cr, ct); 608 rw_exit(&dv->sdev_contents); 609 610 return (ret); 611 } 612 613 /* 614 * Lookup 615 */ 616 /*ARGSUSED3*/ 617 static int 618 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 619 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 620 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 621 { 622 struct sdev_node *parent; 623 int error; 624 625 parent = VTOSDEV(dvp); 626 ASSERT(parent); 627 628 /* execute access is required to search the directory */ 629 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 630 return (error); 631 632 if (!SDEV_IS_GLOBAL(parent)) 633 return (prof_lookup(dvp, nm, vpp, cred)); 634 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); 635 } 636 637 
/*ARGSUSED2*/ 638 static int 639 sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 640 int mode, struct vnode **vpp, struct cred *cred, int flag, 641 caller_context_t *ct, vsecattr_t *vsecp) 642 { 643 struct vnode *vp = NULL; 644 struct vnode *avp; 645 struct sdev_node *parent; 646 struct sdev_node *self = NULL; 647 int error = 0; 648 vtype_t type = vap->va_type; 649 650 ASSERT(type != VNON && type != VBAD); 651 652 if ((type == VFIFO) || (type == VSOCK) || 653 (type == VPROC) || (type == VPORT)) 654 return (ENOTSUP); 655 656 parent = VTOSDEV(dvp); 657 ASSERT(parent); 658 659 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 660 if (parent->sdev_state == SDEV_ZOMBIE) { 661 rw_exit(&parent->sdev_dotdot->sdev_contents); 662 return (ENOENT); 663 } 664 665 /* 666 * Nodes cannot be created in NGZ context. 667 */ 668 if (!SDEV_IS_GLOBAL(parent)) { 669 rw_exit(&parent->sdev_dotdot->sdev_contents); 670 error = prof_lookup(dvp, nm, vpp, cred); 671 672 /* 673 * In this case, we can't create a vnode but we can 674 * open an existing one. However, we still want to 675 * enforce the open(2) error semantics as if this was 676 * a regular sdev_create() in GZ context. Since we 677 * know the vnode already exists (error == 0) we a) 678 * return EEXIST if exclusive access was requested, or 679 * b) return EISDIR if write access was requested on a 680 * directory. Otherwise, we return the value from 681 * prof_lookup() as is. 
682 */ 683 if (error == 0) { 684 if (excl == EXCL) { 685 error = EEXIST; 686 } else if (((*vpp)->v_type == VDIR) && 687 (mode & VWRITE)) { 688 error = EISDIR; 689 } 690 691 if (error != 0) 692 VN_RELE(*vpp); 693 } 694 695 696 return (error); 697 } 698 rw_exit(&parent->sdev_dotdot->sdev_contents); 699 700 /* execute access is required to search the directory */ 701 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 702 return (error); 703 704 /* check existing name */ 705 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 706 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 707 708 /* name found */ 709 if (error == 0) { 710 ASSERT(vp); 711 if (excl == EXCL) { 712 error = EEXIST; 713 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { 714 /* allowing create/read-only an existing directory */ 715 error = EISDIR; 716 } else { 717 error = VOP_ACCESS(vp, mode, 0, cred, ct); 718 } 719 720 if (error) { 721 VN_RELE(vp); 722 return (error); 723 } 724 725 /* truncation first */ 726 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && 727 (vap->va_size == 0)) { 728 ASSERT(parent->sdev_attrvp); 729 error = VOP_CREATE(parent->sdev_attrvp, 730 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp); 731 732 if (error) { 733 VN_RELE(vp); 734 return (error); 735 } 736 } 737 738 sdev_update_timestamps(vp, kcred, 739 AT_CTIME|AT_MTIME|AT_ATIME); 740 *vpp = vp; 741 return (0); 742 } 743 744 /* bail out early */ 745 if (error != ENOENT) 746 return (error); 747 748 /* verify write access - compliance specifies ENXIO */ 749 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) { 750 if (error == EACCES) 751 error = ENXIO; 752 return (error); 753 } 754 755 /* 756 * For memory-based (ROFS) directory: 757 * - either disallow node creation; 758 * - or implement VOP_CREATE of its own 759 */ 760 rw_enter(&parent->sdev_contents, RW_WRITER); 761 if (!SDEV_IS_PERSIST(parent)) { 762 rw_exit(&parent->sdev_contents); 763 return (ENOTSUP); 764 } 765 
ASSERT(parent->sdev_attrvp); 766 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, 767 cred, SDEV_READY); 768 if (error) { 769 rw_exit(&parent->sdev_contents); 770 if (self) 771 SDEV_RELE(self); 772 return (error); 773 } 774 rw_exit(&parent->sdev_contents); 775 776 ASSERT(self); 777 /* take care the timestamps for the node and its parent */ 778 sdev_update_timestamps(SDEVTOV(self), kcred, 779 AT_CTIME|AT_MTIME|AT_ATIME); 780 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 781 if (SDEV_IS_GLOBAL(parent)) 782 atomic_inc_ulong(&parent->sdev_gdir_gen); 783 784 /* wake up other threads blocked on looking up this node */ 785 mutex_enter(&self->sdev_lookup_lock); 786 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 787 mutex_exit(&self->sdev_lookup_lock); 788 error = sdev_to_vp(self, vpp); 789 return (error); 790 } 791 792 static int 793 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, 794 caller_context_t *ct, int flags) 795 { 796 int error; 797 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 798 struct vnode *vp = NULL; 799 struct sdev_node *dv = NULL; 800 int len; 801 int bkstore; 802 803 /* bail out early */ 804 len = strlen(nm); 805 if (nm[0] == '.') { 806 if (len == 1) { 807 return (EINVAL); 808 } else if (len == 2 && nm[1] == '.') { 809 return (EEXIST); 810 } 811 } 812 813 ASSERT(parent); 814 rw_enter(&parent->sdev_contents, RW_READER); 815 if (!SDEV_IS_GLOBAL(parent)) { 816 rw_exit(&parent->sdev_contents); 817 return (ENOTSUP); 818 } 819 820 /* execute access is required to search the directory */ 821 if ((error = sdev_self_access(parent, VEXEC, 0, cred, ct)) != 0) { 822 rw_exit(&parent->sdev_contents); 823 return (error); 824 } 825 826 /* check existence first */ 827 dv = sdev_cache_lookup(parent, nm); 828 if (dv == NULL) { 829 rw_exit(&parent->sdev_contents); 830 return (ENOENT); 831 } 832 833 vp = SDEVTOV(dv); 834 if ((dv->sdev_state == SDEV_INIT) || 835 (dv->sdev_state == SDEV_ZOMBIE)) { 836 rw_exit(&parent->sdev_contents); 837 
VN_RELE(vp); 838 return (ENOENT); 839 } 840 841 /* write access is required to remove an entry */ 842 if ((error = sdev_self_access(parent, VWRITE, 0, cred, ct)) != 0) { 843 rw_exit(&parent->sdev_contents); 844 VN_RELE(vp); 845 return (error); 846 } 847 848 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 849 if (!rw_tryupgrade(&parent->sdev_contents)) { 850 rw_exit(&parent->sdev_contents); 851 rw_enter(&parent->sdev_contents, RW_WRITER); 852 /* Make sure we didn't become a zombie */ 853 if (parent->sdev_state == SDEV_ZOMBIE) { 854 rw_exit(&parent->sdev_contents); 855 VN_RELE(vp); 856 return (ENOENT); 857 } 858 } 859 860 /* we do not support unlinking a non-empty directory */ 861 if (vp->v_type == VDIR && dv->sdev_nlink > 2) { 862 rw_exit(&parent->sdev_contents); 863 VN_RELE(vp); 864 return (EBUSY); 865 } 866 867 /* 868 * sdev_dirdelete does the real job of: 869 * - make sure no open ref count 870 * - destroying the sdev_node 871 * - releasing the hold on attrvp 872 */ 873 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); 874 VN_RELE(vp); 875 rw_exit(&parent->sdev_contents); 876 877 /* 878 * best efforts clean up the backing store 879 */ 880 if (bkstore) { 881 ASSERT(parent->sdev_attrvp); 882 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, 883 ct, flags); 884 /* 885 * do not report BUSY error 886 * because the backing store ref count is released 887 * when the last ref count on the sdev_node is 888 * released. 889 */ 890 if (error == EBUSY) { 891 sdcmn_err2(("sdev_remove: device %s is still on" 892 "disk %s\n", nm, parent->sdev_path)); 893 error = 0; 894 } 895 } 896 897 return (error); 898 } 899 900 /* 901 * Some restrictions for this file system: 902 * - both oldnm and newnm are in the scope of /dev file system, 903 * to simply the namespace management model. 
904 */ 905 /*ARGSUSED6*/ 906 static int 907 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, 908 struct cred *cred, caller_context_t *ct, int flags) 909 { 910 struct sdev_node *fromparent = NULL; 911 struct vattr vattr; 912 struct sdev_node *toparent; 913 struct sdev_node *fromdv = NULL; /* source node */ 914 struct vnode *ovp = NULL; /* source vnode */ 915 struct sdev_node *todv = NULL; /* destination node */ 916 struct vnode *nvp = NULL; /* destination vnode */ 917 int samedir = 0; /* set if odvp == ndvp */ 918 struct vnode *realvp; 919 int error = 0; 920 dev_t fsid; 921 int bkstore = 0; 922 vtype_t type; 923 924 /* prevent modifying "." and ".." */ 925 if ((onm[0] == '.' && 926 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 927 (nnm[0] == '.' && 928 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) { 929 return (EINVAL); 930 } 931 932 fromparent = VTOSDEV(odvp); 933 toparent = VTOSDEV(ndvp); 934 935 /* ZOMBIE parent doesn't allow new node creation */ 936 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER); 937 if (fromparent->sdev_state == SDEV_ZOMBIE) { 938 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 939 return (ENOENT); 940 } 941 942 /* renaming only supported for global device nodes */ 943 if (!SDEV_IS_GLOBAL(fromparent)) { 944 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 945 return (ENOTSUP); 946 } 947 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 948 949 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER); 950 if (toparent->sdev_state == SDEV_ZOMBIE) { 951 rw_exit(&toparent->sdev_dotdot->sdev_contents); 952 return (ENOENT); 953 } 954 rw_exit(&toparent->sdev_dotdot->sdev_contents); 955 956 /* 957 * acquire the global lock to prevent 958 * mount/unmount/other rename activities. 
959 */ 960 mutex_enter(&sdev_lock); 961 962 /* check existence of the source node */ 963 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 964 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct, 965 NULL, NULL); 966 if (error) { 967 sdcmn_err2(("sdev_rename: the source node %s exists\n", 968 onm)); 969 mutex_exit(&sdev_lock); 970 return (error); 971 } 972 973 if (VOP_REALVP(ovp, &realvp, ct) == 0) { 974 VN_HOLD(realvp); 975 VN_RELE(ovp); 976 ovp = realvp; 977 } 978 979 /* check existence of destination */ 980 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 981 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct, 982 NULL, NULL); 983 if (error && (error != ENOENT)) { 984 mutex_exit(&sdev_lock); 985 VN_RELE(ovp); 986 return (error); 987 } 988 989 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) { 990 VN_HOLD(realvp); 991 VN_RELE(nvp); 992 nvp = realvp; 993 } 994 995 /* 996 * make sure the source and the destination are 997 * in the same dev filesystem 998 */ 999 if (odvp != ndvp) { 1000 vattr.va_mask = AT_FSID; 1001 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { 1002 mutex_exit(&sdev_lock); 1003 VN_RELE(ovp); 1004 if (nvp != NULL) 1005 VN_RELE(nvp); 1006 return (error); 1007 } 1008 fsid = vattr.va_fsid; 1009 vattr.va_mask = AT_FSID; 1010 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { 1011 mutex_exit(&sdev_lock); 1012 VN_RELE(ovp); 1013 if (nvp != NULL) 1014 VN_RELE(nvp); 1015 return (error); 1016 } 1017 if (fsid != vattr.va_fsid) { 1018 mutex_exit(&sdev_lock); 1019 VN_RELE(ovp); 1020 if (nvp != NULL) 1021 VN_RELE(nvp); 1022 return (EXDEV); 1023 } 1024 } 1025 1026 /* make sure the old entry can be deleted */ 1027 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct); 1028 if (error) { 1029 mutex_exit(&sdev_lock); 1030 VN_RELE(ovp); 1031 if (nvp != NULL) 1032 VN_RELE(nvp); 1033 return (error); 1034 } 1035 1036 /* make sure the destination allows creation */ 1037 samedir = (fromparent == toparent); 1038 if 
(!samedir) { 1039 error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred, ct); 1040 if (error) { 1041 mutex_exit(&sdev_lock); 1042 VN_RELE(ovp); 1043 if (nvp != NULL) 1044 VN_RELE(nvp); 1045 return (error); 1046 } 1047 } 1048 1049 fromdv = VTOSDEV(ovp); 1050 ASSERT(fromdv); 1051 1052 /* destination file exists */ 1053 if (nvp != NULL) { 1054 todv = VTOSDEV(nvp); 1055 ASSERT(todv); 1056 } 1057 1058 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || 1059 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { 1060 mutex_exit(&sdev_lock); 1061 if (nvp != NULL) 1062 VN_RELE(nvp); 1063 VN_RELE(ovp); 1064 return (EACCES); 1065 } 1066 1067 /* 1068 * link source to new target in the memory. Regardless of failure, we 1069 * must rele our hold on nvp. 1070 */ 1071 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); 1072 if (nvp != NULL) 1073 VN_RELE(nvp); 1074 if (error) { 1075 sdcmn_err2(("sdev_rename: renaming %s to %s failed " 1076 " with error %d\n", onm, nnm, error)); 1077 mutex_exit(&sdev_lock); 1078 VN_RELE(ovp); 1079 return (error); 1080 } 1081 1082 /* 1083 * unlink from source 1084 */ 1085 rw_enter(&fromparent->sdev_contents, RW_READER); 1086 fromdv = sdev_cache_lookup(fromparent, onm); 1087 if (fromdv == NULL) { 1088 rw_exit(&fromparent->sdev_contents); 1089 mutex_exit(&sdev_lock); 1090 VN_RELE(ovp); 1091 sdcmn_err2(("sdev_rename: the source is deleted already\n")); 1092 return (0); 1093 } 1094 1095 if (fromdv->sdev_state == SDEV_ZOMBIE) { 1096 rw_exit(&fromparent->sdev_contents); 1097 mutex_exit(&sdev_lock); 1098 VN_RELE(SDEVTOV(fromdv)); 1099 VN_RELE(ovp); 1100 sdcmn_err2(("sdev_rename: the source is being deleted\n")); 1101 return (0); 1102 } 1103 rw_exit(&fromparent->sdev_contents); 1104 ASSERT(SDEVTOV(fromdv) == ovp); 1105 VN_RELE(ovp); 1106 1107 /* clean out the directory contents before it can be removed */ 1108 type = SDEVTOV(fromdv)->v_type; 1109 if (type == VDIR) { 1110 error = sdev_cleandir(fromdv, NULL, 0); 1111 sdcmn_err2(("sdev_rename: 
cleandir finished with %d\n", 1112 error)); 1113 if (error == EBUSY) 1114 error = 0; 1115 } 1116 1117 rw_enter(&fromparent->sdev_contents, RW_WRITER); 1118 bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0; 1119 sdev_cache_update(fromparent, &fromdv, onm, 1120 SDEV_CACHE_DELETE); 1121 VN_RELE(SDEVTOV(fromdv)); 1122 1123 /* best effforts clean up the backing store */ 1124 if (bkstore) { 1125 ASSERT(fromparent->sdev_attrvp); 1126 if (type != VDIR) { 1127 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */ 1128 error = VOP_REMOVE(fromparent->sdev_attrvp, 1129 onm, kcred, ct, 0); 1130 } else { 1131 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */ 1132 error = VOP_RMDIR(fromparent->sdev_attrvp, 1133 onm, fromparent->sdev_attrvp, kcred, ct, 0); 1134 } 1135 1136 if (error) { 1137 sdcmn_err2(("sdev_rename: device %s is " 1138 "still on disk %s\n", onm, 1139 fromparent->sdev_path)); 1140 error = 0; 1141 } 1142 } 1143 rw_exit(&fromparent->sdev_contents); 1144 mutex_exit(&sdev_lock); 1145 1146 /* once reached to this point, the rename is regarded successful */ 1147 return (0); 1148 } 1149 1150 /* 1151 * dev-fs version of "ln -s path dev-name" 1152 * tnm - path, e.g. /devices/... or /dev/... 
1153 * lnm - dev_name 1154 */ 1155 /*ARGSUSED6*/ 1156 static int 1157 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, 1158 char *tnm, struct cred *cred, caller_context_t *ct, int flags) 1159 { 1160 int error; 1161 struct vnode *vp = NULL; 1162 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1163 struct sdev_node *self = (struct sdev_node *)NULL; 1164 1165 ASSERT(parent); 1166 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1167 if (parent->sdev_state == SDEV_ZOMBIE) { 1168 rw_exit(&parent->sdev_dotdot->sdev_contents); 1169 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n", 1170 parent->sdev_name)); 1171 return (ENOENT); 1172 } 1173 1174 if (!SDEV_IS_GLOBAL(parent)) { 1175 rw_exit(&parent->sdev_dotdot->sdev_contents); 1176 return (ENOTSUP); 1177 } 1178 rw_exit(&parent->sdev_dotdot->sdev_contents); 1179 1180 /* execute access is required to search a directory */ 1181 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1182 return (error); 1183 1184 /* find existing name */ 1185 /* XXXci - We may need to translate the C-I flags here */ 1186 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1187 if (error == 0) { 1188 ASSERT(vp); 1189 VN_RELE(vp); 1190 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm)); 1191 return (EEXIST); 1192 } 1193 if (error != ENOENT) 1194 return (error); 1195 1196 /* write access is required to create a symlink */ 1197 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) 1198 return (error); 1199 1200 /* put it into memory cache */ 1201 rw_enter(&parent->sdev_contents, RW_WRITER); 1202 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm, 1203 cred, SDEV_READY); 1204 if (error) { 1205 rw_exit(&parent->sdev_contents); 1206 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); 1207 if (self) 1208 SDEV_RELE(self); 1209 1210 return (error); 1211 } 1212 ASSERT(self && (self->sdev_state == SDEV_READY)); 1213 rw_exit(&parent->sdev_contents); 1214 1215 /* 
take care the timestamps for the node and its parent */ 1216 sdev_update_timestamps(SDEVTOV(self), kcred, 1217 AT_CTIME|AT_MTIME|AT_ATIME); 1218 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1219 if (SDEV_IS_GLOBAL(parent)) 1220 atomic_inc_ulong(&parent->sdev_gdir_gen); 1221 1222 /* wake up other threads blocked on looking up this node */ 1223 mutex_enter(&self->sdev_lookup_lock); 1224 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1225 mutex_exit(&self->sdev_lookup_lock); 1226 SDEV_RELE(self); /* don't return with vnode held */ 1227 return (0); 1228 } 1229 1230 /*ARGSUSED6*/ 1231 static int 1232 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, 1233 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp) 1234 { 1235 int error; 1236 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1237 struct sdev_node *self = NULL; 1238 struct vnode *vp = NULL; 1239 1240 ASSERT(parent && parent->sdev_dotdot); 1241 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1242 if (parent->sdev_state == SDEV_ZOMBIE) { 1243 rw_exit(&parent->sdev_dotdot->sdev_contents); 1244 return (ENOENT); 1245 } 1246 1247 /* non-global do not allow pure directory creation */ 1248 if (!SDEV_IS_GLOBAL(parent)) { 1249 rw_exit(&parent->sdev_dotdot->sdev_contents); 1250 return (prof_lookup(dvp, nm, vpp, cred)); 1251 } 1252 rw_exit(&parent->sdev_dotdot->sdev_contents); 1253 1254 /* execute access is required to search the directory */ 1255 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 1256 return (error); 1257 } 1258 1259 /* find existing name */ 1260 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 1261 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1262 if (error == 0) { 1263 VN_RELE(vp); 1264 return (EEXIST); 1265 } 1266 if (error != ENOENT) 1267 return (error); 1268 1269 /* require write access to create a directory */ 1270 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 1271 
return (error); 1272 } 1273 1274 /* put it into memory */ 1275 rw_enter(&parent->sdev_contents, RW_WRITER); 1276 error = sdev_mknode(parent, nm, &self, 1277 va, NULL, NULL, cred, SDEV_READY); 1278 if (error) { 1279 rw_exit(&parent->sdev_contents); 1280 if (self) 1281 SDEV_RELE(self); 1282 return (error); 1283 } 1284 ASSERT(self && (self->sdev_state == SDEV_READY)); 1285 rw_exit(&parent->sdev_contents); 1286 1287 /* take care the timestamps for the node and its parent */ 1288 sdev_update_timestamps(SDEVTOV(self), kcred, 1289 AT_CTIME|AT_MTIME|AT_ATIME); 1290 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1291 if (SDEV_IS_GLOBAL(parent)) 1292 atomic_inc_ulong(&parent->sdev_gdir_gen); 1293 1294 /* wake up other threads blocked on looking up this node */ 1295 mutex_enter(&self->sdev_lookup_lock); 1296 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1297 mutex_exit(&self->sdev_lookup_lock); 1298 *vpp = SDEVTOV(self); 1299 return (0); 1300 } 1301 1302 /* 1303 * allowing removing an empty directory under /dev 1304 */ 1305 /*ARGSUSED*/ 1306 static int 1307 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, 1308 caller_context_t *ct, int flags) 1309 { 1310 int error = 0; 1311 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1312 struct sdev_node *self = NULL; 1313 struct vnode *vp = NULL; 1314 1315 /* bail out early */ 1316 if (strcmp(nm, ".") == 0) 1317 return (EINVAL); 1318 if (strcmp(nm, "..") == 0) 1319 return (EEXIST); /* should be ENOTEMPTY */ 1320 1321 /* no destruction of non-global node */ 1322 ASSERT(parent && parent->sdev_dotdot); 1323 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1324 if (!SDEV_IS_GLOBAL(parent)) { 1325 rw_exit(&parent->sdev_dotdot->sdev_contents); 1326 return (ENOTSUP); 1327 } 1328 rw_exit(&parent->sdev_dotdot->sdev_contents); 1329 1330 /* execute access is required to search the directory */ 1331 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) 1332 return (error); 1333 1334 /* 
check existing name */ 1335 rw_enter(&parent->sdev_contents, RW_WRITER); 1336 self = sdev_cache_lookup(parent, nm); 1337 if (self == NULL) { 1338 rw_exit(&parent->sdev_contents); 1339 return (ENOENT); 1340 } 1341 1342 vp = SDEVTOV(self); 1343 if ((self->sdev_state == SDEV_INIT) || 1344 (self->sdev_state == SDEV_ZOMBIE)) { 1345 rw_exit(&parent->sdev_contents); 1346 VN_RELE(vp); 1347 return (ENOENT); 1348 } 1349 1350 /* some sanity checks */ 1351 if (vp == dvp || vp == cdir) { 1352 rw_exit(&parent->sdev_contents); 1353 VN_RELE(vp); 1354 return (EINVAL); 1355 } 1356 1357 if (vp->v_type != VDIR) { 1358 rw_exit(&parent->sdev_contents); 1359 VN_RELE(vp); 1360 return (ENOTDIR); 1361 } 1362 1363 if (vn_vfswlock(vp)) { 1364 rw_exit(&parent->sdev_contents); 1365 VN_RELE(vp); 1366 return (EBUSY); 1367 } 1368 1369 if (vn_mountedvfs(vp) != NULL) { 1370 rw_exit(&parent->sdev_contents); 1371 vn_vfsunlock(vp); 1372 VN_RELE(vp); 1373 return (EBUSY); 1374 } 1375 1376 self = VTOSDEV(vp); 1377 /* bail out on a non-empty directory */ 1378 rw_enter(&self->sdev_contents, RW_READER); 1379 if (self->sdev_nlink > 2) { 1380 rw_exit(&self->sdev_contents); 1381 rw_exit(&parent->sdev_contents); 1382 vn_vfsunlock(vp); 1383 VN_RELE(vp); 1384 return (ENOTEMPTY); 1385 } 1386 rw_exit(&self->sdev_contents); 1387 1388 /* unlink it from the directory cache */ 1389 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); 1390 rw_exit(&parent->sdev_contents); 1391 vn_vfsunlock(vp); 1392 VN_RELE(vp); 1393 1394 /* best effort to clean up the backing store */ 1395 if (SDEV_IS_PERSIST(parent)) { 1396 ASSERT(parent->sdev_attrvp); 1397 error = VOP_RMDIR(parent->sdev_attrvp, nm, 1398 parent->sdev_attrvp, kcred, ct, flags); 1399 1400 if (error) 1401 sdcmn_err2(("sdev_rmdir: cleaning device %s is on" 1402 " disk error %d\n", parent->sdev_path, error)); 1403 if (error == EBUSY) 1404 error = 0; 1405 1406 } 1407 1408 return (error); 1409 } 1410 1411 /* 1412 * read the contents of a symbolic link 1413 */ 1414 static 
int 1415 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred, 1416 caller_context_t *ct) 1417 { 1418 struct sdev_node *dv; 1419 int error = 0; 1420 1421 ASSERT(vp->v_type == VLNK); 1422 1423 dv = VTOSDEV(vp); 1424 1425 if (dv->sdev_attrvp) { 1426 /* non-NULL attrvp implys a persisted node at READY state */ 1427 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct)); 1428 } else if (dv->sdev_symlink != NULL) { 1429 /* memory nodes, e.g. local nodes */ 1430 rw_enter(&dv->sdev_contents, RW_READER); 1431 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); 1432 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), 1433 UIO_READ, uiop); 1434 rw_exit(&dv->sdev_contents); 1435 return (error); 1436 } 1437 1438 return (ENOENT); 1439 } 1440 1441 /*ARGSUSED4*/ 1442 static int 1443 sdev_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp, 1444 caller_context_t *ct, int flags) 1445 { 1446 struct sdev_node *dv = VTOSDEV(vp); 1447 int error; 1448 1449 VERIFY(RW_READ_HELD(&dv->sdev_contents)); 1450 1451 /* 1452 * We can't recursively take ->sdev_contents via an indirect 1453 * VOP_ACCESS(), but we don't need to use that anyway. 
1454 */ 1455 if ((error = sdev_self_access(dv, VEXEC, 0, cred, ct)) != 0) 1456 return (error); 1457 1458 if (!SDEV_IS_GLOBAL(dv)) 1459 prof_filldir(dv); 1460 return (devname_readdir_func(vp, uiop, cred, eofp, SDEV_BROWSE)); 1461 } 1462 1463 /*ARGSUSED1*/ 1464 static void 1465 sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1466 { 1467 devname_inactive_func(vp, cred, NULL); 1468 } 1469 1470 /*ARGSUSED2*/ 1471 static int 1472 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1473 { 1474 struct sdev_node *dv = VTOSDEV(vp); 1475 struct sdev_fid *sdev_fid; 1476 1477 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { 1478 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); 1479 return (ENOSPC); 1480 } 1481 1482 sdev_fid = (struct sdev_fid *)fidp; 1483 bzero(sdev_fid, sizeof (struct sdev_fid)); 1484 sdev_fid->sdevfid_len = 1485 (int)sizeof (struct sdev_fid) - sizeof (ushort_t); 1486 sdev_fid->sdevfid_ino = dv->sdev_ino; 1487 1488 return (0); 1489 } 1490 1491 /* 1492 * This pair of routines bracket all VOP_READ, VOP_WRITE 1493 * and VOP_READDIR requests. The contents lock stops things 1494 * moving around while we're looking at them. 1495 */ 1496 /*ARGSUSED2*/ 1497 static int 1498 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1499 { 1500 rw_enter(&VTOSDEV(vp)->sdev_contents, 1501 write_flag ? RW_WRITER : RW_READER); 1502 return (write_flag ? 
V_WRITELOCK_TRUE : V_WRITELOCK_FALSE); 1503 } 1504 1505 /*ARGSUSED1*/ 1506 static void 1507 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1508 { 1509 rw_exit(&VTOSDEV(vp)->sdev_contents); 1510 } 1511 1512 /*ARGSUSED1*/ 1513 static int 1514 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1515 caller_context_t *ct) 1516 { 1517 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; 1518 1519 ASSERT(vp->v_type != VCHR && 1520 vp->v_type != VBLK && vp->v_type != VLNK); 1521 1522 if (vp->v_type == VDIR) 1523 return (fs_seek(vp, ooff, noffp, ct)); 1524 1525 ASSERT(attrvp); 1526 return (VOP_SEEK(attrvp, ooff, noffp, ct)); 1527 } 1528 1529 /*ARGSUSED1*/ 1530 static int 1531 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 1532 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr, 1533 caller_context_t *ct) 1534 { 1535 int error; 1536 struct sdev_node *dv = VTOSDEV(vp); 1537 1538 ASSERT(dv); 1539 ASSERT(dv->sdev_attrvp); 1540 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, 1541 flk_cbp, cr, ct); 1542 1543 return (error); 1544 } 1545 1546 static int 1547 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 1548 caller_context_t *ct) 1549 { 1550 switch (cmd) { 1551 case _PC_ACL_ENABLED: 1552 *valp = SDEV_ACL_FLAVOR(vp); 1553 return (0); 1554 } 1555 1556 return (fs_pathconf(vp, cmd, valp, cr, ct)); 1557 } 1558 1559 vnodeops_t *sdev_vnodeops; 1560 1561 const fs_operation_def_t sdev_vnodeops_tbl[] = { 1562 VOPNAME_OPEN, { .vop_open = sdev_open }, 1563 VOPNAME_CLOSE, { .vop_close = sdev_close }, 1564 VOPNAME_READ, { .vop_read = sdev_read }, 1565 VOPNAME_WRITE, { .vop_write = sdev_write }, 1566 VOPNAME_IOCTL, { .vop_ioctl = sdev_ioctl }, 1567 VOPNAME_GETATTR, { .vop_getattr = sdev_getattr }, 1568 VOPNAME_SETATTR, { .vop_setattr = sdev_setattr }, 1569 VOPNAME_ACCESS, { .vop_access = sdev_access }, 1570 VOPNAME_LOOKUP, { .vop_lookup = sdev_lookup }, 1571 VOPNAME_CREATE, { .vop_create = sdev_create }, 
1572 VOPNAME_RENAME, { .vop_rename = sdev_rename }, 1573 VOPNAME_REMOVE, { .vop_remove = sdev_remove }, 1574 VOPNAME_MKDIR, { .vop_mkdir = sdev_mkdir }, 1575 VOPNAME_RMDIR, { .vop_rmdir = sdev_rmdir }, 1576 VOPNAME_READDIR, { .vop_readdir = sdev_readdir }, 1577 VOPNAME_SYMLINK, { .vop_symlink = sdev_symlink }, 1578 VOPNAME_READLINK, { .vop_readlink = sdev_readlink }, 1579 VOPNAME_INACTIVE, { .vop_inactive = sdev_inactive }, 1580 VOPNAME_FID, { .vop_fid = sdev_fid }, 1581 VOPNAME_RWLOCK, { .vop_rwlock = sdev_rwlock }, 1582 VOPNAME_RWUNLOCK, { .vop_rwunlock = sdev_rwunlock }, 1583 VOPNAME_SEEK, { .vop_seek = sdev_seek }, 1584 VOPNAME_FRLOCK, { .vop_frlock = sdev_frlock }, 1585 VOPNAME_PATHCONF, { .vop_pathconf = sdev_pathconf }, 1586 VOPNAME_SETSECATTR, { .vop_setsecattr = sdev_setsecattr }, 1587 VOPNAME_GETSECATTR, { .vop_getsecattr = sdev_getsecattr }, 1588 NULL, NULL 1589 }; 1590 1591 int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl); 1592