1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 /*
25 * Copyright 2018, Joyent, Inc.
26 */
27
28 /*
29 * vnode ops for the /dev filesystem
30 *
31 * - VDIR, VCHR, VBLK, and VLNK are considered must-support files
32 * - VREG and VDOOR are used for some internal implementations in
33 * the global zone, e.g. devname and devfsadm communication
34 * - other file types are unusual in this namespace and
35 * not supported for now
36 */
37
38 /*
39 * sdev has a few basic goals:
40 * o Provide /dev for the global zone as well as various non-global zones.
41 * o Provide the basic functionality that devfsadm might need (mknod,
42 * symlinks, etc.)
43 * o Allow persistent permissions on files in /dev.
44 * o Allow for dynamic directories and nodes for use by various services (pts,
45 * zvol, net, etc.)
46 *
47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's
48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's
49 * that we generally care about, dynamic and otherwise.
50 *
51 * Persisting Information
52 * ----------------------
53 *
54 * When sdev is mounted, it keeps track of the underlying file system it is
55 * mounted over. In certain situations, sdev will go and create entries in that
56 * underlying file system. These underlying 'back end' nodes are used as proxies
57 * for various changes in permissions. While specific sets of nodes, such as
58 * dynamic ones, are exempt, this process stores permission changes against
59 * these back end nodes. The point of all of this is to allow for these settings
60 * to persist across host and zone reboots. As an example, consider the entry
61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon
62 * changing the permissions on c0t0d0 you'd have the following logical
63 * relationships:
64 *
65 *    +------------------+   sdev_vnode     +--------------+
66 *    |   sdev_node_t    |<---------------->|   vnode_t    |
67 *    | /dev/dsk/c0t0d0  |<---------------->|   for sdev   |
68 *    +------------------+                  +--------------+
69 *           |
70 *           | sdev_attrvp
71 *           |
72 *           |    +---------------------+
73 *           +--->| vnode_t for UFS|ZFS |
74 *                |   /dev/dsk/c0t0d0   |
75 *                +---------------------+
76 *
77 * sdev is generally in memory. Therefore when a lookup happens and there is no
78 * entry already inside of a directory cache, it will next check the backing
79 * store. If the backing store exists, we will reconstitute the sdev_node based
80 * on the information that we persisted. When we create the backing store node,
81 * we use the struct vattr information that we already have in sdev_node_t.
82 * Because of this, we already know if the entry was previously a symlink,
83 * directory, or some other kind of type. Note that not all types of nodes are
84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are
85 * eligible to be persisted.
86 *
87 * When the sdev_node is created and the lookup is done, we grab a hold on the
88 * underlying vnode as part of the call to VOP_LOOKUP. That reference is held
89 * until the sdev_node becomes inactive. Once its reference count reaches one
90 * and the VOP_INACTIVE callback fires leading to the destruction of the node,
91 * the reference on the underlying vnode will be released.
92 *
93 * The backing store node will be deleted only when the node itself is deleted
94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call.
95 *
96 * Not everything can be persisted, see The Rules section for more details.
97 *
98 * Dynamic Nodes
99 * -------------
100 *
101 * Dynamic nodes allow for specific interactions with various kernel subsystems
102 * when looking up directory entries. This allows the lookup and readdir
103 * functions to check against the kernel subsystems for validity, e.g. does a
104 * zvol or nic still exist.
105 *
106 * More specifically, when we create various directories we check if the
107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c).
108 * If it does, we swap out the vnode operations into a new set which combine the
109 * normal sdev vnode operations with the dynamic set here.
110 *
111 * In addition, various dynamic nodes implement a verification entry point. This
112 * verification entry is used as a part of lookup and readdir. The goal for
113 * these dynamic nodes is to allow them to check with the underlying subsystems
114 * to ensure that these devices are still present, or if they have gone away, to
115 * remove them from the results. This is indicated by using the SDEV_VTOR flag
116 * in vtab[].
117 *
118 * Dynamic nodes have additional restrictions placed upon them. They may only
119 * appear at the top level directory of the file system. In addition, users
120 * cannot create dirents below any level of a dynamic node aside from its
121 * special vnops.
122 *
123 * Profiles
124 * --------
125 *
126 * Profiles exist for the purpose of non-global zones. They work with the zone
127 * brands and zoneadmd to set up a filter of allowed devices that can appear in
128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a
129 * modctl system call. Specifically it allows one to add patterns of device
130 * paths to include and exclude. It allows for a collection of symlinks to be
131 * added and it allows for remapping names.
132 *
133 * When operating in a non-global zone, several of the sdev vnops are redirected
134 * to the profile versions. These impose additional restrictions such as
135 * enforcing that a non-global zone's /dev is read only.
136 *
137 * sdev_node_t States
138 * ------------------
139 *
140 * A given sdev_node_t has a field called the sdev_state which describes where
141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT,
142 * SDEV_READY, and SDEV_ZOMBIE.
143 *
144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node
145 * is allocated, initialized and added to the directory's
146 * sdev_node cache. A node at this state will also
147 * have the SDEV_LOOKUP flag set.
148 *
149 * Other threads that are trying to look up a node at
150 * this state will be blocked until the SDEV_LOOKUP flag
151 * is cleared.
152 *
153 * When the SDEV_LOOKUP flag is cleared, the node may
154 * transition into the SDEV_READY state for a successful
155 * lookup or the node is removed from the directory cache
156 * and destroyed if the named node can not be found.
157 * An ENOENT error is returned for the second case.
158 *
159 * SDEV_READY: A /dev file has been successfully looked up and
160 * associated with a vnode. The /dev file is available
161 * for the supported /dev file system operations.
162 *
163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued
164 * to an SDEV_READY node. The node is transitioned into
165 * the SDEV_ZOMBIE state if the vnode reference count
166 * is still held. A SDEV_ZOMBIE node does not support
167 * any of the /dev file system operations. A SDEV_ZOMBIE
168 * node is immediately removed from the directory cache
169 * and destroyed once the reference count reaches zero.
170 *
171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the
172 * underlying directory caches. This has been the source of numerous bugs and
173 * thus to better mimic what happens on a real file system, it is no longer the
174 * case.
175 *
176 * The following state machine describes the life cycle of a given node and its
177 * associated states:
178 *
179 *  node is    . . . . .
180 *  allocated via      .     +-------------+         . . . . . . . vnode_t refcount
181 *  sdev_nodeinit()    .     | Unallocated |         .             reaches zero and
182 *            +--------*-----|   Memory    |<--------*---+         sdev_inactive is
183 *            |              +-------------+             |         called.
184 *            |       +------------^                     |
185 *            v       |                                  |
186 *      +-----------+ * . . sdev_nodeready()      +-------------+
187 *      | SDEV_INIT | |     or related setup      | SDEV_ZOMBIE |
188 *      +-----------+ |     failure               +-------------+
189 *            |       |                                  ^
190 *            |       |      +------------+              |
191 *            +-*----------->| SDEV_READY |--------*-----+
192 *              .            +------------+        .         The node is no longer
193 *              . . node successfully              . . . . . valid or we've been
194 *                  inserted into the                        asked to remove it.
195 *                  directory cache                          This happens via
196 *                  and sdev_nodeready()                     sdev_dirdelete().
197 *                  call successful.
198 *
199 * Adding and Removing Dirents, Zombie Nodes
200 * -----------------------------------------
201 *
202 * As part of doing a lookup, readdir, or an explicit creation operation like
203 * mkdir or create, nodes may be created. Every directory has an avl tree which
204 * contains its children, the sdev_entries tree. This is only used if the type
205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and
206 * it is managed through sdev_cache_update().
207 *
208 * Every sdev_node_t has a field sdev_state, which describes the current state
209 * of the node. A node is generally speaking in the SDEV_READY state. When it is
210 * there, it can be looked up, accessed, and operations performed on it. When a
211 * node is going to be removed from the directory cache it is marked as a
212 * zombie. Once a node becomes a zombie, no other file system operations will
213 * succeed and it will continue to exist as a node until the vnode count on the
214 * node reaches zero. At that point, the node will be freed. However, once a
215 * node has been marked as a zombie, it will be removed immediately from the
216 * directory cache such that no one else may find it again. This means that
217 * someone else can insert a new entry into that directory with the same name
218 * and without a problem.
219 *
220 * To remove a node, see the section on that in The Rules.
221 *
222 * The Rules
223 * ---------
224 * These are the rules to live by when working in sdev. These are not
225 * exhaustive.
226 *
227 * - Set 1: Working with Backing Nodes
228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node.
229 * o If we find a backing node when looking up an sdev_node_t for the first
230 * time, we use its attributes to build our sdev_node_t.
231 * o If there is a found backing node, or we create a backing node, that's
232 * when we grab the hold on its vnode.
233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from
234 * the underlying file system. It must not be searchable or findable.
235 * o We release our hold on the backing node vnode when we destroy the
236 * sdev_node_t.
237 *
238 * - Set 2: Locking rules for sdev (not exhaustive)
239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it
240 * for read or write if manipulating its contents appropriately.
241 * o You must lock your parent before yourself.
242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must
243 * grab the v_lock before the sdev_contents rw_lock.
244 * o If you release a lock on the node as a part of upgrading it, you must
245 * verify that the node has not become a zombie as a part of this process.
246 *
247 * - Set 3: Zombie Status and What it Means
248 * o If you encounter a node that is a ZOMBIE, that means that it has been
249 * unlinked from the backing store.
250 * o If you release your contents lock and acquire it again (say as part of
251 * trying to grab a write lock) you must check that the node has not become
252 * a zombie.
253 * o You should VERIFY that a looked up node is not a zombie. This follows
254 * from the following logic. To mark something as a zombie means that it is
255 * removed from the parent's directory cache. To do that, you must have a
256 * write lock on the parent's sdev_contents. To lookup through that
257 * directory you must have a read lock. This then becomes a simple ordering
258 * problem. If you've been granted the lock then the other operation cannot
259 * be in progress or must have already succeeded.
260 *
261 * - Set 4: Removing Directory Entries (aka making nodes Zombies)
262 * o Write lock must be held on the directory
263 * o Write lock must be held on the node
264 * o Remove the sdev_node_t from its parent cache
265 * o Remove the corresponding backing store node, if it exists, eg. use
266 * VOP_REMOVE or VOP_RMDIR.
267 * o You must NOT make any change in the vnode reference count! Nodes should
268 * only be cleaned up through VOP_INACTIVE callbacks.
269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of
270 * the backing store vnode that was grabbed during lookup.
271 *
272 * - Set 5: What Nodes may be Persisted
273 * o The root, /dev is always persisted
274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted
275 * unless it is also marked SDEV_PERSIST
276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that
277 * along to the child as long as it does not contradict the above rules
278 */
279
280 #include <sys/types.h>
281 #include <sys/param.h>
282 #include <sys/t_lock.h>
283 #include <sys/systm.h>
284 #include <sys/sysmacros.h>
285 #include <sys/user.h>
286 #include <sys/time.h>
287 #include <sys/vfs.h>
288 #include <sys/vnode.h>
289 #include <sys/vfs_opreg.h>
290 #include <sys/file.h>
291 #include <sys/fcntl.h>
292 #include <sys/flock.h>
293 #include <sys/kmem.h>
294 #include <sys/uio.h>
295 #include <sys/errno.h>
296 #include <sys/stat.h>
297 #include <sys/cred.h>
298 #include <sys/dirent.h>
299 #include <sys/pathname.h>
300 #include <sys/cmn_err.h>
301 #include <sys/debug.h>
302 #include <sys/policy.h>
303 #include <vm/hat.h>
304 #include <vm/seg_vn.h>
305 #include <vm/seg_map.h>
306 #include <vm/seg.h>
307 #include <vm/as.h>
308 #include <vm/page.h>
309 #include <sys/proc.h>
310 #include <sys/mode.h>
311 #include <sys/sunndi.h>
312 #include <sys/ptms.h>
313 #include <fs/fs_subr.h>
314 #include <sys/fs/dv_node.h>
315 #include <sys/fs/sdev_impl.h>
316
317 /*ARGSUSED*/
318 static int
sdev_open(struct vnode ** vpp,int flag,struct cred * cred,caller_context_t * ct)319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct)
320 {
321 struct sdev_node *dv = VTOSDEV(*vpp);
322 struct sdev_node *ddv = dv->sdev_dotdot;
323 int error = 0;
324
325 if ((*vpp)->v_type == VDIR)
326 return (0);
327
328 if (!SDEV_IS_GLOBAL(dv))
329 return (ENOTSUP);
330
331 if ((*vpp)->v_type == VLNK)
332 return (ENOENT);
333 ASSERT((*vpp)->v_type == VREG);
334 if ((*vpp)->v_type != VREG)
335 return (ENOTSUP);
336
337 ASSERT(ddv);
338 rw_enter(&ddv->sdev_contents, RW_READER);
339 if (dv->sdev_attrvp == NULL) {
340 rw_exit(&ddv->sdev_contents);
341 return (ENOENT);
342 }
343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct);
344 rw_exit(&ddv->sdev_contents);
345 return (error);
346 }
347
348 /*ARGSUSED1*/
349 static int
sdev_close(struct vnode * vp,int flag,int count,offset_t offset,struct cred * cred,caller_context_t * ct)350 sdev_close(struct vnode *vp, int flag, int count,
351 offset_t offset, struct cred *cred, caller_context_t *ct)
352 {
353 struct sdev_node *dv = VTOSDEV(vp);
354
355 if (vp->v_type == VDIR) {
356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
357 cleanshares(vp, ttoproc(curthread)->p_pid);
358 return (0);
359 }
360
361 if (!SDEV_IS_GLOBAL(dv))
362 return (ENOTSUP);
363
364 ASSERT(vp->v_type == VREG);
365 if (vp->v_type != VREG)
366 return (ENOTSUP);
367
368 ASSERT(dv->sdev_attrvp);
369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct));
370 }
371
372 /*ARGSUSED*/
373 static int
sdev_read(struct vnode * vp,struct uio * uio,int ioflag,struct cred * cred,struct caller_context * ct)374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
375 struct caller_context *ct)
376 {
377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp);
378 int error;
379
380 if (!SDEV_IS_GLOBAL(dv))
381 return (EINVAL);
382
383 if (vp->v_type == VDIR)
384 return (EISDIR);
385
386 /* only supporting regular files in /dev */
387 ASSERT(vp->v_type == VREG);
388 if (vp->v_type != VREG)
389 return (EINVAL);
390
391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents));
392 ASSERT(dv->sdev_attrvp);
393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct);
394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct);
395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct);
396 return (error);
397 }
398
399 /*ARGSUSED*/
400 static int
sdev_write(struct vnode * vp,struct uio * uio,int ioflag,struct cred * cred,struct caller_context * ct)401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred,
402 struct caller_context *ct)
403 {
404 struct sdev_node *dv = VTOSDEV(vp);
405 int error = 0;
406
407 if (!SDEV_IS_GLOBAL(dv))
408 return (EINVAL);
409
410 if (vp->v_type == VDIR)
411 return (EISDIR);
412
413 /* only supporting regular files in /dev */
414 ASSERT(vp->v_type == VREG);
415 if (vp->v_type != VREG)
416 return (EINVAL);
417
418 ASSERT(dv->sdev_attrvp);
419
420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct);
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct);
422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct);
423 if (error == 0) {
424 sdev_update_timestamps(dv->sdev_attrvp, kcred,
425 AT_MTIME);
426 }
427 return (error);
428 }
429
430 /*ARGSUSED*/
431 static int
sdev_ioctl(struct vnode * vp,int cmd,intptr_t arg,int flag,struct cred * cred,int * rvalp,caller_context_t * ct)432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag,
433 struct cred *cred, int *rvalp, caller_context_t *ct)
434 {
435 struct sdev_node *dv = VTOSDEV(vp);
436
437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR))
438 return (ENOTTY);
439
440 ASSERT(vp->v_type == VREG);
441 if (vp->v_type != VREG)
442 return (EINVAL);
443
444 ASSERT(dv->sdev_attrvp);
445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct));
446 }
447
448 static int
sdev_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags,
450 struct cred *cr, caller_context_t *ct)
451 {
452 int error = 0;
453 struct sdev_node *dv = VTOSDEV(vp);
454 struct sdev_node *parent = dv->sdev_dotdot;
455
456 ASSERT(parent);
457
458 rw_enter(&parent->sdev_contents, RW_READER);
459 ASSERT(dv->sdev_attr || dv->sdev_attrvp);
460
461 /*
462 * search order:
463 * - for persistent nodes (SDEV_PERSIST): backstore
464 * - for non-persistent nodes: module ops if global, then memory
465 */
466 if (dv->sdev_attrvp) {
467 rw_exit(&parent->sdev_contents);
468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct);
469 sdev_vattr_merge(dv, vap);
470 } else {
471 ASSERT(dv->sdev_attr);
472 *vap = *dv->sdev_attr;
473 sdev_vattr_merge(dv, vap);
474 rw_exit(&parent->sdev_contents);
475 }
476
477 return (error);
478 }
479
480 /*ARGSUSED4*/
481 static int
sdev_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cred,caller_context_t * ctp)482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags,
483 struct cred *cred, caller_context_t *ctp)
484 {
485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0));
486 }
487
488 static int
sdev_getsecattr(struct vnode * vp,struct vsecattr * vsap,int flags,struct cred * cr,caller_context_t * ct)489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
490 struct cred *cr, caller_context_t *ct)
491 {
492 int error;
493 struct sdev_node *dv = VTOSDEV(vp);
494 struct vnode *avp = dv->sdev_attrvp;
495
496 if (avp == NULL) {
497 /* return fs_fab_acl() if flavor matches, else do nothing */
498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED &&
499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) ||
500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED &&
501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE))))
502 return (fs_fab_acl(vp, vsap, flags, cr, ct));
503
504 return (ENOSYS);
505 }
506
507 (void) VOP_RWLOCK(avp, 1, ct);
508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct);
509 VOP_RWUNLOCK(avp, 1, ct);
510 return (error);
511 }
512
513 static int
sdev_setsecattr(struct vnode * vp,struct vsecattr * vsap,int flags,struct cred * cr,caller_context_t * ct)514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags,
515 struct cred *cr, caller_context_t *ct)
516 {
517 int error;
518 struct sdev_node *dv = VTOSDEV(vp);
519 struct vnode *avp = dv->sdev_attrvp;
520
521 if (dv->sdev_state == SDEV_ZOMBIE)
522 return (0);
523
524 if (avp == NULL) {
525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv))
526 return (fs_nosys());
527 ASSERT(dv->sdev_attr);
528 /*
529 * if coming in directly, the acl system call will
530 * have held the read-write lock via VOP_RWLOCK()
531 * If coming in via specfs, specfs will have
532 * held the rw lock on the realvp i.e. us.
533 */
534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
535 sdev_vattr_merge(dv, dv->sdev_attr);
536 error = sdev_shadow_node(dv, cr);
537 if (error) {
538 return (fs_nosys());
539 }
540
541 ASSERT(dv->sdev_attrvp);
542 /* clean out the memory copy if any */
543 if (dv->sdev_attr) {
544 kmem_free(dv->sdev_attr, sizeof (struct vattr));
545 dv->sdev_attr = NULL;
546 }
547 avp = dv->sdev_attrvp;
548 }
549 ASSERT(avp);
550
551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct);
552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct);
553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct);
554 return (error);
555 }
556
557 /*
558 * There are two different unlocked routines. This one is not static as it is
559 * used as part of the secpolicy_vnode_setattr calls in sdev_subr.c. Because it
560 * is used in that function it has to have a specific signature.
561 */
562 int
sdev_unlocked_access(void * vdv,int mode,struct cred * cr)563 sdev_unlocked_access(void *vdv, int mode, struct cred *cr)
564 {
565 struct sdev_node *dv = vdv;
566 int shift = 0;
567 uid_t owner = dv->sdev_attr->va_uid;
568
569 if (crgetuid(cr) != owner) {
570 shift += 3;
571 if (groupmember(dv->sdev_attr->va_gid, cr) == 0)
572 shift += 3;
573 }
574
575 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner,
576 dv->sdev_attr->va_mode << shift, mode));
577 }
578
579 static int
sdev_self_access(sdev_node_t * dv,int mode,int flags,struct cred * cr,caller_context_t * ct)580 sdev_self_access(sdev_node_t *dv, int mode, int flags, struct cred *cr,
581 caller_context_t *ct)
582 {
583 int ret;
584
585 ASSERT(RW_READ_HELD(&dv->sdev_contents));
586 ASSERT(dv->sdev_attr || dv->sdev_attrvp);
587
588 if (dv->sdev_attrvp) {
589 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct);
590 } else if (dv->sdev_attr) {
591 ret = sdev_unlocked_access(dv, mode, cr);
592 if (ret)
593 ret = EACCES;
594 }
595
596 return (ret);
597 }
598
599 static int
sdev_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)600 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr,
601 caller_context_t *ct)
602 {
603 struct sdev_node *dv = VTOSDEV(vp);
604 int ret;
605
606 rw_enter(&dv->sdev_contents, RW_READER);
607 ret = sdev_self_access(dv, mode, flags, cr, ct);
608 rw_exit(&dv->sdev_contents);
609
610 return (ret);
611 }
612
613 /*
614 * Lookup
615 */
616 /*ARGSUSED3*/
617 static int
sdev_lookup(struct vnode * dvp,char * nm,struct vnode ** vpp,struct pathname * pnp,int flags,struct vnode * rdir,struct cred * cred,caller_context_t * ct,int * direntflags,pathname_t * realpnp)618 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
619 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
620 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
621 {
622 struct sdev_node *parent;
623 int error;
624
625 parent = VTOSDEV(dvp);
626 ASSERT(parent);
627
628 /* execute access is required to search the directory */
629 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
630 return (error);
631
632 if (!SDEV_IS_GLOBAL(parent))
633 return (prof_lookup(dvp, nm, vpp, cred));
634 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0));
635 }
636
637 /*ARGSUSED2*/
638 static int
sdev_create(struct vnode * dvp,char * nm,struct vattr * vap,vcexcl_t excl,int mode,struct vnode ** vpp,struct cred * cred,int flag,caller_context_t * ct,vsecattr_t * vsecp)639 sdev_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
640 int mode, struct vnode **vpp, struct cred *cred, int flag,
641 caller_context_t *ct, vsecattr_t *vsecp)
642 {
643 struct vnode *vp = NULL;
644 struct vnode *avp;
645 struct sdev_node *parent;
646 struct sdev_node *self = NULL;
647 int error = 0;
648 vtype_t type = vap->va_type;
649
650 ASSERT(type != VNON && type != VBAD);
651
652 if ((type == VFIFO) || (type == VSOCK) ||
653 (type == VPROC) || (type == VPORT))
654 return (ENOTSUP);
655
656 parent = VTOSDEV(dvp);
657 ASSERT(parent);
658
659 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
660 if (parent->sdev_state == SDEV_ZOMBIE) {
661 rw_exit(&parent->sdev_dotdot->sdev_contents);
662 return (ENOENT);
663 }
664
665 /*
666 * Nodes cannot be created in NGZ context.
667 */
668 if (!SDEV_IS_GLOBAL(parent)) {
669 rw_exit(&parent->sdev_dotdot->sdev_contents);
670 error = prof_lookup(dvp, nm, vpp, cred);
671
672 /*
673 * In this case, we can't create a vnode but we can
674 * open an existing one. However, we still want to
675 * enforce the open(2) error semantics as if this was
676 * a regular sdev_create() in GZ context. Since we
677 * know the vnode already exists (error == 0) we a)
678 * return EEXIST if exclusive access was requested, or
679 * b) return EISDIR if write access was requested on a
680 * directory. Otherwise, we return the value from
681 * prof_lookup() as is.
682 */
683 if (error == 0) {
684 if (excl == EXCL) {
685 error = EEXIST;
686 } else if (((*vpp)->v_type == VDIR) &&
687 (mode & VWRITE)) {
688 error = EISDIR;
689 }
690
691 if (error != 0)
692 VN_RELE(*vpp);
693 }
694
695
696 return (error);
697 }
698 rw_exit(&parent->sdev_dotdot->sdev_contents);
699
700 /* execute access is required to search the directory */
701 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
702 return (error);
703
704 /* check existing name */
705 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
706 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
707
708 /* name found */
709 if (error == 0) {
710 ASSERT(vp);
711 if (excl == EXCL) {
712 error = EEXIST;
713 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) {
714 /* allowing create/read-only an existing directory */
715 error = EISDIR;
716 } else {
717 error = VOP_ACCESS(vp, mode, 0, cred, ct);
718 }
719
720 if (error) {
721 VN_RELE(vp);
722 return (error);
723 }
724
725 /* truncation first */
726 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
727 (vap->va_size == 0)) {
728 ASSERT(parent->sdev_attrvp);
729 error = VOP_CREATE(parent->sdev_attrvp,
730 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp);
731
732 if (error) {
733 VN_RELE(vp);
734 return (error);
735 }
736 }
737
738 sdev_update_timestamps(vp, kcred,
739 AT_CTIME|AT_MTIME|AT_ATIME);
740 *vpp = vp;
741 return (0);
742 }
743
744 /* bail out early */
745 if (error != ENOENT)
746 return (error);
747
748 /* verify write access - compliance specifies ENXIO */
749 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) {
750 if (error == EACCES)
751 error = ENXIO;
752 return (error);
753 }
754
755 /*
756 * For memory-based (ROFS) directory:
757 * - either disallow node creation;
758 * - or implement VOP_CREATE of its own
759 */
760 rw_enter(&parent->sdev_contents, RW_WRITER);
761 if (!SDEV_IS_PERSIST(parent)) {
762 rw_exit(&parent->sdev_contents);
763 return (ENOTSUP);
764 }
765 ASSERT(parent->sdev_attrvp);
766 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL,
767 cred, SDEV_READY);
768 if (error) {
769 rw_exit(&parent->sdev_contents);
770 if (self)
771 SDEV_RELE(self);
772 return (error);
773 }
774 rw_exit(&parent->sdev_contents);
775
776 ASSERT(self);
777 /* take care the timestamps for the node and its parent */
778 sdev_update_timestamps(SDEVTOV(self), kcred,
779 AT_CTIME|AT_MTIME|AT_ATIME);
780 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
781 if (SDEV_IS_GLOBAL(parent))
782 atomic_inc_ulong(&parent->sdev_gdir_gen);
783
784 /* wake up other threads blocked on looking up this node */
785 mutex_enter(&self->sdev_lookup_lock);
786 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
787 mutex_exit(&self->sdev_lookup_lock);
788 error = sdev_to_vp(self, vpp);
789 return (error);
790 }
791
792 static int
sdev_remove(struct vnode * dvp,char * nm,struct cred * cred,caller_context_t * ct,int flags)793 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred,
794 caller_context_t *ct, int flags)
795 {
796 int error;
797 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
798 struct vnode *vp = NULL;
799 struct sdev_node *dv = NULL;
800 int len;
801 int bkstore;
802
803 /* bail out early */
804 len = strlen(nm);
805 if (nm[0] == '.') {
806 if (len == 1) {
807 return (EINVAL);
808 } else if (len == 2 && nm[1] == '.') {
809 return (EEXIST);
810 }
811 }
812
813 ASSERT(parent);
814 rw_enter(&parent->sdev_contents, RW_READER);
815 if (!SDEV_IS_GLOBAL(parent)) {
816 rw_exit(&parent->sdev_contents);
817 return (ENOTSUP);
818 }
819
820 /* execute access is required to search the directory */
821 if ((error = sdev_self_access(parent, VEXEC, 0, cred, ct)) != 0) {
822 rw_exit(&parent->sdev_contents);
823 return (error);
824 }
825
826 /* check existence first */
827 dv = sdev_cache_lookup(parent, nm);
828 if (dv == NULL) {
829 rw_exit(&parent->sdev_contents);
830 return (ENOENT);
831 }
832
833 vp = SDEVTOV(dv);
834 if ((dv->sdev_state == SDEV_INIT) ||
835 (dv->sdev_state == SDEV_ZOMBIE)) {
836 rw_exit(&parent->sdev_contents);
837 VN_RELE(vp);
838 return (ENOENT);
839 }
840
841 /* write access is required to remove an entry */
842 if ((error = sdev_self_access(parent, VWRITE, 0, cred, ct)) != 0) {
843 rw_exit(&parent->sdev_contents);
844 VN_RELE(vp);
845 return (error);
846 }
847
848 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
849 if (!rw_tryupgrade(&parent->sdev_contents)) {
850 rw_exit(&parent->sdev_contents);
851 rw_enter(&parent->sdev_contents, RW_WRITER);
852 /* Make sure we didn't become a zombie */
853 if (parent->sdev_state == SDEV_ZOMBIE) {
854 rw_exit(&parent->sdev_contents);
855 VN_RELE(vp);
856 return (ENOENT);
857 }
858 }
859
860 /* we do not support unlinking a non-empty directory */
861 if (vp->v_type == VDIR && dv->sdev_nlink > 2) {
862 rw_exit(&parent->sdev_contents);
863 VN_RELE(vp);
864 return (EBUSY);
865 }
866
867 /*
868 * sdev_dirdelete does the real job of:
869 * - make sure no open ref count
870 * - destroying the sdev_node
871 * - releasing the hold on attrvp
872 */
873 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE);
874 VN_RELE(vp);
875 rw_exit(&parent->sdev_contents);
876
877 /*
878 * best efforts clean up the backing store
879 */
880 if (bkstore) {
881 ASSERT(parent->sdev_attrvp);
882 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred,
883 ct, flags);
884 /*
885 * do not report BUSY error
886 * because the backing store ref count is released
887 * when the last ref count on the sdev_node is
888 * released.
889 */
890 if (error == EBUSY) {
891 sdcmn_err2(("sdev_remove: device %s is still on"
892 "disk %s\n", nm, parent->sdev_path));
893 error = 0;
894 }
895 }
896
897 return (error);
898 }
899
900 /*
901 * Some restrictions for this file system:
902 * - both oldnm and newnm are in the scope of /dev file system,
903 * to simply the namespace management model.
904 */
905 /*ARGSUSED6*/
906 static int
sdev_rename(struct vnode * odvp,char * onm,struct vnode * ndvp,char * nnm,struct cred * cred,caller_context_t * ct,int flags)907 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm,
908 struct cred *cred, caller_context_t *ct, int flags)
909 {
910 struct sdev_node *fromparent = NULL;
911 struct vattr vattr;
912 struct sdev_node *toparent;
913 struct sdev_node *fromdv = NULL; /* source node */
914 struct vnode *ovp = NULL; /* source vnode */
915 struct sdev_node *todv = NULL; /* destination node */
916 struct vnode *nvp = NULL; /* destination vnode */
917 int samedir = 0; /* set if odvp == ndvp */
918 struct vnode *realvp;
919 int error = 0;
920 dev_t fsid;
921 int bkstore = 0;
922 vtype_t type;
923
924 /* prevent modifying "." and ".." */
925 if ((onm[0] == '.' &&
926 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
927 (nnm[0] == '.' &&
928 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0')))) {
929 return (EINVAL);
930 }
931
932 fromparent = VTOSDEV(odvp);
933 toparent = VTOSDEV(ndvp);
934
935 /* ZOMBIE parent doesn't allow new node creation */
936 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER);
937 if (fromparent->sdev_state == SDEV_ZOMBIE) {
938 rw_exit(&fromparent->sdev_dotdot->sdev_contents);
939 return (ENOENT);
940 }
941
942 /* renaming only supported for global device nodes */
943 if (!SDEV_IS_GLOBAL(fromparent)) {
944 rw_exit(&fromparent->sdev_dotdot->sdev_contents);
945 return (ENOTSUP);
946 }
947 rw_exit(&fromparent->sdev_dotdot->sdev_contents);
948
949 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER);
950 if (toparent->sdev_state == SDEV_ZOMBIE) {
951 rw_exit(&toparent->sdev_dotdot->sdev_contents);
952 return (ENOENT);
953 }
954 rw_exit(&toparent->sdev_dotdot->sdev_contents);
955
956 /*
957 * acquire the global lock to prevent
958 * mount/unmount/other rename activities.
959 */
960 mutex_enter(&sdev_lock);
961
962 /* check existence of the source node */
963 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
964 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct,
965 NULL, NULL);
966 if (error) {
967 sdcmn_err2(("sdev_rename: the source node %s exists\n",
968 onm));
969 mutex_exit(&sdev_lock);
970 return (error);
971 }
972
973 if (VOP_REALVP(ovp, &realvp, ct) == 0) {
974 VN_HOLD(realvp);
975 VN_RELE(ovp);
976 ovp = realvp;
977 }
978
979 /* check existence of destination */
980 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
981 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct,
982 NULL, NULL);
983 if (error && (error != ENOENT)) {
984 mutex_exit(&sdev_lock);
985 VN_RELE(ovp);
986 return (error);
987 }
988
989 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) {
990 VN_HOLD(realvp);
991 VN_RELE(nvp);
992 nvp = realvp;
993 }
994
995 /*
996 * make sure the source and the destination are
997 * in the same dev filesystem
998 */
999 if (odvp != ndvp) {
1000 vattr.va_mask = AT_FSID;
1001 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) {
1002 mutex_exit(&sdev_lock);
1003 VN_RELE(ovp);
1004 if (nvp != NULL)
1005 VN_RELE(nvp);
1006 return (error);
1007 }
1008 fsid = vattr.va_fsid;
1009 vattr.va_mask = AT_FSID;
1010 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) {
1011 mutex_exit(&sdev_lock);
1012 VN_RELE(ovp);
1013 if (nvp != NULL)
1014 VN_RELE(nvp);
1015 return (error);
1016 }
1017 if (fsid != vattr.va_fsid) {
1018 mutex_exit(&sdev_lock);
1019 VN_RELE(ovp);
1020 if (nvp != NULL)
1021 VN_RELE(nvp);
1022 return (EXDEV);
1023 }
1024 }
1025
1026 /* make sure the old entry can be deleted */
1027 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct);
1028 if (error) {
1029 mutex_exit(&sdev_lock);
1030 VN_RELE(ovp);
1031 if (nvp != NULL)
1032 VN_RELE(nvp);
1033 return (error);
1034 }
1035
1036 /* make sure the destination allows creation */
1037 samedir = (fromparent == toparent);
1038 if (!samedir) {
1039 error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred, ct);
1040 if (error) {
1041 mutex_exit(&sdev_lock);
1042 VN_RELE(ovp);
1043 if (nvp != NULL)
1044 VN_RELE(nvp);
1045 return (error);
1046 }
1047 }
1048
1049 fromdv = VTOSDEV(ovp);
1050 ASSERT(fromdv);
1051
1052 /* destination file exists */
1053 if (nvp != NULL) {
1054 todv = VTOSDEV(nvp);
1055 ASSERT(todv);
1056 }
1057
1058 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 ||
1059 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) {
1060 mutex_exit(&sdev_lock);
1061 if (nvp != NULL)
1062 VN_RELE(nvp);
1063 VN_RELE(ovp);
1064 return (EACCES);
1065 }
1066
1067 /*
1068 * link source to new target in the memory. Regardless of failure, we
1069 * must rele our hold on nvp.
1070 */
1071 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred);
1072 if (nvp != NULL)
1073 VN_RELE(nvp);
1074 if (error) {
1075 sdcmn_err2(("sdev_rename: renaming %s to %s failed "
1076 " with error %d\n", onm, nnm, error));
1077 mutex_exit(&sdev_lock);
1078 VN_RELE(ovp);
1079 return (error);
1080 }
1081
1082 /*
1083 * unlink from source
1084 */
1085 rw_enter(&fromparent->sdev_contents, RW_READER);
1086 fromdv = sdev_cache_lookup(fromparent, onm);
1087 if (fromdv == NULL) {
1088 rw_exit(&fromparent->sdev_contents);
1089 mutex_exit(&sdev_lock);
1090 VN_RELE(ovp);
1091 sdcmn_err2(("sdev_rename: the source is deleted already\n"));
1092 return (0);
1093 }
1094
1095 if (fromdv->sdev_state == SDEV_ZOMBIE) {
1096 rw_exit(&fromparent->sdev_contents);
1097 mutex_exit(&sdev_lock);
1098 VN_RELE(SDEVTOV(fromdv));
1099 VN_RELE(ovp);
1100 sdcmn_err2(("sdev_rename: the source is being deleted\n"));
1101 return (0);
1102 }
1103 rw_exit(&fromparent->sdev_contents);
1104 ASSERT(SDEVTOV(fromdv) == ovp);
1105 VN_RELE(ovp);
1106
1107 /* clean out the directory contents before it can be removed */
1108 type = SDEVTOV(fromdv)->v_type;
1109 if (type == VDIR) {
1110 error = sdev_cleandir(fromdv, NULL, 0);
1111 sdcmn_err2(("sdev_rename: cleandir finished with %d\n",
1112 error));
1113 if (error == EBUSY)
1114 error = 0;
1115 }
1116
1117 rw_enter(&fromparent->sdev_contents, RW_WRITER);
1118 bkstore = SDEV_IS_PERSIST(fromdv) ? 1 : 0;
1119 sdev_cache_update(fromparent, &fromdv, onm,
1120 SDEV_CACHE_DELETE);
1121 VN_RELE(SDEVTOV(fromdv));
1122
1123 /* best effforts clean up the backing store */
1124 if (bkstore) {
1125 ASSERT(fromparent->sdev_attrvp);
1126 if (type != VDIR) {
1127 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */
1128 error = VOP_REMOVE(fromparent->sdev_attrvp,
1129 onm, kcred, ct, 0);
1130 } else {
1131 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */
1132 error = VOP_RMDIR(fromparent->sdev_attrvp,
1133 onm, fromparent->sdev_attrvp, kcred, ct, 0);
1134 }
1135
1136 if (error) {
1137 sdcmn_err2(("sdev_rename: device %s is "
1138 "still on disk %s\n", onm,
1139 fromparent->sdev_path));
1140 error = 0;
1141 }
1142 }
1143 rw_exit(&fromparent->sdev_contents);
1144 mutex_exit(&sdev_lock);
1145
1146 /* once reached to this point, the rename is regarded successful */
1147 return (0);
1148 }
1149
1150 /*
1151 * dev-fs version of "ln -s path dev-name"
1152 * tnm - path, e.g. /devices/... or /dev/...
1153 * lnm - dev_name
1154 */
1155 /*ARGSUSED6*/
1156 static int
sdev_symlink(struct vnode * dvp,char * lnm,struct vattr * tva,char * tnm,struct cred * cred,caller_context_t * ct,int flags)1157 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva,
1158 char *tnm, struct cred *cred, caller_context_t *ct, int flags)
1159 {
1160 int error;
1161 struct vnode *vp = NULL;
1162 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1163 struct sdev_node *self = (struct sdev_node *)NULL;
1164
1165 ASSERT(parent);
1166 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1167 if (parent->sdev_state == SDEV_ZOMBIE) {
1168 rw_exit(&parent->sdev_dotdot->sdev_contents);
1169 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n",
1170 parent->sdev_name));
1171 return (ENOENT);
1172 }
1173
1174 if (!SDEV_IS_GLOBAL(parent)) {
1175 rw_exit(&parent->sdev_dotdot->sdev_contents);
1176 return (ENOTSUP);
1177 }
1178 rw_exit(&parent->sdev_dotdot->sdev_contents);
1179
1180 /* execute access is required to search a directory */
1181 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
1182 return (error);
1183
1184 /* find existing name */
1185 /* XXXci - We may need to translate the C-I flags here */
1186 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
1187 if (error == 0) {
1188 ASSERT(vp);
1189 VN_RELE(vp);
1190 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm));
1191 return (EEXIST);
1192 }
1193 if (error != ENOENT)
1194 return (error);
1195
1196 /* write access is required to create a symlink */
1197 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0)
1198 return (error);
1199
1200 /* put it into memory cache */
1201 rw_enter(&parent->sdev_contents, RW_WRITER);
1202 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm,
1203 cred, SDEV_READY);
1204 if (error) {
1205 rw_exit(&parent->sdev_contents);
1206 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm));
1207 if (self)
1208 SDEV_RELE(self);
1209
1210 return (error);
1211 }
1212 ASSERT(self && (self->sdev_state == SDEV_READY));
1213 rw_exit(&parent->sdev_contents);
1214
1215 /* take care the timestamps for the node and its parent */
1216 sdev_update_timestamps(SDEVTOV(self), kcred,
1217 AT_CTIME|AT_MTIME|AT_ATIME);
1218 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
1219 if (SDEV_IS_GLOBAL(parent))
1220 atomic_inc_ulong(&parent->sdev_gdir_gen);
1221
1222 /* wake up other threads blocked on looking up this node */
1223 mutex_enter(&self->sdev_lookup_lock);
1224 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
1225 mutex_exit(&self->sdev_lookup_lock);
1226 SDEV_RELE(self); /* don't return with vnode held */
1227 return (0);
1228 }
1229
1230 /*ARGSUSED6*/
1231 static int
sdev_mkdir(struct vnode * dvp,char * nm,struct vattr * va,struct vnode ** vpp,struct cred * cred,caller_context_t * ct,int flags,vsecattr_t * vsecp)1232 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp,
1233 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp)
1234 {
1235 int error;
1236 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1237 struct sdev_node *self = NULL;
1238 struct vnode *vp = NULL;
1239
1240 ASSERT(parent && parent->sdev_dotdot);
1241 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1242 if (parent->sdev_state == SDEV_ZOMBIE) {
1243 rw_exit(&parent->sdev_dotdot->sdev_contents);
1244 return (ENOENT);
1245 }
1246
1247 /* non-global do not allow pure directory creation */
1248 if (!SDEV_IS_GLOBAL(parent)) {
1249 rw_exit(&parent->sdev_dotdot->sdev_contents);
1250 return (prof_lookup(dvp, nm, vpp, cred));
1251 }
1252 rw_exit(&parent->sdev_dotdot->sdev_contents);
1253
1254 /* execute access is required to search the directory */
1255 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) {
1256 return (error);
1257 }
1258
1259 /* find existing name */
1260 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */
1261 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL);
1262 if (error == 0) {
1263 VN_RELE(vp);
1264 return (EEXIST);
1265 }
1266 if (error != ENOENT)
1267 return (error);
1268
1269 /* require write access to create a directory */
1270 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) {
1271 return (error);
1272 }
1273
1274 /* put it into memory */
1275 rw_enter(&parent->sdev_contents, RW_WRITER);
1276 error = sdev_mknode(parent, nm, &self,
1277 va, NULL, NULL, cred, SDEV_READY);
1278 if (error) {
1279 rw_exit(&parent->sdev_contents);
1280 if (self)
1281 SDEV_RELE(self);
1282 return (error);
1283 }
1284 ASSERT(self && (self->sdev_state == SDEV_READY));
1285 rw_exit(&parent->sdev_contents);
1286
1287 /* take care the timestamps for the node and its parent */
1288 sdev_update_timestamps(SDEVTOV(self), kcred,
1289 AT_CTIME|AT_MTIME|AT_ATIME);
1290 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME);
1291 if (SDEV_IS_GLOBAL(parent))
1292 atomic_inc_ulong(&parent->sdev_gdir_gen);
1293
1294 /* wake up other threads blocked on looking up this node */
1295 mutex_enter(&self->sdev_lookup_lock);
1296 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP);
1297 mutex_exit(&self->sdev_lookup_lock);
1298 *vpp = SDEVTOV(self);
1299 return (0);
1300 }
1301
1302 /*
1303 * allowing removing an empty directory under /dev
1304 */
1305 /*ARGSUSED*/
1306 static int
sdev_rmdir(struct vnode * dvp,char * nm,struct vnode * cdir,struct cred * cred,caller_context_t * ct,int flags)1307 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred,
1308 caller_context_t *ct, int flags)
1309 {
1310 int error = 0;
1311 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp);
1312 struct sdev_node *self = NULL;
1313 struct vnode *vp = NULL;
1314
1315 /* bail out early */
1316 if (strcmp(nm, ".") == 0)
1317 return (EINVAL);
1318 if (strcmp(nm, "..") == 0)
1319 return (EEXIST); /* should be ENOTEMPTY */
1320
1321 /* no destruction of non-global node */
1322 ASSERT(parent && parent->sdev_dotdot);
1323 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER);
1324 if (!SDEV_IS_GLOBAL(parent)) {
1325 rw_exit(&parent->sdev_dotdot->sdev_contents);
1326 return (ENOTSUP);
1327 }
1328 rw_exit(&parent->sdev_dotdot->sdev_contents);
1329
1330 /* execute access is required to search the directory */
1331 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0)
1332 return (error);
1333
1334 /* check existing name */
1335 rw_enter(&parent->sdev_contents, RW_WRITER);
1336 self = sdev_cache_lookup(parent, nm);
1337 if (self == NULL) {
1338 rw_exit(&parent->sdev_contents);
1339 return (ENOENT);
1340 }
1341
1342 vp = SDEVTOV(self);
1343 if ((self->sdev_state == SDEV_INIT) ||
1344 (self->sdev_state == SDEV_ZOMBIE)) {
1345 rw_exit(&parent->sdev_contents);
1346 VN_RELE(vp);
1347 return (ENOENT);
1348 }
1349
1350 /* some sanity checks */
1351 if (vp == dvp || vp == cdir) {
1352 rw_exit(&parent->sdev_contents);
1353 VN_RELE(vp);
1354 return (EINVAL);
1355 }
1356
1357 if (vp->v_type != VDIR) {
1358 rw_exit(&parent->sdev_contents);
1359 VN_RELE(vp);
1360 return (ENOTDIR);
1361 }
1362
1363 if (vn_vfswlock(vp)) {
1364 rw_exit(&parent->sdev_contents);
1365 VN_RELE(vp);
1366 return (EBUSY);
1367 }
1368
1369 if (vn_mountedvfs(vp) != NULL) {
1370 rw_exit(&parent->sdev_contents);
1371 vn_vfsunlock(vp);
1372 VN_RELE(vp);
1373 return (EBUSY);
1374 }
1375
1376 self = VTOSDEV(vp);
1377 /* bail out on a non-empty directory */
1378 rw_enter(&self->sdev_contents, RW_READER);
1379 if (self->sdev_nlink > 2) {
1380 rw_exit(&self->sdev_contents);
1381 rw_exit(&parent->sdev_contents);
1382 vn_vfsunlock(vp);
1383 VN_RELE(vp);
1384 return (ENOTEMPTY);
1385 }
1386 rw_exit(&self->sdev_contents);
1387
1388 /* unlink it from the directory cache */
1389 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE);
1390 rw_exit(&parent->sdev_contents);
1391 vn_vfsunlock(vp);
1392 VN_RELE(vp);
1393
1394 /* best effort to clean up the backing store */
1395 if (SDEV_IS_PERSIST(parent)) {
1396 ASSERT(parent->sdev_attrvp);
1397 error = VOP_RMDIR(parent->sdev_attrvp, nm,
1398 parent->sdev_attrvp, kcred, ct, flags);
1399
1400 if (error)
1401 sdcmn_err2(("sdev_rmdir: cleaning device %s is on"
1402 " disk error %d\n", parent->sdev_path, error));
1403 if (error == EBUSY)
1404 error = 0;
1405
1406 }
1407
1408 return (error);
1409 }
1410
1411 /*
1412 * read the contents of a symbolic link
1413 */
1414 static int
sdev_readlink(struct vnode * vp,struct uio * uiop,struct cred * cred,caller_context_t * ct)1415 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred,
1416 caller_context_t *ct)
1417 {
1418 struct sdev_node *dv;
1419 int error = 0;
1420
1421 ASSERT(vp->v_type == VLNK);
1422
1423 dv = VTOSDEV(vp);
1424
1425 if (dv->sdev_attrvp) {
1426 /* non-NULL attrvp implys a persisted node at READY state */
1427 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct));
1428 } else if (dv->sdev_symlink != NULL) {
1429 /* memory nodes, e.g. local nodes */
1430 rw_enter(&dv->sdev_contents, RW_READER);
1431 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink));
1432 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink),
1433 UIO_READ, uiop);
1434 rw_exit(&dv->sdev_contents);
1435 return (error);
1436 }
1437
1438 return (ENOENT);
1439 }
1440
1441 /*ARGSUSED4*/
1442 static int
sdev_readdir(struct vnode * vp,struct uio * uiop,struct cred * cred,int * eofp,caller_context_t * ct,int flags)1443 sdev_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp,
1444 caller_context_t *ct, int flags)
1445 {
1446 struct sdev_node *dv = VTOSDEV(vp);
1447 int error;
1448
1449 VERIFY(RW_READ_HELD(&dv->sdev_contents));
1450
1451 /*
1452 * We can't recursively take ->sdev_contents via an indirect
1453 * VOP_ACCESS(), but we don't need to use that anyway.
1454 */
1455 if ((error = sdev_self_access(dv, VEXEC, 0, cred, ct)) != 0)
1456 return (error);
1457
1458 if (!SDEV_IS_GLOBAL(dv))
1459 prof_filldir(dv);
1460 return (devname_readdir_func(vp, uiop, cred, eofp, SDEV_BROWSE));
1461 }
1462
/*
 * Inactive entry point for sdev vnodes (installed as vop_inactive in
 * sdev_vnodeops_tbl).  All of the work is delegated to
 * devname_inactive_func(), which is handed NULL as its third argument.
 */
/*ARGSUSED1*/
static void
sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
{
	devname_inactive_func(vp, cred, NULL);
}
1469
1470 /*ARGSUSED2*/
1471 static int
sdev_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)1472 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1473 {
1474 struct sdev_node *dv = VTOSDEV(vp);
1475 struct sdev_fid *sdev_fid;
1476
1477 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) {
1478 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t);
1479 return (ENOSPC);
1480 }
1481
1482 sdev_fid = (struct sdev_fid *)fidp;
1483 bzero(sdev_fid, sizeof (struct sdev_fid));
1484 sdev_fid->sdevfid_len =
1485 (int)sizeof (struct sdev_fid) - sizeof (ushort_t);
1486 sdev_fid->sdevfid_ino = dv->sdev_ino;
1487
1488 return (0);
1489 }
1490
1491 /*
1492 * This pair of routines bracket all VOP_READ, VOP_WRITE
1493 * and VOP_READDIR requests. The contents lock stops things
1494 * moving around while we're looking at them.
1495 */
1496 /*ARGSUSED2*/
1497 static int
sdev_rwlock(struct vnode * vp,int write_flag,caller_context_t * ctp)1498 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp)
1499 {
1500 rw_enter(&VTOSDEV(vp)->sdev_contents,
1501 write_flag ? RW_WRITER : RW_READER);
1502 return (write_flag ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE);
1503 }
1504
1505 /*ARGSUSED1*/
1506 static void
sdev_rwunlock(struct vnode * vp,int write_flag,caller_context_t * ctp)1507 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp)
1508 {
1509 rw_exit(&VTOSDEV(vp)->sdev_contents);
1510 }
1511
1512 /*ARGSUSED1*/
1513 static int
sdev_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)1514 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
1515 caller_context_t *ct)
1516 {
1517 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp;
1518
1519 ASSERT(vp->v_type != VCHR &&
1520 vp->v_type != VBLK && vp->v_type != VLNK);
1521
1522 if (vp->v_type == VDIR)
1523 return (fs_seek(vp, ooff, noffp, ct));
1524
1525 ASSERT(attrvp);
1526 return (VOP_SEEK(attrvp, ooff, noffp, ct));
1527 }
1528
1529 /*ARGSUSED1*/
1530 static int
sdev_frlock(struct vnode * vp,int cmd,struct flock64 * bfp,int flag,offset_t offset,struct flk_callback * flk_cbp,struct cred * cr,caller_context_t * ct)1531 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
1532 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr,
1533 caller_context_t *ct)
1534 {
1535 int error;
1536 struct sdev_node *dv = VTOSDEV(vp);
1537
1538 ASSERT(dv);
1539 ASSERT(dv->sdev_attrvp);
1540 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset,
1541 flk_cbp, cr, ct);
1542
1543 return (error);
1544 }
1545
1546 static int
sdev_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)1547 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1548 caller_context_t *ct)
1549 {
1550 switch (cmd) {
1551 case _PC_ACL_ENABLED:
1552 *valp = SDEV_ACL_FLAVOR(vp);
1553 return (0);
1554 }
1555
1556 return (fs_pathconf(vp, cmd, valp, cr, ct));
1557 }
1558
/*
 * Pointer to the sdev vnode operations vector.  It is only declared
 * here; nothing in this part of the file assigns it.
 */
vnodeops_t *sdev_vnodeops;

/*
 * Template pairing each VOPNAME_* operation name with the sdev routine
 * that implements it.  The table ends with a NULL, NULL pair --
 * presumably the terminator expected by whatever code consumes the
 * template; confirm against that consumer.
 */
const fs_operation_def_t sdev_vnodeops_tbl[] = {
	VOPNAME_OPEN,		{ .vop_open = sdev_open },
	VOPNAME_CLOSE,		{ .vop_close = sdev_close },
	VOPNAME_READ,		{ .vop_read = sdev_read },
	VOPNAME_WRITE,		{ .vop_write = sdev_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = sdev_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = sdev_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = sdev_setattr },
	VOPNAME_ACCESS,		{ .vop_access = sdev_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = sdev_lookup },
	VOPNAME_CREATE,		{ .vop_create = sdev_create },
	VOPNAME_RENAME,		{ .vop_rename = sdev_rename },
	VOPNAME_REMOVE,		{ .vop_remove = sdev_remove },
	VOPNAME_MKDIR,		{ .vop_mkdir = sdev_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = sdev_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = sdev_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = sdev_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = sdev_readlink },
	VOPNAME_INACTIVE,	{ .vop_inactive = sdev_inactive },
	VOPNAME_FID,		{ .vop_fid = sdev_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = sdev_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = sdev_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = sdev_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = sdev_frlock },
	VOPNAME_PATHCONF,	{ .vop_pathconf = sdev_pathconf },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = sdev_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = sdev_getsecattr },
	NULL,			NULL
};

/* size of sdev_vnodeops_tbl in bytes, terminating pair included */
int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl);
1592