xref: /illumos-gate/usr/src/uts/common/fs/namefs/namevno.c (revision 985cc36c07a787e0cb720fcf2fab565aa2a77590)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27  * Copyright (c) 2016 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * This file defines the vnode operations for mounted file descriptors.
32  * The routines in this file act as a layer between the NAMEFS file
33  * system and SPECFS/FIFOFS.  With the exception of nm_open(), nm_setattr(),
34  * nm_getattr() and nm_access(), the routines simply apply the VOP operation
35  * to the vnode representing the file descriptor.  This switches control
36  * to the underlying file system to which the file descriptor belongs.
37  */
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/vfs.h>
50 #include <sys/vfs_opreg.h>
51 #include <sys/vnode.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/proc.h>
56 #include <sys/conf.h>
57 #include <sys/debug.h>
58 #include <vm/seg.h>
59 #include <sys/fs/namenode.h>
60 #include <sys/stream.h>
61 #include <fs/fs_subr.h>
62 #include <sys/policy.h>
63 
64 /*
65  * Create a reference to the vnode representing the file descriptor.
66  * Then, apply the VOP_OPEN operation to that vnode.
67  *
68  * The vnode for the file descriptor may be switched under you.
69  * If it is, search the hash list for an nodep - nodep->nm_filevp
70  * pair. If it exists, return that nodep to the user.
71  * If it does not exist, create a new namenode to attach
72  * to the nodep->nm_filevp then place the pair on the hash list.
73  *
74  * Newly created objects are like children/nodes in the mounted
75  * file system, with the parent being the initial mount.
76  */
77 int
78 nm_open(vnode_t **vpp, int flag, cred_t *crp, caller_context_t *ct)
79 {
80 	struct namenode *nodep = VTONM(*vpp);
81 	int error = 0;
82 	struct namenode *newnamep;
83 	struct vnode *newvp;
84 	struct vnode *infilevp;
85 	struct vnode *outfilevp;
86 
87 	/*
88 	 * If the vnode is switched under us, the corresponding
89 	 * VN_RELE for this VN_HOLD will be done by the file system
90 	 * performing the switch. Otherwise, the corresponding
91 	 * VN_RELE will be done by nm_close().
92 	 */
93 	infilevp = outfilevp = nodep->nm_filevp;
94 	VN_HOLD(outfilevp);
95 
96 	if ((error = VOP_OPEN(&outfilevp, flag, crp, ct)) != 0) {
97 		VN_RELE(outfilevp);
98 		return (error);
99 	}
100 	if (infilevp != outfilevp) {
101 		/*
102 		 * See if the new filevp (outfilevp) is already associated
103 		 * with the mount point. If it is, then it already has a
104 		 * namenode associated with it.
105 		 */
106 		mutex_enter(&ntable_lock);
107 		if ((newnamep =
108 		    namefind(outfilevp, nodep->nm_mountpt)) != NULL) {
109 			struct vnode *vp = NMTOV(newnamep);
110 
111 			VN_HOLD(vp);
112 			goto gotit;
113 		}
114 
115 		newnamep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);
116 		newvp = vn_alloc(KM_SLEEP);
117 		newnamep->nm_vnode = newvp;
118 
119 		mutex_init(&newnamep->nm_lock, NULL, MUTEX_DEFAULT, NULL);
120 
121 		mutex_enter(&nodep->nm_lock);
122 		newvp->v_flag = ((*vpp)->v_flag | VNOMAP | VNOSWAP) & ~VROOT;
123 		vn_setops(newvp, vn_getops(*vpp));
124 		newvp->v_vfsp = &namevfs;
125 		newvp->v_stream = outfilevp->v_stream;
126 		newvp->v_type = outfilevp->v_type;
127 		newvp->v_rdev = outfilevp->v_rdev;
128 		newvp->v_data = (caddr_t)newnamep;
129 		vn_exists(newvp);
130 		bcopy(&nodep->nm_vattr, &newnamep->nm_vattr, sizeof (vattr_t));
131 		newnamep->nm_vattr.va_type = outfilevp->v_type;
132 		newnamep->nm_vattr.va_nodeid = namenodeno_alloc();
133 		newnamep->nm_vattr.va_size = (u_offset_t)0;
134 		newnamep->nm_vattr.va_rdev = outfilevp->v_rdev;
135 		newnamep->nm_flag = NMNMNT;
136 		newnamep->nm_filevp = outfilevp;
137 		newnamep->nm_filep = nodep->nm_filep;
138 		newnamep->nm_mountpt = nodep->nm_mountpt;
139 		mutex_exit(&nodep->nm_lock);
140 
141 		/*
142 		 * Insert the new namenode into the hash list.
143 		 */
144 		nameinsert(newnamep);
145 gotit:
146 		mutex_exit(&ntable_lock);
147 		/*
148 		 * Release the above reference to the infilevp, the reference
149 		 * to the NAMEFS vnode, create a reference to the new vnode
150 		 * and return the new vnode to the user.
151 		 */
152 		VN_RELE(*vpp);
153 		*vpp = NMTOV(newnamep);
154 	}
155 	return (0);
156 }
157 
158 /*
159  * Close a mounted file descriptor.
160  * Remove any locks and apply the VOP_CLOSE operation to the vnode for
161  * the file descriptor.
162  */
163 static int
164 nm_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *crp,
165 	caller_context_t *ct)
166 {
167 	struct namenode *nodep = VTONM(vp);
168 	int error = 0;
169 
170 	(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
171 	cleanshares(vp, ttoproc(curthread)->p_pid);
172 	error = VOP_CLOSE(nodep->nm_filevp, flag, count, offset, crp, ct);
173 	if (count == 1) {
174 		(void) VOP_FSYNC(nodep->nm_filevp, FSYNC, crp, ct);
175 		/*
176 		 * Before VN_RELE() we need to remove the vnode from
177 		 * the hash table.  We should only do so in the  NMNMNT case.
178 		 * In other cases, nodep->nm_filep keeps a reference
179 		 * to nm_filevp and the entry in the hash table doesn't
180 		 * hurt.
181 		 */
182 		if ((nodep->nm_flag & NMNMNT) != 0) {
183 			mutex_enter(&ntable_lock);
184 			nameremove(nodep);
185 			mutex_exit(&ntable_lock);
186 		}
187 		VN_RELE(nodep->nm_filevp);
188 	}
189 	return (error);
190 }
191 
192 static int
193 nm_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
194 	caller_context_t *ct)
195 {
196 	return (VOP_READ(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
197 }
198 
199 static int
200 nm_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
201 	caller_context_t *ct)
202 {
203 	return (VOP_WRITE(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
204 }
205 
206 static int
207 nm_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr, int *rvalp,
208 	caller_context_t *ct)
209 {
210 	return (VOP_IOCTL(VTONM(vp)->nm_filevp, cmd, arg, mode, cr, rvalp, ct));
211 }
212 
213 /*
214  * Return in vap the attributes that are stored in the namenode
215  * structure.  Only the size is taken from the mounted object.
216  */
217 /* ARGSUSED */
218 static int
219 nm_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp,
220 	caller_context_t *ct)
221 {
222 	struct namenode *nodep = VTONM(vp);
223 	struct vattr va;
224 	int error;
225 
226 	mutex_enter(&nodep->nm_lock);
227 	bcopy(&nodep->nm_vattr, vap, sizeof (vattr_t));
228 	mutex_exit(&nodep->nm_lock);
229 
230 	if ((va.va_mask = vap->va_mask & AT_SIZE) != 0) {
231 		if (error = VOP_GETATTR(nodep->nm_filevp, &va, flags, crp, ct))
232 			return (error);
233 		vap->va_size = va.va_size;
234 	}
235 
236 	return (0);
237 }
238 
239 /*
240  * Standard access() like check.  Figure out which mode bits apply
241  * to the caller then pass the missing mode bits to the secpolicy function.
242  */
243 static int
244 nm_access_unlocked(void *vnp, int mode, cred_t *crp)
245 {
246 	struct namenode *nodep = vnp;
247 	int shift = 0;
248 
249 	if (crgetuid(crp) != nodep->nm_vattr.va_uid) {
250 		shift += 3;
251 		if (!groupmember(nodep->nm_vattr.va_gid, crp))
252 			shift += 3;
253 	}
254 
255 	return (secpolicy_vnode_access2(crp, NMTOV(nodep),
256 	    nodep->nm_vattr.va_uid, nodep->nm_vattr.va_mode << shift,
257 	    mode));
258 }
259 /*
260  * Set the attributes of the namenode from the attributes in vap.
261  */
262 /* ARGSUSED */
263 static int
264 nm_setattr(
265 	vnode_t *vp,
266 	vattr_t *vap,
267 	int flags,
268 	cred_t *crp,
269 	caller_context_t *ctp)
270 {
271 	struct namenode *nodep = VTONM(vp);
272 	struct vattr *nmvap = &nodep->nm_vattr;
273 	long mask = vap->va_mask;
274 	int error = 0;
275 
276 	/*
277 	 * Cannot set these attributes.
278 	 */
279 	if (mask & (AT_NOSET|AT_SIZE))
280 		return (EINVAL);
281 
282 	(void) VOP_RWLOCK(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
283 	mutex_enter(&nodep->nm_lock);
284 
285 	/*
286 	 * Change ownership/group/time/access mode of mounted file
287 	 * descriptor.
288 	 */
289 
290 	error = secpolicy_vnode_setattr(crp, vp, vap, nmvap, flags,
291 	    nm_access_unlocked, nodep);
292 	if (error)
293 		goto out;
294 
295 	mask = vap->va_mask;
296 	/*
297 	 * If request to change mode, copy new
298 	 * mode into existing attribute structure.
299 	 */
300 	if (mask & AT_MODE)
301 		nmvap->va_mode = vap->va_mode & ~VSVTX;
302 
303 	/*
304 	 * If request was to change user or group, turn off suid and sgid
305 	 * bits.
306 	 * If the system was configured with the "rstchown" option, the
307 	 * owner is not permitted to give away the file, and can change
308 	 * the group id only to a group of which they are a member.
309 	 */
310 	if (mask & AT_UID)
311 		nmvap->va_uid = vap->va_uid;
312 	if (mask & AT_GID)
313 		nmvap->va_gid = vap->va_gid;
314 	/*
315 	 * If request is to modify times, make sure user has write
316 	 * permissions on the file.
317 	 */
318 	if (mask & AT_ATIME)
319 		nmvap->va_atime = vap->va_atime;
320 	if (mask & AT_MTIME) {
321 		nmvap->va_mtime = vap->va_mtime;
322 		gethrestime(&nmvap->va_ctime);
323 	}
324 out:
325 	mutex_exit(&nodep->nm_lock);
326 	VOP_RWUNLOCK(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
327 	return (error);
328 }
329 
330 /*
331  * Check mode permission on the namenode.  First nm_access_unlocked()
332  * checks the bits on the name node, then an access check is performed
333  * on the underlying file.
334  */
335 /* ARGSUSED */
336 static int
337 nm_access(vnode_t *vp, int mode, int flags, cred_t *crp,
338 	caller_context_t *ct)
339 {
340 	struct namenode *nodep = VTONM(vp);
341 	int error;
342 
343 	mutex_enter(&nodep->nm_lock);
344 	error = nm_access_unlocked(nodep, mode, crp);
345 	mutex_exit(&nodep->nm_lock);
346 	if (error == 0)
347 		return (VOP_ACCESS(nodep->nm_filevp, mode, flags, crp, ct));
348 	else
349 		return (error);
350 }
351 
352 /*
353  * We can get here if a creat or open with O_CREAT is done on a namefs
354  * mount point, for example, as the object of a shell output redirection to
355  * the mount point.
356  */
357 /*ARGSUSED*/
358 static int
359 nm_create(vnode_t *dvp, char *name, vattr_t *vap, enum vcexcl excl,
360 	int mode, vnode_t **vpp, cred_t *cr, int flag,
361 	caller_context_t *ct, vsecattr_t *vsecp)
362 {
363 	int error;
364 
365 	ASSERT(dvp && *name == '\0');
366 	if (excl == NONEXCL) {
367 		if (mode && (error = nm_access(dvp, mode, 0, cr, ct)) != 0)
368 			return (error);
369 		VN_HOLD(dvp);
370 		return (0);
371 	}
372 	return (EEXIST);
373 }
374 
375 /*
376  * Links are not allowed on mounted file descriptors.
377  */
378 /*ARGSUSED*/
379 static int
380 nm_link(vnode_t *tdvp, vnode_t *vp, char *tnm, cred_t *crp,
381 	caller_context_t *ct, int flags)
382 {
383 	return (EXDEV);
384 }
385 
386 static int
387 nm_fsync(vnode_t *vp, int syncflag, cred_t *crp, caller_context_t *ct)
388 {
389 	return (VOP_FSYNC(VTONM(vp)->nm_filevp, syncflag, crp, ct));
390 }
391 
392 /* Free the namenode */
393 /* ARGSUSED */
394 static void
395 nm_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
396 {
397 	struct namenode *nodep = VTONM(vp);
398 	vfs_t *vfsp = vp->v_vfsp;
399 
400 	mutex_enter(&vp->v_lock);
401 	ASSERT(vp->v_count >= 1);
402 	if (--vp->v_count != 0) {
403 		mutex_exit(&vp->v_lock);
404 		return;
405 	}
406 	mutex_exit(&vp->v_lock);
407 	if (!(nodep->nm_flag & NMNMNT)) {
408 		ASSERT(nodep->nm_filep->f_vnode == nodep->nm_filevp);
409 		(void) closef(nodep->nm_filep);
410 	}
411 	vn_invalid(vp);
412 	vn_free(vp);
413 	if (vfsp != &namevfs)
414 		VFS_RELE(vfsp);
415 	namenodeno_free(nodep->nm_vattr.va_nodeid);
416 	kmem_free(nodep, sizeof (struct namenode));
417 }
418 
419 static int
420 nm_fid(vnode_t *vp, struct fid *fidnodep, caller_context_t *ct)
421 {
422 	return (VOP_FID(VTONM(vp)->nm_filevp, fidnodep, ct));
423 }
424 
425 static int
426 nm_rwlock(vnode_t *vp, int write, caller_context_t *ctp)
427 {
428 	return (VOP_RWLOCK(VTONM(vp)->nm_filevp, write, ctp));
429 }
430 
431 static void
432 nm_rwunlock(vnode_t *vp, int write, caller_context_t *ctp)
433 {
434 	VOP_RWUNLOCK(VTONM(vp)->nm_filevp, write, ctp);
435 }
436 
437 static int
438 nm_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
439 {
440 	return (VOP_SEEK(VTONM(vp)->nm_filevp, ooff, noffp, ct));
441 }
442 
443 /*
444  * Return the vnode representing the file descriptor in vpp.
445  */
446 static int
447 nm_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
448 {
449 	struct vnode *rvp;
450 
451 	vp = VTONM(vp)->nm_filevp;
452 	if (VOP_REALVP(vp, &rvp, ct) == 0)
453 		vp = rvp;
454 	*vpp = vp;
455 	return (0);
456 }
457 
458 static int
459 nm_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
460 	pollhead_t **phpp, caller_context_t *ct)
461 {
462 	return (VOP_POLL(VTONM(vp)->nm_filevp, events, anyyet, reventsp,
463 	    phpp, ct));
464 }
465 
466 struct vnodeops *nm_vnodeops;
467 
468 const fs_operation_def_t nm_vnodeops_template[] = {
469 	VOPNAME_OPEN,		{ .vop_open = nm_open },
470 	VOPNAME_CLOSE,		{ .vop_close = nm_close },
471 	VOPNAME_READ,		{ .vop_read = nm_read },
472 	VOPNAME_WRITE,		{ .vop_write = nm_write },
473 	VOPNAME_IOCTL,		{ .vop_ioctl = nm_ioctl },
474 	VOPNAME_GETATTR,	{ .vop_getattr = nm_getattr },
475 	VOPNAME_SETATTR,	{ .vop_setattr = nm_setattr },
476 	VOPNAME_ACCESS,		{ .vop_access = nm_access },
477 	VOPNAME_CREATE,		{ .vop_create = nm_create },
478 	VOPNAME_LINK,		{ .vop_link = nm_link },
479 	VOPNAME_FSYNC,		{ .vop_fsync = nm_fsync },
480 	VOPNAME_INACTIVE,	{ .vop_inactive = nm_inactive },
481 	VOPNAME_FID,		{ .vop_fid = nm_fid },
482 	VOPNAME_RWLOCK,		{ .vop_rwlock = nm_rwlock },
483 	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nm_rwunlock },
484 	VOPNAME_SEEK,		{ .vop_seek = nm_seek },
485 	VOPNAME_REALVP,		{ .vop_realvp = nm_realvp },
486 	VOPNAME_POLL,		{ .vop_poll = nm_poll },
487 	VOPNAME_DISPOSE,	{ .error = fs_error },
488 	NULL,			NULL
489 };
490