1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
28 */
29
30 /*
31 * This file defines the vnode operations for mounted file descriptors.
32 * The routines in this file act as a layer between the NAMEFS file
33 * system and SPECFS/FIFOFS. With the exception of nm_open(), nm_setattr(),
34 * nm_getattr() and nm_access(), the routines simply apply the VOP operation
35 * to the vnode representing the file descriptor. This switches control
36 * to the underlying file system to which the file descriptor belongs.
37 */
38 #include <sys/types.h>
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/vfs.h>
50 #include <sys/vfs_opreg.h>
51 #include <sys/vnode.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/proc.h>
56 #include <sys/conf.h>
57 #include <sys/debug.h>
58 #include <vm/seg.h>
59 #include <sys/fs/namenode.h>
60 #include <sys/stream.h>
61 #include <fs/fs_subr.h>
62 #include <sys/policy.h>
63
/*
 * Open a mounted file descriptor.
 *
 * A hold is placed on the vnode backing the file descriptor
 * (nodep->nm_filevp) and VOP_OPEN is applied to it.  If VOP_OPEN fails,
 * the hold is dropped and its error is returned.
 *
 * The vnode for the file descriptor may be switched under you, i.e.
 * VOP_OPEN may hand back a vnode different from the one passed in.
 * If it is, search the hash list for a namenode matching the
 * (new vnode, mount point) pair.  If one exists, take a hold on its
 * vnode and return it to the caller.  If it does not exist, create a
 * new namenode/vnode pair initialized from the original namenode, flag
 * it NMNMNT, and place it on the hash list.  In both cases the hold on
 * the vnode originally passed in *vpp is released and *vpp is replaced.
 *
 * Newly created objects are like children/nodes in the mounted
 * file system, with the parent being the initial mount.
 *
 * Returns 0 on success, otherwise the error returned by VOP_OPEN.
 */
int
nm_open(vnode_t **vpp, int flag, cred_t *crp, caller_context_t *ct)
{
	struct namenode *nodep = VTONM(*vpp);
	int error = 0;
	struct namenode *newnamep;
	struct vnode *newvp;
	struct vnode *infilevp;
	struct vnode *outfilevp;

	/*
	 * If the vnode is switched under us, the corresponding
	 * VN_RELE for this VN_HOLD will be done by the file system
	 * performing the switch. Otherwise, the corresponding
	 * VN_RELE will be done by nm_close().
	 */
	infilevp = outfilevp = nodep->nm_filevp;
	VN_HOLD(outfilevp);

	if ((error = VOP_OPEN(&outfilevp, flag, crp, ct)) != 0) {
		/* Open failed: undo the hold taken above. */
		VN_RELE(outfilevp);
		return (error);
	}
	if (infilevp != outfilevp) {
		/*
		 * See if the new filevp (outfilevp) is already associated
		 * with the mount point. If it is, then it already has a
		 * namenode associated with it.
		 *
		 * ntable_lock is held across both the lookup and the
		 * insertion below.
		 */
		mutex_enter(&ntable_lock);
		if ((newnamep =
		    namefind(outfilevp, nodep->nm_mountpt)) != NULL) {
			struct vnode *vp = NMTOV(newnamep);

			/* This hold is the one handed back through *vpp. */
			VN_HOLD(vp);
			goto gotit;
		}

		/*
		 * No existing namenode: build a new namenode/vnode pair.
		 * NOTE(review): presumably vn_alloc() returns the vnode
		 * with the reference that is handed to the caller --
		 * confirm against vn_alloc().
		 */
		newnamep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);
		newvp = vn_alloc(KM_SLEEP);
		newnamep->nm_vnode = newvp;

		mutex_init(&newnamep->nm_lock, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Hold the original node's lock while its attributes are
		 * copied.  The new vnode inherits the flags (minus VROOT,
		 * plus VNOMAP and VNOSWAP) and the ops vector of the
		 * original vnode; stream, type and rdev come from the
		 * switched-in file vnode.
		 */
		mutex_enter(&nodep->nm_lock);
		newvp->v_flag = ((*vpp)->v_flag | VNOMAP | VNOSWAP) & ~VROOT;
		vn_setops(newvp, vn_getops(*vpp));
		newvp->v_vfsp = &namevfs;
		newvp->v_stream = outfilevp->v_stream;
		newvp->v_type = outfilevp->v_type;
		newvp->v_rdev = outfilevp->v_rdev;
		newvp->v_data = (caddr_t)newnamep;
		vn_exists(newvp);
		bcopy(&nodep->nm_vattr, &newnamep->nm_vattr, sizeof (vattr_t));
		newnamep->nm_vattr.va_type = outfilevp->v_type;
		newnamep->nm_vattr.va_nodeid = namenodeno_alloc();
		newnamep->nm_vattr.va_size = (u_offset_t)0;
		newnamep->nm_vattr.va_rdev = outfilevp->v_rdev;
		newnamep->nm_flag = NMNMNT;
		newnamep->nm_filevp = outfilevp;
		newnamep->nm_filep = nodep->nm_filep;
		newnamep->nm_mountpt = nodep->nm_mountpt;
		mutex_exit(&nodep->nm_lock);

		/*
		 * Insert the new namenode into the hash list.
		 */
		nameinsert(newnamep);
gotit:
		mutex_exit(&ntable_lock);
		/*
		 * Drop the caller's hold on the original NAMEFS vnode and
		 * return the vnode of the namenode found or created above.
		 * (As noted at the top of this function, the hold taken on
		 * infilevp is released by the file system that performed
		 * the switch.)
		 */
		VN_RELE(*vpp);
		*vpp = NMTOV(newnamep);
	}
	return (0);
}
157
158 /*
159 * Close a mounted file descriptor.
160 * Remove any locks and apply the VOP_CLOSE operation to the vnode for
161 * the file descriptor.
162 */
163 static int
nm_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * crp,caller_context_t * ct)164 nm_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *crp,
165 caller_context_t *ct)
166 {
167 struct namenode *nodep = VTONM(vp);
168 int error = 0;
169
170 (void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
171 cleanshares(vp, ttoproc(curthread)->p_pid);
172 error = VOP_CLOSE(nodep->nm_filevp, flag, count, offset, crp, ct);
173 if (count == 1) {
174 (void) VOP_FSYNC(nodep->nm_filevp, FSYNC, crp, ct);
175 /*
176 * Before VN_RELE() we need to remove the vnode from
177 * the hash table. We should only do so in the NMNMNT case.
178 * In other cases, nodep->nm_filep keeps a reference
179 * to nm_filevp and the entry in the hash table doesn't
180 * hurt.
181 */
182 if ((nodep->nm_flag & NMNMNT) != 0) {
183 mutex_enter(&ntable_lock);
184 nameremove(nodep);
185 mutex_exit(&ntable_lock);
186 }
187 VN_RELE(nodep->nm_filevp);
188 }
189 return (error);
190 }
191
192 static int
nm_read(vnode_t * vp,struct uio * uiop,int ioflag,cred_t * crp,caller_context_t * ct)193 nm_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
194 caller_context_t *ct)
195 {
196 return (VOP_READ(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
197 }
198
199 static int
nm_write(vnode_t * vp,struct uio * uiop,int ioflag,cred_t * crp,caller_context_t * ct)200 nm_write(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *crp,
201 caller_context_t *ct)
202 {
203 return (VOP_WRITE(VTONM(vp)->nm_filevp, uiop, ioflag, crp, ct));
204 }
205
206 static int
nm_ioctl(vnode_t * vp,int cmd,intptr_t arg,int mode,cred_t * cr,int * rvalp,caller_context_t * ct)207 nm_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr, int *rvalp,
208 caller_context_t *ct)
209 {
210 return (VOP_IOCTL(VTONM(vp)->nm_filevp, cmd, arg, mode, cr, rvalp, ct));
211 }
212
213 /*
214 * Return in vap the attributes that are stored in the namenode
215 * structure. Only the size is taken from the mounted object.
216 */
217 /* ARGSUSED */
218 static int
nm_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * crp,caller_context_t * ct)219 nm_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp,
220 caller_context_t *ct)
221 {
222 struct namenode *nodep = VTONM(vp);
223 struct vattr va;
224 int error;
225
226 mutex_enter(&nodep->nm_lock);
227 bcopy(&nodep->nm_vattr, vap, sizeof (vattr_t));
228 mutex_exit(&nodep->nm_lock);
229
230 if ((va.va_mask = vap->va_mask & AT_SIZE) != 0) {
231 if (error = VOP_GETATTR(nodep->nm_filevp, &va, flags, crp, ct))
232 return (error);
233 vap->va_size = va.va_size;
234 }
235
236 return (0);
237 }
238
239 /*
240 * Standard access() like check. Figure out which mode bits apply
241 * to the caller then pass the missing mode bits to the secpolicy function.
242 */
243 static int
nm_access_unlocked(void * vnp,int mode,cred_t * crp)244 nm_access_unlocked(void *vnp, int mode, cred_t *crp)
245 {
246 struct namenode *nodep = vnp;
247 int shift = 0;
248
249 if (crgetuid(crp) != nodep->nm_vattr.va_uid) {
250 shift += 3;
251 if (!groupmember(nodep->nm_vattr.va_gid, crp))
252 shift += 3;
253 }
254
255 return (secpolicy_vnode_access2(crp, NMTOV(nodep),
256 nodep->nm_vattr.va_uid, nodep->nm_vattr.va_mode << shift,
257 mode));
258 }
259 /*
260 * Set the attributes of the namenode from the attributes in vap.
261 */
262 /* ARGSUSED */
263 static int
nm_setattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * crp,caller_context_t * ctp)264 nm_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp,
265 caller_context_t *ctp)
266 {
267 struct namenode *nodep = VTONM(vp);
268 struct vattr *nmvap = &nodep->nm_vattr;
269 long mask = vap->va_mask;
270 int error = 0;
271
272 /*
273 * Cannot set these attributes.
274 */
275 if (mask & (AT_NOSET|AT_SIZE))
276 return (EINVAL);
277
278 (void) VOP_RWLOCK(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
279 mutex_enter(&nodep->nm_lock);
280
281 /*
282 * Change ownership/group/time/access mode of mounted file
283 * descriptor.
284 */
285
286 error = secpolicy_vnode_setattr(crp, vp, vap, nmvap, flags,
287 nm_access_unlocked, nodep);
288 if (error)
289 goto out;
290
291 mask = vap->va_mask;
292 /*
293 * If request to change mode, copy new
294 * mode into existing attribute structure.
295 */
296 if (mask & AT_MODE)
297 nmvap->va_mode = vap->va_mode & ~VSVTX;
298
299 /*
300 * If request was to change user or group, turn off suid and sgid
301 * bits.
302 * If the system was configured with the "rstchown" option, the
303 * owner is not permitted to give away the file, and can change
304 * the group id only to a group of which they are a member.
305 */
306 if (mask & AT_UID)
307 nmvap->va_uid = vap->va_uid;
308 if (mask & AT_GID)
309 nmvap->va_gid = vap->va_gid;
310 /*
311 * If request is to modify times, make sure user has write
312 * permissions on the file.
313 */
314 if (mask & AT_ATIME)
315 nmvap->va_atime = vap->va_atime;
316 if (mask & AT_MTIME) {
317 nmvap->va_mtime = vap->va_mtime;
318 gethrestime(&nmvap->va_ctime);
319 }
320 out:
321 mutex_exit(&nodep->nm_lock);
322 VOP_RWUNLOCK(nodep->nm_filevp, V_WRITELOCK_TRUE, ctp);
323 return (error);
324 }
325
326 /*
327 * Check mode permission on the namenode. First nm_access_unlocked()
328 * checks the bits on the name node, then an access check is performed
329 * on the underlying file.
330 */
331 /* ARGSUSED */
332 static int
nm_access(vnode_t * vp,int mode,int flags,cred_t * crp,caller_context_t * ct)333 nm_access(vnode_t *vp, int mode, int flags, cred_t *crp, caller_context_t *ct)
334 {
335 struct namenode *nodep = VTONM(vp);
336 int error;
337
338 mutex_enter(&nodep->nm_lock);
339 error = nm_access_unlocked(nodep, mode, crp);
340 mutex_exit(&nodep->nm_lock);
341 if (error == 0)
342 return (VOP_ACCESS(nodep->nm_filevp, mode, flags, crp, ct));
343 else
344 return (error);
345 }
346
347 /*
348 * We can get here if a creat or open with O_CREAT is done on a namefs
349 * mount point, for example, as the object of a shell output redirection to
350 * the mount point.
351 */
352 /*ARGSUSED*/
353 static int
nm_create(vnode_t * dvp,char * name,vattr_t * vap,enum vcexcl excl,int mode,vnode_t ** vpp,cred_t * cr,int flag,caller_context_t * ct,vsecattr_t * vsecp)354 nm_create(vnode_t *dvp, char *name, vattr_t *vap, enum vcexcl excl, int mode,
355 vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
356 vsecattr_t *vsecp)
357 {
358 int error;
359
360 ASSERT(dvp && *name == '\0');
361 if (excl == NONEXCL) {
362 if (mode && (error = nm_access(dvp, mode, 0, cr, ct)) != 0)
363 return (error);
364 VN_HOLD(dvp);
365 return (0);
366 }
367 return (EEXIST);
368 }
369
/*
 * Links are not allowed on mounted file descriptors.
 * All arguments are ignored; the call always fails with EXDEV.
 */
/*ARGSUSED*/
static int
nm_link(vnode_t *tdvp, vnode_t *vp, char *tnm, cred_t *crp,
    caller_context_t *ct, int flags)
{
	return (EXDEV);
}
380
381 static int
nm_fsync(vnode_t * vp,int syncflag,cred_t * crp,caller_context_t * ct)382 nm_fsync(vnode_t *vp, int syncflag, cred_t *crp, caller_context_t *ct)
383 {
384 return (VOP_FSYNC(VTONM(vp)->nm_filevp, syncflag, crp, ct));
385 }
386
/*
 * Free the namenode.
 *
 * VN_RELE_LOCKED() is applied to vp with v_lock held.  If v_count is
 * still non-zero afterwards, someone else holds the vnode and nothing
 * more is done.  Otherwise the node is torn down: for nodes that are
 * not NMNMNT the mounted file (nm_filep) is closed, the vnode is
 * invalidated and freed, the vfs is released unless it is namevfs
 * itself, and finally the node id and the namenode are freed.
 *
 * NOTE(review): nm_lock is never passed to mutex_destroy() before the
 * namenode is freed, although namenodes created in nm_open() have it
 * set up with mutex_init().  Confirm whether a mutex_destroy() belongs
 * here.
 */
/* ARGSUSED */
static void
nm_inactive(vnode_t *vp, cred_t *crp, caller_context_t *ct)
{
	struct namenode *nodep = VTONM(vp);
	/* Saved now: vp is freed before the vfs is released below. */
	vfs_t *vfsp = vp->v_vfsp;

	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);
	VN_RELE_LOCKED(vp);
	if (vp->v_count != 0) {
		/* Still referenced elsewhere; leave the node alone. */
		mutex_exit(&vp->v_lock);
		return;
	}
	mutex_exit(&vp->v_lock);
	if (!(nodep->nm_flag & NMNMNT)) {
		/* Only non-NMNMNT nodes close the mounted file here. */
		ASSERT(nodep->nm_filep->f_vnode == nodep->nm_filevp);
		(void) closef(nodep->nm_filep);
	}
	vn_invalid(vp);
	vn_free(vp);
	if (vfsp != &namevfs)
		VFS_RELE(vfsp);
	namenodeno_free(nodep->nm_vattr.va_nodeid);
	kmem_free(nodep, sizeof (struct namenode));
}
414
415 static int
nm_fid(vnode_t * vp,struct fid * fidnodep,caller_context_t * ct)416 nm_fid(vnode_t *vp, struct fid *fidnodep, caller_context_t *ct)
417 {
418 return (VOP_FID(VTONM(vp)->nm_filevp, fidnodep, ct));
419 }
420
421 static int
nm_rwlock(vnode_t * vp,int write,caller_context_t * ctp)422 nm_rwlock(vnode_t *vp, int write, caller_context_t *ctp)
423 {
424 return (VOP_RWLOCK(VTONM(vp)->nm_filevp, write, ctp));
425 }
426
427 static void
nm_rwunlock(vnode_t * vp,int write,caller_context_t * ctp)428 nm_rwunlock(vnode_t *vp, int write, caller_context_t *ctp)
429 {
430 VOP_RWUNLOCK(VTONM(vp)->nm_filevp, write, ctp);
431 }
432
433 static int
nm_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)434 nm_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
435 {
436 return (VOP_SEEK(VTONM(vp)->nm_filevp, ooff, noffp, ct));
437 }
438
439 /*
440 * Return the vnode representing the file descriptor in vpp.
441 */
442 static int
nm_realvp(vnode_t * vp,vnode_t ** vpp,caller_context_t * ct)443 nm_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
444 {
445 struct vnode *rvp;
446
447 vp = VTONM(vp)->nm_filevp;
448 if (VOP_REALVP(vp, &rvp, ct) == 0)
449 vp = rvp;
450 *vpp = vp;
451 return (0);
452 }
453
454 static int
nm_poll(vnode_t * vp,short events,int anyyet,short * reventsp,pollhead_t ** phpp,caller_context_t * ct)455 nm_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
456 pollhead_t **phpp, caller_context_t *ct)
457 {
458 return (VOP_POLL(VTONM(vp)->nm_filevp, events, anyyet, reventsp,
459 phpp, ct));
460 }
461
/*
 * Vnode operations vector for NAMEFS.  It is not assigned in this part
 * of the file.
 * NOTE(review): presumably it is built from nm_vnodeops_template during
 * file system initialization -- confirm.
 */
struct vnodeops *nm_vnodeops;

/*
 * Template pairing each vnode operation name with the NAMEFS routine
 * that implements it.  VOPNAME_DISPOSE is mapped to fs_error.  The
 * list is terminated by a NULL, NULL entry.
 */
const fs_operation_def_t nm_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = nm_open },
	VOPNAME_CLOSE,		{ .vop_close = nm_close },
	VOPNAME_READ,		{ .vop_read = nm_read },
	VOPNAME_WRITE,		{ .vop_write = nm_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = nm_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = nm_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = nm_setattr },
	VOPNAME_ACCESS,		{ .vop_access = nm_access },
	VOPNAME_CREATE,		{ .vop_create = nm_create },
	VOPNAME_LINK,		{ .vop_link = nm_link },
	VOPNAME_FSYNC,		{ .vop_fsync = nm_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = nm_inactive },
	VOPNAME_FID,		{ .vop_fid = nm_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = nm_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = nm_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = nm_seek },
	VOPNAME_REALVP,		{ .vop_realvp = nm_realvp },
	VOPNAME_POLL,		{ .vop_poll = nm_poll },
	VOPNAME_DISPOSE,	{ .error = fs_error },
	NULL,			NULL
};
486