1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26 /*
27 * Copyright (c) 2017 by Delphix. All rights reserved.
28 * Copyright 2021 Racktop Systems, Inc.
29 */
30
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/bitmap.h>
36 #include <sys/debug.h>
37 #include <sys/errno.h>
38 #include <sys/strsubr.h>
39 #include <sys/cmn_err.h>
40 #include <sys/sysmacros.h>
41 #include <sys/filio.h>
42 #include <sys/flock.h>
43 #include <sys/stat.h>
44 #include <sys/share.h>
45
46 #include <sys/vfs.h>
47 #include <sys/vfs_opreg.h>
48
49 #include <sys/sockio.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/strsun.h>
53
54 #include <fs/sockfs/sockcommon.h>
55 #include <fs/sockfs/socktpi.h>
56
57 /*
58 * Generic vnode ops
59 */
60 static int socket_vop_open(struct vnode **, int, struct cred *,
61 caller_context_t *);
62 static int socket_vop_close(struct vnode *, int, int, offset_t,
63 struct cred *, caller_context_t *);
64 static int socket_vop_read(struct vnode *, struct uio *, int,
65 struct cred *, caller_context_t *);
66 static int socket_vop_write(struct vnode *, struct uio *, int,
67 struct cred *, caller_context_t *);
68 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int,
69 struct cred *, int32_t *, caller_context_t *);
70 static int socket_vop_setfl(struct vnode *, int, int, cred_t *,
71 caller_context_t *);
72 static int socket_vop_getattr(struct vnode *, struct vattr *, int,
73 struct cred *, caller_context_t *);
74 static int socket_vop_setattr(struct vnode *, struct vattr *, int,
75 struct cred *, caller_context_t *);
76 static int socket_vop_access(struct vnode *, int, int, struct cred *,
77 caller_context_t *);
78 static int socket_vop_fsync(struct vnode *, int, struct cred *,
79 caller_context_t *);
80 static void socket_vop_inactive(struct vnode *, struct cred *,
81 caller_context_t *);
82 static int socket_vop_fid(struct vnode *, struct fid *,
83 caller_context_t *);
84 static int socket_vop_seek(struct vnode *, offset_t, offset_t *,
85 caller_context_t *);
86 static int socket_vop_poll(struct vnode *, short, int, short *,
87 struct pollhead **, caller_context_t *);
88
89 extern int socket_close_internal(struct sonode *, int, cred_t *);
90 extern void socket_destroy_internal(struct sonode *, cred_t *);
91
92 struct vnodeops *socket_vnodeops;
93 const fs_operation_def_t socket_vnodeops_template[] = {
94 VOPNAME_OPEN, { .vop_open = socket_vop_open },
95 VOPNAME_CLOSE, { .vop_close = socket_vop_close },
96 VOPNAME_READ, { .vop_read = socket_vop_read },
97 VOPNAME_WRITE, { .vop_write = socket_vop_write },
98 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl },
99 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl },
100 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr },
101 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr },
102 VOPNAME_ACCESS, { .vop_access = socket_vop_access },
103 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync },
104 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive },
105 VOPNAME_FID, { .vop_fid = socket_vop_fid },
106 VOPNAME_SEEK, { .vop_seek = socket_vop_seek },
107 VOPNAME_POLL, { .vop_poll = socket_vop_poll },
108 VOPNAME_DISPOSE, { .error = fs_error },
109 NULL, NULL
110 };
111
112
113 /*
114 * generic vnode ops
115 */
116
117 /*ARGSUSED*/
118 static int
socket_vop_open(struct vnode ** vpp,int flag,struct cred * cr,caller_context_t * ct)119 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
120 caller_context_t *ct)
121 {
122 struct vnode *vp = *vpp;
123 struct sonode *so = VTOSO(vp);
124
125 flag &= ~FCREAT; /* paranoia */
126 mutex_enter(&so->so_lock);
127 so->so_count++;
128 mutex_exit(&so->so_lock);
129
130 ASSERT(so->so_count != 0); /* wraparound */
131 ASSERT(vp->v_type == VSOCK);
132
133 return (0);
134 }
135
136 /*ARGSUSED*/
137 static int
socket_vop_close(struct vnode * vp,int flag,int count,offset_t offset,struct cred * cr,caller_context_t * ct)138 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
139 struct cred *cr, caller_context_t *ct)
140 {
141 struct sonode *so;
142 int error = 0;
143
144 so = VTOSO(vp);
145 ASSERT(vp->v_type == VSOCK);
146
147 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
148 cleanshares(vp, ttoproc(curthread)->p_pid);
149
150 if (vp->v_stream)
151 strclean(vp);
152
153 if (count > 1) {
154 dprint(2, ("socket_vop_close: count %d\n", count));
155 return (0);
156 }
157
158 mutex_enter(&so->so_lock);
159 if (--so->so_count == 0) {
160 /*
161 * Initiate connection shutdown.
162 */
163 mutex_exit(&so->so_lock);
164 error = socket_close_internal(so, flag, cr);
165 } else {
166 mutex_exit(&so->so_lock);
167 }
168
169 return (error);
170 }
171
172 /*ARGSUSED2*/
173 static int
socket_vop_read(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)174 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
175 caller_context_t *ct)
176 {
177 struct sonode *so = VTOSO(vp);
178 struct nmsghdr lmsg;
179
180 ASSERT(vp->v_type == VSOCK);
181 bzero((void *)&lmsg, sizeof (lmsg));
182
183 return (socket_recvmsg(so, &lmsg, uiop, cr));
184 }
185
186 /*ARGSUSED2*/
187 static int
socket_vop_write(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)188 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
189 struct cred *cr, caller_context_t *ct)
190 {
191 struct sonode *so = VTOSO(vp);
192 struct nmsghdr lmsg;
193
194 ASSERT(vp->v_type == VSOCK);
195 bzero((void *)&lmsg, sizeof (lmsg));
196
197 if (!(so->so_mode & SM_BYTESTREAM)) {
198 /*
199 * If the socket is not byte stream set MSG_EOR
200 */
201 lmsg.msg_flags = MSG_EOR;
202 }
203
204 return (socket_sendmsg(so, &lmsg, uiop, cr));
205 }
206
207 /*ARGSUSED4*/
208 static int
socket_vop_ioctl(struct vnode * vp,int cmd,intptr_t arg,int mode,struct cred * cr,int32_t * rvalp,caller_context_t * ct)209 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
210 struct cred *cr, int32_t *rvalp, caller_context_t *ct)
211 {
212 struct sonode *so = VTOSO(vp);
213
214 ASSERT(vp->v_type == VSOCK);
215
216 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
217 }
218
219 /*
220 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
221 * from listener to acceptor.
222 */
223 /* ARGSUSED */
224 static int
socket_vop_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)225 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
226 caller_context_t *ct)
227 {
228 struct sonode *so = VTOSO(vp);
229 int error = 0;
230
231 ASSERT(vp->v_type == VSOCK);
232
233 mutex_enter(&so->so_lock);
234 if (nflags & FNDELAY)
235 so->so_state |= SS_NDELAY;
236 else
237 so->so_state &= ~SS_NDELAY;
238 if (nflags & FNONBLOCK)
239 so->so_state |= SS_NONBLOCK;
240 else
241 so->so_state &= ~SS_NONBLOCK;
242 mutex_exit(&so->so_lock);
243
244 if (so->so_state & SS_ASYNC)
245 oflags |= FASYNC;
246 /*
247 * Sets/clears the SS_ASYNC flag based on the presence/absence
248 * of the FASYNC flag passed to fcntl(F_SETFL).
249 * This exists solely for BSD fcntl() FASYNC compatibility.
250 */
251 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
252 int async = nflags & FASYNC;
253 int32_t rv;
254
255 /*
256 * For non-TPI sockets all we have to do is set/remove the
257 * SS_ASYNC bit, but for TPI it is more involved. For that
258 * reason we delegate the job to the protocol's ioctl handler.
259 */
260 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
261 cr, &rv);
262 }
263 return (error);
264 }
265
266
267 /*
268 * Get the made up attributes for the vnode.
269 * 4.3BSD returns the current time for all the timestamps.
270 * 4.4BSD returns 0 for all the timestamps.
271 * Here we use the access and modified times recorded in the sonode.
272 *
273 * Just like in BSD there is not effect on the underlying file system node
274 * bound to an AF_UNIX pathname.
275 *
276 * When sockmod has been popped this will act just like a stream. Since
277 * a socket is always a clone there is no need to inspect the attributes
278 * of the "realvp".
279 */
280 /* ARGSUSED */
281 int
socket_vop_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)282 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
283 struct cred *cr, caller_context_t *ct)
284 {
285 dev_t fsid;
286 struct sonode *so;
287 static int sonode_shift = 0;
288
289 /*
290 * Calculate the amount of bitshift to a sonode pointer which will
291 * still keep it unique. See below. Note that highbit() uses
292 * 1-based indexing for the highest bit set (and 0 for 'no bits set').
293 * To use the result of highbit() as a shift value, we must subtract 1
294 * from the result.
295 */
296 if (sonode_shift == 0) {
297 int bit = highbit(sizeof (struct sonode));
298
299 /* Sanity check */
300 VERIFY3S(bit, >, 0);
301 sonode_shift = bit - 1;
302 }
303
304 so = VTOSO(vp);
305 fsid = sockdev;
306
307 if (so->so_version == SOV_STREAM) {
308 /*
309 * The imaginary "sockmod" has been popped - act
310 * as a stream
311 */
312 vap->va_type = VCHR;
313 vap->va_mode = 0;
314 } else {
315 vap->va_type = vp->v_type;
316 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
317 S_IROTH|S_IWOTH;
318 }
319 vap->va_uid = vap->va_gid = 0;
320 vap->va_fsid = fsid;
321 /*
322 * If the va_nodeid is > UINT32_MAX, then stat(2) might fail in
323 * unexpected ways inside non-largefile aware 32-bit processes --
324 * historically, socket inode values (va_nodeid values) were capped at
325 * UINT16_MAX (for even more ancient reasons long since unnecessary).
326 * To avoid the potential of surprise failures, we shift down
327 * the sonode pointer address to try and get the most
328 * uniqueness into 32-bits. In practice, this represents the unique
329 * portion of the kernel address space, so the chance of duplicate
330 * socket inode values is minimized.
331 */
332 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFFFFFF;
333 vap->va_nlink = 0;
334 vap->va_size = 0;
335
336 /*
337 * We need to zero out the va_rdev to avoid some fstats getting
338 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
339 */
340 vap->va_rdev = (dev_t)0;
341 vap->va_blksize = MAXBSIZE;
342 vap->va_nblocks = btod(vap->va_size);
343
344 if (!SOCK_IS_NONSTR(so)) {
345 sotpi_info_t *sti = SOTOTPI(so);
346
347 mutex_enter(&so->so_lock);
348 vap->va_atime.tv_sec = sti->sti_atime;
349 vap->va_mtime.tv_sec = sti->sti_mtime;
350 vap->va_ctime.tv_sec = sti->sti_ctime;
351 mutex_exit(&so->so_lock);
352 } else {
353 vap->va_atime.tv_sec = 0;
354 vap->va_mtime.tv_sec = 0;
355 vap->va_ctime.tv_sec = 0;
356 }
357
358 vap->va_atime.tv_nsec = 0;
359 vap->va_mtime.tv_nsec = 0;
360 vap->va_ctime.tv_nsec = 0;
361 vap->va_seq = 0;
362
363 return (0);
364 }
365
366 /*
367 * Set attributes.
368 * Just like in BSD there is not effect on the underlying file system node
369 * bound to an AF_UNIX pathname.
370 *
371 * When sockmod has been popped this will act just like a stream. Since
372 * a socket is always a clone there is no need to modify the attributes
373 * of the "realvp".
374 */
375 /* ARGSUSED */
376 int
socket_vop_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)377 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
378 struct cred *cr, caller_context_t *ct)
379 {
380 struct sonode *so = VTOSO(vp);
381
382 /*
383 * If times were changed, and we have a STREAMS socket, then update
384 * the sonode.
385 */
386 if (!SOCK_IS_NONSTR(so)) {
387 sotpi_info_t *sti = SOTOTPI(so);
388
389 mutex_enter(&so->so_lock);
390 if (vap->va_mask & AT_ATIME)
391 sti->sti_atime = vap->va_atime.tv_sec;
392 if (vap->va_mask & AT_MTIME) {
393 sti->sti_mtime = vap->va_mtime.tv_sec;
394 sti->sti_ctime = gethrestime_sec();
395 }
396 mutex_exit(&so->so_lock);
397 }
398
399 return (0);
400 }
401
402 /*
403 * Check if user is allowed to access vp. For non-STREAMS based sockets,
404 * there might not be a device attached to the file system. So for those
405 * types of sockets there are no permissions to check.
406 *
407 * XXX Should there be some other mechanism to check access rights?
408 */
409 /*ARGSUSED*/
410 int
socket_vop_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)411 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
412 caller_context_t *ct)
413 {
414 struct sonode *so = VTOSO(vp);
415
416 if (!SOCK_IS_NONSTR(so)) {
417 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
418 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
419 mode, flags, cr, NULL));
420 }
421 return (0);
422 }
423
424 /*
425 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
426 * This code does the same to be compatible and also to not give an
427 * application the impression that the data has actually been "synced"
428 * to the other end of the connection.
429 */
430 /* ARGSUSED */
431 int
socket_vop_fsync(struct vnode * vp,int syncflag,struct cred * cr,caller_context_t * ct)432 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
433 caller_context_t *ct)
434 {
435 return (EINVAL);
436 }
437
438 /*ARGSUSED*/
439 static void
socket_vop_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)440 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
441 {
442 struct sonode *so = VTOSO(vp);
443
444 ASSERT(vp->v_type == VSOCK);
445
446 mutex_enter(&vp->v_lock);
447 /*
448 * If no one has reclaimed the vnode, remove from the
449 * cache now.
450 */
451 if (vp->v_count < 1)
452 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
453
454 VN_RELE_LOCKED(vp);
455 if (vp->v_count != 0) {
456 mutex_exit(&vp->v_lock);
457 return;
458 }
459 mutex_exit(&vp->v_lock);
460
461
462 ASSERT(!vn_has_cached_data(vp));
463
464 /* socket specfic clean-up */
465 socket_destroy_internal(so, cr);
466 }
467
468 /* ARGSUSED */
469 int
socket_vop_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)470 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
471 {
472 return (EINVAL);
473 }
474
475 /*
476 * Sockets are not seekable.
477 * (and there is a bug to fix STREAMS to make them fail this as well).
478 */
479 /*ARGSUSED*/
480 int
socket_vop_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)481 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
482 caller_context_t *ct)
483 {
484 return (ESPIPE);
485 }
486
487 /*ARGSUSED*/
488 static int
socket_vop_poll(struct vnode * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)489 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
490 struct pollhead **phpp, caller_context_t *ct)
491 {
492 struct sonode *so = VTOSO(vp);
493
494 ASSERT(vp->v_type == VSOCK);
495
496 return (socket_poll(so, events, anyyet, reventsp, phpp));
497 }
498