1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bitmap.h>
32 #include <sys/debug.h>
33 #include <sys/errno.h>
34 #include <sys/strsubr.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sysmacros.h>
37 #include <sys/filio.h>
38 #include <sys/flock.h>
39 #include <sys/stat.h>
40 #include <sys/share.h>
41
42 #include <sys/vfs.h>
43 #include <sys/vfs_opreg.h>
44
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/strsun.h>
49
50 #include <fs/sockfs/sockcommon.h>
51 #include <fs/sockfs/socktpi.h>
52
53 /*
54 * Generic vnode ops
55 */
56 static int socket_vop_open(struct vnode **, int, struct cred *,
57 caller_context_t *);
58 static int socket_vop_close(struct vnode *, int, int, offset_t,
59 struct cred *, caller_context_t *);
60 static int socket_vop_read(struct vnode *, struct uio *, int,
61 struct cred *, caller_context_t *);
62 static int socket_vop_write(struct vnode *, struct uio *, int,
63 struct cred *, caller_context_t *);
64 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int,
65 struct cred *, int32_t *, caller_context_t *);
66 static int socket_vop_setfl(struct vnode *, int, int, cred_t *,
67 caller_context_t *);
68 static int socket_vop_getattr(struct vnode *, struct vattr *, int,
69 struct cred *, caller_context_t *);
70 static int socket_vop_setattr(struct vnode *, struct vattr *, int,
71 struct cred *, caller_context_t *);
72 static int socket_vop_access(struct vnode *, int, int, struct cred *,
73 caller_context_t *);
74 static int socket_vop_fsync(struct vnode *, int, struct cred *,
75 caller_context_t *);
76 static void socket_vop_inactive(struct vnode *, struct cred *,
77 caller_context_t *);
78 static int socket_vop_fid(struct vnode *, struct fid *,
79 caller_context_t *);
80 static int socket_vop_seek(struct vnode *, offset_t, offset_t *,
81 caller_context_t *);
82 static int socket_vop_poll(struct vnode *, short, int, short *,
83 struct pollhead **, caller_context_t *);
84
85 extern int socket_close_internal(struct sonode *, int, cred_t *);
86 extern void socket_destroy_internal(struct sonode *, cred_t *);
87
88 struct vnodeops *socket_vnodeops;
89 const fs_operation_def_t socket_vnodeops_template[] = {
90 VOPNAME_OPEN, { .vop_open = socket_vop_open },
91 VOPNAME_CLOSE, { .vop_close = socket_vop_close },
92 VOPNAME_READ, { .vop_read = socket_vop_read },
93 VOPNAME_WRITE, { .vop_write = socket_vop_write },
94 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl },
95 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl },
96 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr },
97 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr },
98 VOPNAME_ACCESS, { .vop_access = socket_vop_access },
99 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync },
100 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive },
101 VOPNAME_FID, { .vop_fid = socket_vop_fid },
102 VOPNAME_SEEK, { .vop_seek = socket_vop_seek },
103 VOPNAME_POLL, { .vop_poll = socket_vop_poll },
104 VOPNAME_DISPOSE, { .error = fs_error },
105 NULL, NULL
106 };
107
108
109 /*
110 * generic vnode ops
111 */
112
113 /*ARGSUSED*/
114 static int
socket_vop_open(struct vnode ** vpp,int flag,struct cred * cr,caller_context_t * ct)115 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr,
116 caller_context_t *ct)
117 {
118 struct vnode *vp = *vpp;
119 struct sonode *so = VTOSO(vp);
120
121 flag &= ~FCREAT; /* paranoia */
122 mutex_enter(&so->so_lock);
123 so->so_count++;
124 mutex_exit(&so->so_lock);
125
126 ASSERT(so->so_count != 0); /* wraparound */
127 ASSERT(vp->v_type == VSOCK);
128
129 return (0);
130 }
131
132 /*ARGSUSED*/
133 static int
socket_vop_close(struct vnode * vp,int flag,int count,offset_t offset,struct cred * cr,caller_context_t * ct)134 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset,
135 struct cred *cr, caller_context_t *ct)
136 {
137 struct sonode *so;
138 int error = 0;
139
140 so = VTOSO(vp);
141 ASSERT(vp->v_type == VSOCK);
142
143 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
144 cleanshares(vp, ttoproc(curthread)->p_pid);
145
146 if (vp->v_stream)
147 strclean(vp);
148
149 if (count > 1) {
150 dprint(2, ("socket_vop_close: count %d\n", count));
151 return (0);
152 }
153
154 mutex_enter(&so->so_lock);
155 if (--so->so_count == 0) {
156 /*
157 * Initiate connection shutdown.
158 */
159 mutex_exit(&so->so_lock);
160 error = socket_close_internal(so, flag, cr);
161 } else {
162 mutex_exit(&so->so_lock);
163 }
164
165 return (error);
166 }
167
168 /*ARGSUSED2*/
169 static int
socket_vop_read(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)170 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr,
171 caller_context_t *ct)
172 {
173 struct sonode *so = VTOSO(vp);
174 struct nmsghdr lmsg;
175
176 ASSERT(vp->v_type == VSOCK);
177 bzero((void *)&lmsg, sizeof (lmsg));
178
179 return (socket_recvmsg(so, &lmsg, uiop, cr));
180 }
181
182 /*ARGSUSED2*/
183 static int
socket_vop_write(struct vnode * vp,struct uio * uiop,int ioflag,struct cred * cr,caller_context_t * ct)184 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag,
185 struct cred *cr, caller_context_t *ct)
186 {
187 struct sonode *so = VTOSO(vp);
188 struct nmsghdr lmsg;
189
190 ASSERT(vp->v_type == VSOCK);
191 bzero((void *)&lmsg, sizeof (lmsg));
192
193 if (!(so->so_mode & SM_BYTESTREAM)) {
194 /*
195 * If the socket is not byte stream set MSG_EOR
196 */
197 lmsg.msg_flags = MSG_EOR;
198 }
199
200 return (socket_sendmsg(so, &lmsg, uiop, cr));
201 }
202
203 /*ARGSUSED4*/
204 static int
socket_vop_ioctl(struct vnode * vp,int cmd,intptr_t arg,int mode,struct cred * cr,int32_t * rvalp,caller_context_t * ct)205 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode,
206 struct cred *cr, int32_t *rvalp, caller_context_t *ct)
207 {
208 struct sonode *so = VTOSO(vp);
209
210 ASSERT(vp->v_type == VSOCK);
211
212 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp));
213 }
214
215 /*
216 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited
217 * from listener to acceptor.
218 */
219 /* ARGSUSED */
220 static int
socket_vop_setfl(vnode_t * vp,int oflags,int nflags,cred_t * cr,caller_context_t * ct)221 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr,
222 caller_context_t *ct)
223 {
224 struct sonode *so = VTOSO(vp);
225 int error = 0;
226
227 ASSERT(vp->v_type == VSOCK);
228
229 mutex_enter(&so->so_lock);
230 if (nflags & FNDELAY)
231 so->so_state |= SS_NDELAY;
232 else
233 so->so_state &= ~SS_NDELAY;
234 if (nflags & FNONBLOCK)
235 so->so_state |= SS_NONBLOCK;
236 else
237 so->so_state &= ~SS_NONBLOCK;
238 mutex_exit(&so->so_lock);
239
240 if (so->so_state & SS_ASYNC)
241 oflags |= FASYNC;
242 /*
243 * Sets/clears the SS_ASYNC flag based on the presence/absence
244 * of the FASYNC flag passed to fcntl(F_SETFL).
245 * This exists solely for BSD fcntl() FASYNC compatibility.
246 */
247 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) {
248 int async = nflags & FASYNC;
249 int32_t rv;
250
251 /*
252 * For non-TPI sockets all we have to do is set/remove the
253 * SS_ASYNC bit, but for TPI it is more involved. For that
254 * reason we delegate the job to the protocol's ioctl handler.
255 */
256 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL,
257 cr, &rv);
258 }
259 return (error);
260 }
261
262
263 /*
264 * Get the made up attributes for the vnode.
265 * 4.3BSD returns the current time for all the timestamps.
266 * 4.4BSD returns 0 for all the timestamps.
267 * Here we use the access and modified times recorded in the sonode.
268 *
269 * Just like in BSD there is not effect on the underlying file system node
270 * bound to an AF_UNIX pathname.
271 *
272 * When sockmod has been popped this will act just like a stream. Since
273 * a socket is always a clone there is no need to inspect the attributes
274 * of the "realvp".
275 */
276 /* ARGSUSED */
277 int
socket_vop_getattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)278 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags,
279 struct cred *cr, caller_context_t *ct)
280 {
281 dev_t fsid;
282 struct sonode *so;
283 static int sonode_shift = 0;
284
285 /*
286 * Calculate the amount of bitshift to a sonode pointer which will
287 * still keep it unique. See below.
288 */
289 if (sonode_shift == 0)
290 sonode_shift = highbit(sizeof (struct sonode));
291 ASSERT(sonode_shift > 0);
292
293 so = VTOSO(vp);
294 fsid = sockdev;
295
296 if (so->so_version == SOV_STREAM) {
297 /*
298 * The imaginary "sockmod" has been popped - act
299 * as a stream
300 */
301 vap->va_type = VCHR;
302 vap->va_mode = 0;
303 } else {
304 vap->va_type = vp->v_type;
305 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|
306 S_IROTH|S_IWOTH;
307 }
308 vap->va_uid = vap->va_gid = 0;
309 vap->va_fsid = fsid;
310 /*
311 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail.
312 * So we shift down the sonode pointer to try and get the most
313 * uniqueness into 16-bits.
314 */
315 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF;
316 vap->va_nlink = 0;
317 vap->va_size = 0;
318
319 /*
320 * We need to zero out the va_rdev to avoid some fstats getting
321 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior.
322 */
323 vap->va_rdev = (dev_t)0;
324 vap->va_blksize = MAXBSIZE;
325 vap->va_nblocks = btod(vap->va_size);
326
327 if (!SOCK_IS_NONSTR(so)) {
328 sotpi_info_t *sti = SOTOTPI(so);
329
330 mutex_enter(&so->so_lock);
331 vap->va_atime.tv_sec = sti->sti_atime;
332 vap->va_mtime.tv_sec = sti->sti_mtime;
333 vap->va_ctime.tv_sec = sti->sti_ctime;
334 mutex_exit(&so->so_lock);
335 } else {
336 vap->va_atime.tv_sec = 0;
337 vap->va_mtime.tv_sec = 0;
338 vap->va_ctime.tv_sec = 0;
339 }
340
341 vap->va_atime.tv_nsec = 0;
342 vap->va_mtime.tv_nsec = 0;
343 vap->va_ctime.tv_nsec = 0;
344 vap->va_seq = 0;
345
346 return (0);
347 }
348
349 /*
350 * Set attributes.
351 * Just like in BSD there is not effect on the underlying file system node
352 * bound to an AF_UNIX pathname.
353 *
354 * When sockmod has been popped this will act just like a stream. Since
355 * a socket is always a clone there is no need to modify the attributes
356 * of the "realvp".
357 */
358 /* ARGSUSED */
359 int
socket_vop_setattr(struct vnode * vp,struct vattr * vap,int flags,struct cred * cr,caller_context_t * ct)360 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags,
361 struct cred *cr, caller_context_t *ct)
362 {
363 struct sonode *so = VTOSO(vp);
364
365 /*
366 * If times were changed, and we have a STREAMS socket, then update
367 * the sonode.
368 */
369 if (!SOCK_IS_NONSTR(so)) {
370 sotpi_info_t *sti = SOTOTPI(so);
371
372 mutex_enter(&so->so_lock);
373 if (vap->va_mask & AT_ATIME)
374 sti->sti_atime = vap->va_atime.tv_sec;
375 if (vap->va_mask & AT_MTIME) {
376 sti->sti_mtime = vap->va_mtime.tv_sec;
377 sti->sti_ctime = gethrestime_sec();
378 }
379 mutex_exit(&so->so_lock);
380 }
381
382 return (0);
383 }
384
385 /*
386 * Check if user is allowed to access vp. For non-STREAMS based sockets,
387 * there might not be a device attached to the file system. So for those
388 * types of sockets there are no permissions to check.
389 *
390 * XXX Should there be some other mechanism to check access rights?
391 */
392 /*ARGSUSED*/
393 int
socket_vop_access(struct vnode * vp,int mode,int flags,struct cred * cr,caller_context_t * ct)394 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr,
395 caller_context_t *ct)
396 {
397 struct sonode *so = VTOSO(vp);
398
399 if (!SOCK_IS_NONSTR(so)) {
400 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL);
401 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode,
402 mode, flags, cr, NULL));
403 }
404 return (0);
405 }
406
407 /*
408 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL.
409 * This code does the same to be compatible and also to not give an
410 * application the impression that the data has actually been "synced"
411 * to the other end of the connection.
412 */
413 /* ARGSUSED */
414 int
socket_vop_fsync(struct vnode * vp,int syncflag,struct cred * cr,caller_context_t * ct)415 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr,
416 caller_context_t *ct)
417 {
418 return (EINVAL);
419 }
420
421 /*ARGSUSED*/
422 static void
socket_vop_inactive(struct vnode * vp,struct cred * cr,caller_context_t * ct)423 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
424 {
425 struct sonode *so = VTOSO(vp);
426
427 ASSERT(vp->v_type == VSOCK);
428
429 mutex_enter(&vp->v_lock);
430 /*
431 * If no one has reclaimed the vnode, remove from the
432 * cache now.
433 */
434 if (vp->v_count < 1)
435 cmn_err(CE_PANIC, "socket_inactive: Bad v_count");
436
437 /*
438 * Drop the temporary hold by vn_rele now
439 */
440 if (--vp->v_count != 0) {
441 mutex_exit(&vp->v_lock);
442 return;
443 }
444 mutex_exit(&vp->v_lock);
445
446
447 ASSERT(!vn_has_cached_data(vp));
448
449 /* socket specfic clean-up */
450 socket_destroy_internal(so, cr);
451 }
452
453 /* ARGSUSED */
454 int
socket_vop_fid(struct vnode * vp,struct fid * fidp,caller_context_t * ct)455 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
456 {
457 return (EINVAL);
458 }
459
460 /*
461 * Sockets are not seekable.
462 * (and there is a bug to fix STREAMS to make them fail this as well).
463 */
464 /*ARGSUSED*/
465 int
socket_vop_seek(struct vnode * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)466 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
467 caller_context_t *ct)
468 {
469 return (ESPIPE);
470 }
471
472 /*ARGSUSED*/
473 static int
socket_vop_poll(struct vnode * vp,short events,int anyyet,short * reventsp,struct pollhead ** phpp,caller_context_t * ct)474 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp,
475 struct pollhead **phpp, caller_context_t *ct)
476 {
477 struct sonode *so = VTOSO(vp);
478
479 ASSERT(vp->v_type == VSOCK);
480
481 return (socket_poll(so, events, anyyet, reventsp, phpp));
482 }
483