1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 2017 by Delphix. All rights reserved. 28 * Copyright 2021 Racktop Systems, Inc. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/t_lock.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bitmap.h> 36 #include <sys/debug.h> 37 #include <sys/errno.h> 38 #include <sys/strsubr.h> 39 #include <sys/cmn_err.h> 40 #include <sys/sysmacros.h> 41 #include <sys/filio.h> 42 #include <sys/flock.h> 43 #include <sys/stat.h> 44 #include <sys/share.h> 45 46 #include <sys/vfs.h> 47 #include <sys/vfs_opreg.h> 48 49 #include <sys/sockio.h> 50 #include <sys/socket.h> 51 #include <sys/socketvar.h> 52 #include <sys/strsun.h> 53 54 #include <fs/sockfs/sockcommon.h> 55 #include <fs/sockfs/socktpi.h> 56 57 /* 58 * Generic vnode ops 59 */ 60 static int socket_vop_open(struct vnode **, int, struct cred *, 61 caller_context_t *); 62 static int socket_vop_close(struct vnode *, int, int, offset_t, 63 struct cred *, caller_context_t *); 64 static int socket_vop_read(struct vnode *, struct uio *, int, 65 struct cred *, caller_context_t *); 66 static int socket_vop_write(struct vnode *, struct uio *, int, 67 struct cred *, caller_context_t *); 68 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int, 69 struct cred *, int32_t *, caller_context_t *); 70 static int socket_vop_setfl(struct vnode *, int, int, cred_t *, 71 caller_context_t *); 72 static int socket_vop_getattr(struct vnode *, struct vattr *, int, 73 struct cred *, caller_context_t *); 74 static int socket_vop_setattr(struct vnode *, struct vattr *, int, 75 struct cred *, caller_context_t *); 76 static int socket_vop_access(struct vnode *, int, int, struct cred *, 77 caller_context_t *); 78 static int socket_vop_fsync(struct vnode *, int, struct cred *, 79 caller_context_t *); 80 static void socket_vop_inactive(struct vnode *, struct cred *, 81 caller_context_t *); 82 static int socket_vop_fid(struct vnode *, struct fid *, 83 caller_context_t *); 84 static int socket_vop_seek(struct vnode *, offset_t, offset_t *, 85 caller_context_t *); 86 static int socket_vop_poll(struct vnode *, short, int, short *, 87 struct pollhead **, caller_context_t *); 88 89 extern int socket_close_internal(struct sonode *, int, cred_t *); 90 extern void socket_destroy_internal(struct sonode *, cred_t *); 91 92 struct vnodeops *socket_vnodeops; 93 const fs_operation_def_t socket_vnodeops_template[] = { 94 VOPNAME_OPEN, { .vop_open = socket_vop_open }, 95 VOPNAME_CLOSE, { .vop_close = socket_vop_close }, 96 VOPNAME_READ, { .vop_read = socket_vop_read }, 97 VOPNAME_WRITE, { .vop_write = socket_vop_write }, 98 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl }, 99 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl }, 100 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr }, 101 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr }, 102 VOPNAME_ACCESS, { .vop_access = socket_vop_access }, 103 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync }, 104 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive }, 105 VOPNAME_FID, { .vop_fid = socket_vop_fid }, 106 VOPNAME_SEEK, { .vop_seek = socket_vop_seek }, 107 VOPNAME_POLL, { .vop_poll = socket_vop_poll }, 108 VOPNAME_DISPOSE, { .error = fs_error }, 109 NULL, NULL 110 }; 111 112 113 /* 114 * generic vnode ops 115 */ 116 117 /*ARGSUSED*/ 118 static int 119 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr, 120 caller_context_t *ct) 121 { 122 struct vnode *vp = *vpp; 123 struct sonode *so = VTOSO(vp); 124 125 flag &= ~FCREAT; /* paranoia */ 126 mutex_enter(&so->so_lock); 127 so->so_count++; 128 mutex_exit(&so->so_lock); 129 130 ASSERT(so->so_count != 0); /* wraparound */ 131 ASSERT(vp->v_type == VSOCK); 132 133 return (0); 134 } 135 136 /*ARGSUSED*/ 137 static int 138 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset, 139 struct cred *cr, caller_context_t *ct) 140 { 141 struct sonode *so; 142 int error = 0; 143 144 so = VTOSO(vp); 145 ASSERT(vp->v_type == VSOCK); 146 147 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 148 cleanshares(vp, ttoproc(curthread)->p_pid); 149 150 if (vp->v_stream) 151 strclean(vp); 152 153 if (count > 1) { 154 dprint(2, ("socket_vop_close: count %d\n", count)); 155 return (0); 156 } 157 158 mutex_enter(&so->so_lock); 159 if (--so->so_count == 0) { 160 /* 161 * Initiate connection shutdown. 162 */ 163 mutex_exit(&so->so_lock); 164 error = socket_close_internal(so, flag, cr); 165 } else { 166 mutex_exit(&so->so_lock); 167 } 168 169 return (error); 170 } 171 172 /*ARGSUSED2*/ 173 static int 174 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 175 caller_context_t *ct) 176 { 177 struct sonode *so = VTOSO(vp); 178 struct nmsghdr lmsg; 179 180 ASSERT(vp->v_type == VSOCK); 181 bzero((void *)&lmsg, sizeof (lmsg)); 182 183 return (socket_recvmsg(so, &lmsg, uiop, cr)); 184 } 185 186 /*ARGSUSED2*/ 187 static int 188 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag, 189 struct cred *cr, caller_context_t *ct) 190 { 191 struct sonode *so = VTOSO(vp); 192 struct nmsghdr lmsg; 193 194 ASSERT(vp->v_type == VSOCK); 195 bzero((void *)&lmsg, sizeof (lmsg)); 196 197 if (!(so->so_mode & SM_BYTESTREAM)) { 198 /* 199 * If the socket is not byte stream set MSG_EOR 200 */ 201 lmsg.msg_flags = MSG_EOR; 202 } 203 204 return (socket_sendmsg(so, &lmsg, uiop, cr)); 205 } 206 207 /*ARGSUSED4*/ 208 static int 209 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 210 struct cred *cr, int32_t *rvalp, caller_context_t *ct) 211 { 212 struct sonode *so = VTOSO(vp); 213 214 ASSERT(vp->v_type == VSOCK); 215 216 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp)); 217 } 218 219 /* 220 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited 221 * from listener to acceptor. 222 */ 223 /* ARGSUSED */ 224 static int 225 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, 226 caller_context_t *ct) 227 { 228 struct sonode *so = VTOSO(vp); 229 int error = 0; 230 231 ASSERT(vp->v_type == VSOCK); 232 233 mutex_enter(&so->so_lock); 234 if (nflags & FNDELAY) 235 so->so_state |= SS_NDELAY; 236 else 237 so->so_state &= ~SS_NDELAY; 238 if (nflags & FNONBLOCK) 239 so->so_state |= SS_NONBLOCK; 240 else 241 so->so_state &= ~SS_NONBLOCK; 242 mutex_exit(&so->so_lock); 243 244 if (so->so_state & SS_ASYNC) 245 oflags |= FASYNC; 246 /* 247 * Sets/clears the SS_ASYNC flag based on the presence/absence 248 * of the FASYNC flag passed to fcntl(F_SETFL). 249 * This exists solely for BSD fcntl() FASYNC compatibility. 250 */ 251 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) { 252 int async = nflags & FASYNC; 253 int32_t rv; 254 255 /* 256 * For non-TPI sockets all we have to do is set/remove the 257 * SS_ASYNC bit, but for TPI it is more involved. For that 258 * reason we delegate the job to the protocol's ioctl handler. 259 */ 260 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL, 261 cr, &rv); 262 } 263 return (error); 264 } 265 266 267 /* 268 * Get the made up attributes for the vnode. 269 * 4.3BSD returns the current time for all the timestamps. 270 * 4.4BSD returns 0 for all the timestamps. 271 * Here we use the access and modified times recorded in the sonode. 272 * 273 * Just like in BSD there is not effect on the underlying file system node 274 * bound to an AF_UNIX pathname. 275 * 276 * When sockmod has been popped this will act just like a stream. Since 277 * a socket is always a clone there is no need to inspect the attributes 278 * of the "realvp". 279 */ 280 /* ARGSUSED */ 281 int 282 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, 283 struct cred *cr, caller_context_t *ct) 284 { 285 dev_t fsid; 286 struct sonode *so; 287 static int sonode_shift = 0; 288 289 /* 290 * Calculate the amount of bitshift to a sonode pointer which will 291 * still keep it unique. See below. Note that highbit() uses 292 * 1-based indexing for the highest bit set (and 0 for 'no bits set'). 293 * To use the result of highbit() as a shift value, we must subtract 1 294 * from the result. 295 */ 296 if (sonode_shift == 0) { 297 int bit = highbit(sizeof (struct sonode)); 298 299 /* Sanity check */ 300 VERIFY3S(bit, >, 0); 301 sonode_shift = bit - 1; 302 } 303 304 so = VTOSO(vp); 305 fsid = sockdev; 306 307 if (so->so_version == SOV_STREAM) { 308 /* 309 * The imaginary "sockmod" has been popped - act 310 * as a stream 311 */ 312 vap->va_type = VCHR; 313 vap->va_mode = 0; 314 } else { 315 vap->va_type = vp->v_type; 316 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| 317 S_IROTH|S_IWOTH; 318 } 319 vap->va_uid = vap->va_gid = 0; 320 vap->va_fsid = fsid; 321 /* 322 * If the va_nodeid is > UINT32_MAX, then stat(2) might fail in 323 * unexpected ways inside non-largefile aware 32-bit processes -- 324 * historically, socket inode values (va_nodeid values) were capped at 325 * UINT16_MAX (for even more ancient reasons long since unnecessary). 326 * To avoid the potential of surprise failures, we shift down 327 * the sonode pointer address to try and get the most 328 * uniqueness into 32-bits. In practice, this represents the unique 329 * portion of the kernel address space, so the chance of duplicate 330 * socket inode values is minimized. 331 */ 332 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFFFFFF; 333 vap->va_nlink = 0; 334 vap->va_size = 0; 335 336 /* 337 * We need to zero out the va_rdev to avoid some fstats getting 338 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. 339 */ 340 vap->va_rdev = (dev_t)0; 341 vap->va_blksize = MAXBSIZE; 342 vap->va_nblocks = btod(vap->va_size); 343 344 if (!SOCK_IS_NONSTR(so)) { 345 sotpi_info_t *sti = SOTOTPI(so); 346 347 mutex_enter(&so->so_lock); 348 vap->va_atime.tv_sec = sti->sti_atime; 349 vap->va_mtime.tv_sec = sti->sti_mtime; 350 vap->va_ctime.tv_sec = sti->sti_ctime; 351 mutex_exit(&so->so_lock); 352 } else { 353 vap->va_atime.tv_sec = 0; 354 vap->va_mtime.tv_sec = 0; 355 vap->va_ctime.tv_sec = 0; 356 } 357 358 vap->va_atime.tv_nsec = 0; 359 vap->va_mtime.tv_nsec = 0; 360 vap->va_ctime.tv_nsec = 0; 361 vap->va_seq = 0; 362 363 return (0); 364 } 365 366 /* 367 * Set attributes. 368 * Just like in BSD there is not effect on the underlying file system node 369 * bound to an AF_UNIX pathname. 370 * 371 * When sockmod has been popped this will act just like a stream. Since 372 * a socket is always a clone there is no need to modify the attributes 373 * of the "realvp". 374 */ 375 /* ARGSUSED */ 376 int 377 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags, 378 struct cred *cr, caller_context_t *ct) 379 { 380 struct sonode *so = VTOSO(vp); 381 382 /* 383 * If times were changed, and we have a STREAMS socket, then update 384 * the sonode. 385 */ 386 if (!SOCK_IS_NONSTR(so)) { 387 sotpi_info_t *sti = SOTOTPI(so); 388 389 mutex_enter(&so->so_lock); 390 if (vap->va_mask & AT_ATIME) 391 sti->sti_atime = vap->va_atime.tv_sec; 392 if (vap->va_mask & AT_MTIME) { 393 sti->sti_mtime = vap->va_mtime.tv_sec; 394 sti->sti_ctime = gethrestime_sec(); 395 } 396 mutex_exit(&so->so_lock); 397 } 398 399 return (0); 400 } 401 402 /* 403 * Check if user is allowed to access vp. For non-STREAMS based sockets, 404 * there might not be a device attached to the file system. So for those 405 * types of sockets there are no permissions to check. 406 * 407 * XXX Should there be some other mechanism to check access rights? 408 */ 409 /*ARGSUSED*/ 410 int 411 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr, 412 caller_context_t *ct) 413 { 414 struct sonode *so = VTOSO(vp); 415 416 if (!SOCK_IS_NONSTR(so)) { 417 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL); 418 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode, 419 mode, flags, cr, NULL)); 420 } 421 return (0); 422 } 423 424 /* 425 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL. 426 * This code does the same to be compatible and also to not give an 427 * application the impression that the data has actually been "synced" 428 * to the other end of the connection. 429 */ 430 /* ARGSUSED */ 431 int 432 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr, 433 caller_context_t *ct) 434 { 435 return (EINVAL); 436 } 437 438 /*ARGSUSED*/ 439 static void 440 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 441 { 442 struct sonode *so = VTOSO(vp); 443 444 ASSERT(vp->v_type == VSOCK); 445 446 mutex_enter(&vp->v_lock); 447 /* 448 * If no one has reclaimed the vnode, remove from the 449 * cache now. 450 */ 451 if (vp->v_count < 1) 452 cmn_err(CE_PANIC, "socket_inactive: Bad v_count"); 453 454 VN_RELE_LOCKED(vp); 455 if (vp->v_count != 0) { 456 mutex_exit(&vp->v_lock); 457 return; 458 } 459 mutex_exit(&vp->v_lock); 460 461 462 ASSERT(!vn_has_cached_data(vp)); 463 464 /* socket specfic clean-up */ 465 socket_destroy_internal(so, cr); 466 } 467 468 /* ARGSUSED */ 469 int 470 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 471 { 472 return (EINVAL); 473 } 474 475 /* 476 * Sockets are not seekable. 477 * (and there is a bug to fix STREAMS to make them fail this as well). 478 */ 479 /*ARGSUSED*/ 480 int 481 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 482 caller_context_t *ct) 483 { 484 return (ESPIPE); 485 } 486 487 /*ARGSUSED*/ 488 static int 489 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp, 490 struct pollhead **phpp, caller_context_t *ct) 491 { 492 struct sonode *so = VTOSO(vp); 493 494 ASSERT(vp->v_type == VSOCK); 495 496 return (socket_poll(so, events, anyyet, reventsp, phpp)); 497 } 498