1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2015, Joyent, Inc. All rights reserved. 26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 
27 * Copyright 2022 Garrett D'Amore 28 */ 29 30 #include <sys/types.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/buf.h> 35 #include <sys/conf.h> 36 #include <sys/cred.h> 37 #include <sys/kmem.h> 38 #include <sys/sysmacros.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/vnode.h> 42 #include <sys/debug.h> 43 #include <sys/errno.h> 44 #include <sys/time.h> 45 #include <sys/file.h> 46 #include <sys/open.h> 47 #include <sys/user.h> 48 #include <sys/termios.h> 49 #include <sys/stream.h> 50 #include <sys/strsubr.h> 51 #include <sys/strsun.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/cmn_err.h> 56 #include <sys/mkdev.h> 57 #include <sys/pathname.h> 58 #include <sys/ddi.h> 59 #include <sys/stat.h> 60 #include <sys/fs/snode.h> 61 #include <sys/fs/dv_node.h> 62 #include <sys/zone.h> 63 64 #include <sys/socket.h> 65 #include <sys/socketvar.h> 66 #include <netinet/in.h> 67 #include <sys/un.h> 68 #include <sys/ucred.h> 69 70 #include <sys/tiuser.h> 71 #define _SUN_TPI_VERSION 2 72 #include <sys/tihdr.h> 73 74 #include <c2/audit.h> 75 76 #include <fs/sockfs/sockcommon.h> 77 #include <fs/sockfs/sockfilter_impl.h> 78 #include <fs/sockfs/socktpi.h> 79 #include <fs/sockfs/socktpi_impl.h> 80 #include <fs/sockfs/sodirect.h> 81 82 /* 83 * Macros that operate on struct cmsghdr. 84 * The CMSG_VALID macro does not assume that the last option buffer is padded. 
85 */ 86 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 87 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 88 #define CMSG_VALID(cmsg, start, end) \ 89 (ISALIGNED_cmsghdr(cmsg) && \ 90 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 91 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 92 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 93 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 94 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 95 96 dev_t sockdev; /* For fsid in getattr */ 97 98 struct socklist socklist; 99 100 struct kmem_cache *socket_cache; 101 102 /* 103 * sockconf_lock protects the socket configuration (socket types and 104 * socket filters) which is changed via the sockconfig system call. 105 */ 106 krwlock_t sockconf_lock; 107 108 static int sockfs_update(kstat_t *, int); 109 static int sockfs_snapshot(kstat_t *, void *, int); 110 extern smod_info_t *sotpi_smod_create(void); 111 112 extern void sendfile_init(); 113 114 extern int modrootloaded; 115 116 /* 117 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 118 * Returns with the vnode held. 119 */ 120 int 121 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 122 { 123 struct snode *csp; 124 vnode_t *vp, *dvp; 125 major_t maj; 126 int error; 127 128 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 129 130 /* 131 * Lookup the underlying filesystem vnode. 132 */ 133 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 134 if (error) 135 return (error); 136 137 /* Check that it is the correct vnode */ 138 if (vp->v_type != VCHR) { 139 VN_RELE(vp); 140 return (ENOTSOCK); 141 } 142 143 /* 144 * If devpath went through devfs, the device should already 145 * be configured. If devpath is a mknod file, however, we 146 * need to make sure the device is properly configured. 
147 * To do this, we do something similar to spec_open() 148 * except that we resolve to the minor/leaf level since 149 * we need to return a vnode. 150 */ 151 csp = VTOS(VTOS(vp)->s_commonvp); 152 if (!(csp->s_flag & SDIPSET)) { 153 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 154 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 155 if (error == 0) 156 error = devfs_lookupname(pathname, NULLVPP, &dvp); 157 VN_RELE(vp); 158 kmem_free(pathname, MAXPATHLEN); 159 if (error != 0) 160 return (ENXIO); 161 vp = dvp; /* use the devfs vp */ 162 } 163 164 /* device is configured at this point */ 165 maj = getmajor(vp->v_rdev); 166 if (!STREAMSTAB(maj)) { 167 VN_RELE(vp); 168 return (ENOSTR); 169 } 170 171 *vpp = vp; 172 return (0); 173 } 174 175 /* 176 * Update the accessed, updated, or changed times in an sonode 177 * with the current time. 178 * 179 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 180 * attributes in a fstat call. (They return the current time and 0 for 181 * all timestamps, respectively.) We maintain the current timestamps 182 * here primarily so that should sockmod be popped the resulting 183 * file descriptor will behave like a stream w.r.t. the timestamps. 184 */ 185 void 186 so_update_attrs(struct sonode *so, int flag) 187 { 188 time_t now = gethrestime_sec(); 189 190 if (SOCK_IS_NONSTR(so)) 191 return; 192 193 mutex_enter(&so->so_lock); 194 so->so_flag |= flag; 195 if (flag & SOACC) 196 SOTOTPI(so)->sti_atime = now; 197 if (flag & SOMOD) 198 SOTOTPI(so)->sti_mtime = now; 199 mutex_exit(&so->so_lock); 200 } 201 202 extern so_create_func_t sock_comm_create_function; 203 extern so_destroy_func_t sock_comm_destroy_function; 204 /* 205 * Init function called when sockfs is loaded. 
206 */ 207 int 208 sockinit(int fstype, char *name) 209 { 210 static const fs_operation_def_t sock_vfsops_template[] = { 211 NULL, NULL 212 }; 213 int error; 214 major_t dev; 215 char *err_str; 216 217 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 218 if (error != 0) { 219 zcmn_err(GLOBAL_ZONEID, CE_WARN, 220 "sockinit: bad vfs ops template"); 221 return (error); 222 } 223 224 error = vn_make_ops(name, socket_vnodeops_template, 225 &socket_vnodeops); 226 if (error != 0) { 227 err_str = "sockinit: bad socket vnode ops template"; 228 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 229 socket_vnodeops = NULL; 230 goto failure; 231 } 232 233 socket_cache = kmem_cache_create("socket_cache", 234 sizeof (struct sonode), 0, sonode_constructor, 235 sonode_destructor, NULL, NULL, NULL, 0); 236 237 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL); 238 239 error = socktpi_init(); 240 if (error != 0) { 241 err_str = NULL; 242 goto failure; 243 } 244 245 error = sod_init(); 246 if (error != 0) { 247 err_str = NULL; 248 goto failure; 249 } 250 251 /* 252 * Set up the default create and destroy functions 253 */ 254 sock_comm_create_function = socket_sonode_create; 255 sock_comm_destroy_function = socket_sonode_destroy; 256 257 /* 258 * Build initial list mapping socket parameters to vnode. 259 */ 260 smod_init(); 261 smod_add(sotpi_smod_create()); 262 263 sockparams_init(); 264 265 /* 266 * If sockets are needed before init runs /sbin/soconfig 267 * it is possible to preload the sockparams list here using 268 * calls like: 269 * sockconfig(1,2,3, "/dev/tcp", 0); 270 */ 271 272 /* 273 * Create a unique dev_t for use in so_fsid. 
274 */ 275 276 if ((dev = getudev()) == (major_t)-1) 277 dev = 0; 278 sockdev = makedevice(dev, 0); 279 280 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 281 sendfile_init(); 282 283 /* Initialize socket filters */ 284 sof_init(); 285 286 return (0); 287 288 failure: 289 (void) vfs_freevfsops_by_type(fstype); 290 if (socket_vnodeops != NULL) 291 vn_freevnodeops(socket_vnodeops); 292 if (err_str != NULL) 293 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 294 return (error); 295 } 296 297 /* 298 * Caller must hold the mutex. Used to set SOLOCKED. 299 */ 300 void 301 so_lock_single(struct sonode *so) 302 { 303 ASSERT(MUTEX_HELD(&so->so_lock)); 304 305 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 306 cv_wait_stop(&so->so_single_cv, &so->so_lock, 307 SO_LOCK_WAKEUP_TIME); 308 } 309 so->so_flag |= SOLOCKED; 310 } 311 312 /* 313 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 314 * Used to clear SOLOCKED or SOASYNC_UNBIND. 315 */ 316 void 317 so_unlock_single(struct sonode *so, int flag) 318 { 319 ASSERT(MUTEX_HELD(&so->so_lock)); 320 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 321 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 322 ASSERT(so->so_flag & flag); 323 /* 324 * Process the T_DISCON_IND on sti_discon_ind_mp. 325 * 326 * Call to so_drain_discon_ind will result in so_lock 327 * being dropped and re-acquired later. 328 */ 329 if (!SOCK_IS_NONSTR(so)) { 330 sotpi_info_t *sti = SOTOTPI(so); 331 332 if (sti->sti_discon_ind_mp != NULL) 333 so_drain_discon_ind(so); 334 } 335 336 cv_signal(&so->so_single_cv); 337 so->so_flag &= ~flag; 338 } 339 340 /* 341 * Caller must hold the mutex. Used to set SOREADLOCKED. 342 * If the caller wants nonblocking behavior it should set fmode. 
343 */ 344 int 345 so_lock_read(struct sonode *so, int fmode) 346 { 347 ASSERT(MUTEX_HELD(&so->so_lock)); 348 349 while (so->so_flag & SOREADLOCKED) { 350 if (fmode & (FNDELAY|FNONBLOCK)) 351 return (EWOULDBLOCK); 352 cv_wait_stop(&so->so_read_cv, &so->so_lock, 353 SO_LOCK_WAKEUP_TIME); 354 } 355 so->so_flag |= SOREADLOCKED; 356 return (0); 357 } 358 359 /* 360 * Like so_lock_read above but allows signals. 361 */ 362 int 363 so_lock_read_intr(struct sonode *so, int fmode) 364 { 365 ASSERT(MUTEX_HELD(&so->so_lock)); 366 367 while (so->so_flag & SOREADLOCKED) { 368 if (fmode & (FNDELAY|FNONBLOCK)) 369 return (EWOULDBLOCK); 370 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock)) 371 return (EINTR); 372 } 373 so->so_flag |= SOREADLOCKED; 374 return (0); 375 } 376 377 /* 378 * Caller must hold the mutex. Used to clear SOREADLOCKED, 379 * set in so_lock_read() or so_lock_read_intr(). 380 */ 381 void 382 so_unlock_read(struct sonode *so) 383 { 384 ASSERT(MUTEX_HELD(&so->so_lock)); 385 ASSERT(so->so_flag & SOREADLOCKED); 386 387 cv_signal(&so->so_read_cv); 388 so->so_flag &= ~SOREADLOCKED; 389 } 390 391 /* 392 * Verify that the specified offset falls within the mblk and 393 * that the resulting pointer is aligned. 394 * Returns NULL if not. 395 */ 396 void * 397 sogetoff(mblk_t *mp, t_uscalar_t offset, 398 t_uscalar_t length, uint_t align_size) 399 { 400 uintptr_t ptr1, ptr2; 401 402 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 403 ptr1 = (uintptr_t)mp->b_rptr + offset; 404 ptr2 = (uintptr_t)ptr1 + length; 405 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 406 eprintline(0); 407 return (NULL); 408 } 409 if ((ptr1 & (align_size - 1)) != 0) { 410 eprintline(0); 411 return (NULL); 412 } 413 return ((void *)ptr1); 414 } 415 416 /* 417 * Return the AF_UNIX underlying filesystem vnode matching a given name. 418 * Makes sure the sending and the destination sonodes are compatible. 419 * The vnode is returned held. 
*
 * The underlying filesystem VSOCK vnode has a v_stream pointer that
 * references the actual stream head (hence indirectly the actual sonode).
 *
 * so		the socket doing the lookup
 * soun		AF_UNIX address; sun_path is passed to lookupname()
 * checkaccess	when non-zero, require VREAD|VWRITE access to the vnode
 * vpp		set to the held underlying vnode on success
 *
 * Returns 0 on success.  Otherwise returns the lookupname() or
 * VOP_ACCESS() error, ENOTSOCK if either vnode is not VSOCK,
 * EDESTADDRREQ (SOCK_DGRAM) or ECONNREFUSED (other types) if the vnode
 * has no stream, or EPROTOTYPE if the two sockets differ in so_type.
 */
static int
so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
    vnode_t **vpp)
{
	vnode_t		*vp;	/* Underlying filesystem vnode */
	vnode_t		*rvp;	/* real vnode */
	vnode_t		*svp;	/* sockfs vnode */
	struct sonode	*so2;
	int		error;

	dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
	    soun->sun_path));

	error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}

	/*
	 * Traverse lofs mounts to get the real vnode.
	 */
	if (VOP_REALVP(vp, &rvp, NULL) == 0) {
		VN_HOLD(rvp);		/* hold the real vnode */
		VN_RELE(vp);		/* release hold from lookup */
		vp = rvp;
	}

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		eprintsoline(so, error);
		goto done2;
	}

	if (checkaccess) {
		/*
		 * Check that we have permissions to access the destination
		 * vnode. This check is not done in BSD but it is required
		 * by X/Open.
		 */
		error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
		if (error != 0) {
			eprintsoline(so, error);
			goto done2;
		}
	}

	/*
	 * Check if the remote socket has been closed.
	 *
	 * Synchronize with vn_rele_stream by holding v_lock while traversing
	 * v_stream->sd_vnode.
	 */
	mutex_enter(&vp->v_lock);
	if (vp->v_stream == NULL) {
		mutex_exit(&vp->v_lock);
		if (so->so_type == SOCK_DGRAM)
			error = EDESTADDRREQ;
		else
			error = ECONNREFUSED;

		eprintsoline(so, error);
		goto done2;
	}
	ASSERT(vp->v_stream->sd_vnode);
	svp = vp->v_stream->sd_vnode;
	/*
	 * holding v_lock on underlying filesystem vnode and acquiring
	 * it on sockfs vnode. Assumes that no code ever attempts to
	 * acquire these locks in the reverse order.
	 */
	VN_HOLD(svp);
	mutex_exit(&vp->v_lock);

	if (svp->v_type != VSOCK) {
		error = ENOTSOCK;
		eprintsoline(so, error);
		goto done;
	}

	so2 = VTOSO(svp);

	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		eprintsoline(so, error);
		goto done;
	}

	/* Only the underlying vnode is handed back; drop the sockfs hold. */
	VN_RELE(svp);
	*vpp = vp;
	return (0);

done:
	VN_RELE(svp);
done2:
	VN_RELE(vp);
	return (error);
}

/*
 * Verify peer address for connect and sendto/sendmsg.
 * Since sendto/sendmsg would not get synchronous errors from the transport
 * provider we have to do these ugly checks in the socket layer to
 * preserve compatibility with SunOS 4.X.
 *
 * Returns 0 if the address is acceptable for so->so_family.  Otherwise
 * returns EAFNOSUPPORT (sa_family mismatch), EINVAL (wrong length for
 * AF_INET/AF_INET6), ENOENT (AF_UNIX address shorter than the family
 * field) or ENAMETOOLONG (AF_UNIX address too long).
 */
int
so_addr_verify(struct sonode *so, const struct sockaddr *name,
    socklen_t namelen)
{
	int	family;

	dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
	    (void *)so, (void *)name, namelen));

	ASSERT(name != NULL);

	family = so->so_family;
	switch (family) {
	case AF_INET:
		if (name->sa_family != family) {
			eprintsoline(so, EAFNOSUPPORT);
			return (EAFNOSUPPORT);
		}
		if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
			eprintsoline(so, EINVAL);
			return (EINVAL);
		}
		break;
	case AF_INET6: {
#ifdef DEBUG
		struct sockaddr_in6 *sin6;
#endif /* DEBUG */

		if (name->sa_family != family) {
			eprintsoline(so, EAFNOSUPPORT);
			return (EAFNOSUPPORT);
		}
		if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
			eprintsoline(so, EINVAL);
			return (EINVAL);
		}
#ifdef DEBUG
		/* Verify that apps don't forget to clear sin6_scope_id etc */
		sin6 = (struct sockaddr_in6 *)name;
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "connect/send* with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id, (int)curproc->p_pid);
		}
#endif /* DEBUG */
		break;
	}
	case AF_UNIX:
		/* No checks at all when the address is not translated. */
		if (SOTOTPI(so)->sti_faddr_noxlate) {
			return (0);
		}
		if (namelen < (socklen_t)sizeof (short)) {
			eprintsoline(so, ENOENT);
			return (ENOENT);
		}
		if (name->sa_family != family) {
			eprintsoline(so, EAFNOSUPPORT);
			return (EAFNOSUPPORT);
		}
		/* MAXPATHLEN + soun_family + nul termination */
		if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
			eprintsoline(so, ENAMETOOLONG);
			return (ENAMETOOLONG);
		}

		break;

	default:
		/*
		 * Default is don't do any length or sa_family check
		 * to allow non-sockaddr style addresses.
		 */
		break;
	}

	return (0);
}


/*
 * Translate an AF_UNIX sockaddr_un to the transport internal name.
 * Assumes caller has called so_addr_verify first. The translated
 * (internal form) address is stored in sti->sti_ux_taddr.
 *
 * On success *addrp points at sti->sti_ux_taddr and *addrlenp is its
 * size.  On failure the so_ux_lookup() error is returned and the output
 * arguments are not written.
 */
/*ARGSUSED*/
int
so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
    socklen_t namelen, int checkaccess,
    void **addrp, socklen_t *addrlenp)
{
	int			error;
	struct sockaddr_un	*soun;
	vnode_t			*vp;
	void			*addr;
	socklen_t		addrlen;
	sotpi_info_t		*sti = SOTOTPI(so);

	dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
	    (void *)so, (void *)name, namelen, checkaccess));

	ASSERT(name != NULL);
	ASSERT(so->so_family == AF_UNIX);
	ASSERT(!sti->sti_faddr_noxlate);
	ASSERT(namelen >= (socklen_t)sizeof (short));
	ASSERT(name->sa_family == AF_UNIX);
	soun = (struct sockaddr_un *)name;
	/*
	 * Lookup vnode for the specified path name and verify that
	 * it is a socket.
	 */
	error = so_ux_lookup(so, soun, checkaccess, &vp);
	if (error) {
		eprintsoline(so, error);
		return (error);
	}
	/*
	 * Use the address of the peer vnode as the address to send
	 * to. We release the peer vnode here. In case it has been
	 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
	 * transport the message will get an error or be dropped.
	 * Note that soua_vp is never dereferenced; it's just a
	 * convenient value by which we can identify the peer.
	 */
	sti->sti_ux_taddr.soua_vp = vp;
	sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT;
	addr = &sti->sti_ux_taddr;
	addrlen = (socklen_t)sizeof (sti->sti_ux_taddr);
	dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
	    addrlen, (void *)vp));
	VN_RELE(vp);
	*addrp = addr;
	*addrlenp = (socklen_t)addrlen;
	return (0);
}

/*
 * Esballoc free function for messages that contain SO_FILEP option.
 * Decrement the reference count on the file pointers using closef.
 *
 * Also frees fd_ebuf (when set) and the fdbuf itself.
 */
void
fdbuf_free(struct fdbuf *fdbuf)
{
	int i;
	struct file *fp;

	dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
	for (i = 0; i < fdbuf->fd_numfd; i++) {
		/*
		 * We need pointer size alignment for fd_fds. On a LP64
		 * kernel, the required alignment is 8 bytes while
		 * the option headers and values are only 4 bytes
		 * aligned. So it is safer to do a bcopy compared to
		 * assigning fdbuf->fd_fds[i] to fp.
		 */
		bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
		dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
		(void) closef(fp);
	}
	if (fdbuf->fd_ebuf != NULL)
		kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
	kmem_free(fdbuf, fdbuf->fd_size);
}

/*
 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
 * Waits if memory is not available.
697 */ 698 mblk_t * 699 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 700 { 701 uchar_t *buf; 702 mblk_t *mp; 703 704 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 705 buf = kmem_alloc(size, KM_SLEEP); 706 fdbuf->fd_ebuf = (caddr_t)buf; 707 fdbuf->fd_ebuflen = size; 708 fdbuf->fd_frtn.free_func = fdbuf_free; 709 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 710 711 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 712 mp->b_datap->db_type = M_PROTO; 713 return (mp); 714 } 715 716 /* 717 * Extract file descriptors from a fdbuf. 718 * Return list in rights/rightslen. 719 */ 720 /*ARGSUSED*/ 721 static int 722 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 723 { 724 int i, fd; 725 int *rp; 726 struct file *fp; 727 int numfd; 728 729 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 730 fdbuf->fd_numfd, rightslen)); 731 732 numfd = fdbuf->fd_numfd; 733 ASSERT(rightslen == numfd * (int)sizeof (int)); 734 735 /* 736 * Allocate a file descriptor and increment the f_count. 737 * The latter is needed since we always call fdbuf_free 738 * which performs a closef. 739 */ 740 rp = (int *)rights; 741 for (i = 0; i < numfd; i++) { 742 if ((fd = ufalloc(0)) == -1) 743 goto cleanup; 744 /* 745 * We need pointer size alignment for fd_fds. On a LP64 746 * kernel, the required alignment is 8 bytes while 747 * the option headers and values are only 4 bytes 748 * aligned. So its safer to do a bcopy compared to 749 * assigning fdbuf->fd_fds[i] to fp. 750 */ 751 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 752 mutex_enter(&fp->f_tlock); 753 fp->f_count++; 754 mutex_exit(&fp->f_tlock); 755 setf(fd, fp); 756 *rp++ = fd; 757 if (AU_AUDITING()) 758 audit_fdrecv(fd, fp); 759 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 760 i, fd, (void *)fp, fp->f_count)); 761 } 762 return (0); 763 764 cleanup: 765 /* 766 * Undo whatever partial work the loop above has done. 
767 */ 768 { 769 int j; 770 771 rp = (int *)rights; 772 for (j = 0; j < i; j++) { 773 dprint(0, 774 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 775 (void) closeandsetf(*rp++, NULL); 776 } 777 } 778 779 return (EMFILE); 780 } 781 782 /* 783 * Insert file descriptors into an fdbuf. 784 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 785 * by calling fdbuf_free(). 786 */ 787 int 788 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 789 { 790 int numfd, i; 791 int *fds; 792 struct file *fp; 793 struct fdbuf *fdbuf; 794 int fdbufsize; 795 796 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 797 798 numfd = rightslen / (int)sizeof (int); 799 800 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 801 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 802 fdbuf->fd_size = fdbufsize; 803 fdbuf->fd_numfd = 0; 804 fdbuf->fd_ebuf = NULL; 805 fdbuf->fd_ebuflen = 0; 806 fds = (int *)rights; 807 for (i = 0; i < numfd; i++) { 808 if ((fp = getf(fds[i])) == NULL) { 809 fdbuf_free(fdbuf); 810 return (EBADF); 811 } 812 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 813 i, fds[i], (void *)fp, fp->f_count)); 814 mutex_enter(&fp->f_tlock); 815 fp->f_count++; 816 mutex_exit(&fp->f_tlock); 817 /* 818 * The maximum alignment for fdbuf (or any option header 819 * and its value) it 4 bytes. On a LP64 kernel, the alignment 820 * is not sufficient for pointers (fd_fds in this case). Since 821 * we just did a kmem_alloc (we get a double word alignment), 822 * we don't need to do anything on the send side (we loose 823 * the double word alignment because fdbuf goes after an 824 * option header (eg T_unitdata_req) which is only 4 byte 825 * aligned). We take care of this when we extract the file 826 * descriptor in fdbuf_extract or fdbuf_free. 
827 */ 828 fdbuf->fd_fds[i] = fp; 829 fdbuf->fd_numfd++; 830 releasef(fds[i]); 831 if (AU_AUDITING()) 832 audit_fdsend(fds[i], fp, 0); 833 } 834 *fdbufp = fdbuf; 835 return (0); 836 } 837 838 static int 839 fdbuf_optlen(int rightslen) 840 { 841 int numfd; 842 843 numfd = rightslen / (int)sizeof (int); 844 845 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 846 } 847 848 static t_uscalar_t 849 fdbuf_cmsglen(int fdbuflen) 850 { 851 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 852 (int)sizeof (struct file *) * (int)sizeof (int)); 853 } 854 855 856 /* 857 * Return non-zero if the mblk and fdbuf are consistent. 858 */ 859 static int 860 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 861 { 862 if (fdbuflen >= FDBUF_HDRSIZE && 863 fdbuflen == fdbuf->fd_size) { 864 frtn_t *frp = mp->b_datap->db_frtnp; 865 /* 866 * Check that the SO_FILEP portion of the 867 * message has not been modified by 868 * the loopback transport. The sending sockfs generates 869 * a message that is esballoc'ed with the free function 870 * being fdbuf_free() and where free_arg contains the 871 * identical information as the SO_FILEP content. 872 * 873 * If any of these constraints are not satisfied we 874 * silently ignore the option. 875 */ 876 ASSERT(mp); 877 if (frp != NULL && 878 frp->free_func == fdbuf_free && 879 frp->free_arg != NULL && 880 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 881 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 882 (void *)fdbuf, fdbuflen)); 883 return (1); 884 } else { 885 zcmn_err(getzoneid(), CE_WARN, 886 "sockfs: mismatched fdbuf content (%p)", 887 (void *)mp); 888 return (0); 889 } 890 } else { 891 zcmn_err(getzoneid(), CE_WARN, 892 "sockfs: mismatched fdbuf len %d, %d\n", 893 fdbuflen, fdbuf->fd_size); 894 return (0); 895 } 896 } 897 898 /* 899 * When the file descriptors returned by sorecvmsg can not be passed 900 * to the application this routine will cleanup the references on 901 * the files. 
Start at startoff bytes into the buffer. 902 */ 903 static void 904 close_fds(void *fdbuf, int fdbuflen, int startoff) 905 { 906 int *fds = (int *)fdbuf; 907 int numfd = fdbuflen / (int)sizeof (int); 908 int i; 909 910 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 911 912 for (i = 0; i < numfd; i++) { 913 if (startoff < 0) 914 startoff = 0; 915 if (startoff < (int)sizeof (int)) { 916 /* 917 * This file descriptor is partially or fully after 918 * the offset 919 */ 920 dprint(0, 921 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 922 (void) closeandsetf(fds[i], NULL); 923 } 924 startoff -= (int)sizeof (int); 925 } 926 } 927 928 /* 929 * Close all file descriptors contained in the control part starting at 930 * the startoffset. 931 */ 932 void 933 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 934 int startoff) 935 { 936 struct cmsghdr *cmsg; 937 938 if (control == NULL) 939 return; 940 941 if (oldflg) { 942 close_fds(control, controllen, startoff); 943 return; 944 } 945 /* Scan control part for file descriptors. */ 946 for (cmsg = (struct cmsghdr *)control; 947 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 948 cmsg = CMSG_NEXT(cmsg)) { 949 if (cmsg->cmsg_level == SOL_SOCKET && 950 cmsg->cmsg_type == SCM_RIGHTS) { 951 close_fds(CMSG_CONTENT(cmsg), 952 (int)CMSG_CONTENTLEN(cmsg), 953 startoff - (int)sizeof (struct cmsghdr)); 954 } 955 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len); 956 } 957 } 958 959 /* 960 * Handle truncation of a cmsg when the receive buffer is not big enough. 961 * Adjust the cmsg_len header field in the last cmsg that will be included in 962 * the buffer to reflect the number of bytes included. 
963 */ 964 void 965 so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen) 966 { 967 struct cmsghdr *cmsg; 968 uint_t len = 0; 969 970 if (control == NULL) 971 return; 972 973 for (cmsg = control; 974 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 975 cmsg = CMSG_NEXT(cmsg)) { 976 977 len += ROUNDUP_cmsglen(cmsg->cmsg_len); 978 979 if (len > maxlen) { 980 /* 981 * This cmsg is the last one that will be included in 982 * the truncated buffer. 983 */ 984 socklen_t diff = len - maxlen; 985 986 if (diff < CMSG_CONTENTLEN(cmsg)) { 987 dprint(1, ("so_truncatecmsg: %d -> %d\n", 988 cmsg->cmsg_len, cmsg->cmsg_len - diff)); 989 cmsg->cmsg_len -= diff; 990 } else { 991 cmsg->cmsg_len = sizeof (struct cmsghdr); 992 } 993 break; 994 } 995 } 996 } 997 998 /* 999 * Returns a pointer/length for the file descriptors contained 1000 * in the control buffer. Returns with *fdlenp == -1 if there are no 1001 * file descriptor options present. This is different than there being 1002 * a zero-length file descriptor option. 1003 * Fail if there are multiple SCM_RIGHT cmsgs. 
1004 */ 1005 int 1006 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1007 void **fdsp, int *fdlenp) 1008 { 1009 struct cmsghdr *cmsg; 1010 void *fds; 1011 int fdlen; 1012 1013 if (control == NULL) { 1014 *fdsp = NULL; 1015 *fdlenp = -1; 1016 return (0); 1017 } 1018 1019 if (oldflg) { 1020 *fdsp = control; 1021 if (controllen == 0) 1022 *fdlenp = -1; 1023 else 1024 *fdlenp = controllen; 1025 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1026 return (0); 1027 } 1028 1029 fds = NULL; 1030 fdlen = 0; 1031 1032 for (cmsg = (struct cmsghdr *)control; 1033 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1034 cmsg = CMSG_NEXT(cmsg)) { 1035 if (cmsg->cmsg_level == SOL_SOCKET && 1036 cmsg->cmsg_type == SCM_RIGHTS) { 1037 if (fds != NULL) 1038 return (EINVAL); 1039 fds = CMSG_CONTENT(cmsg); 1040 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1041 dprint(1, ("so_getfdopt: new %lu\n", 1042 (size_t)CMSG_CONTENTLEN(cmsg))); 1043 } 1044 } 1045 if (fds == NULL) { 1046 dprint(1, ("so_getfdopt: NONE\n")); 1047 *fdlenp = -1; 1048 } else 1049 *fdlenp = fdlen; 1050 *fdsp = fds; 1051 return (0); 1052 } 1053 1054 /* 1055 * Return the length of the options including any file descriptor options. 
*/
/*
 * control/controllen describe the cmsg buffer.  With oldflg set the
 * buffer is treated as a bare descriptor array and the result is one
 * T_opthdr plus the matching fdbuf size.  Otherwise every valid cmsg
 * contributes one T_opthdr plus its content length rounded up with
 * _TPI_ALIGN_TOPT(); SCM_RIGHTS content is sized with fdbuf_optlen().
 * Returns 0 for a NULL control buffer.
 */
t_uscalar_t
so_optlen(void *control, t_uscalar_t controllen, int oldflg)
{
	struct cmsghdr *cmsg;
	t_uscalar_t optlen = 0;
	t_uscalar_t len;

	if (control == NULL)
		return (0);

	if (oldflg)
		return ((t_uscalar_t)(sizeof (struct T_opthdr) +
		    fdbuf_optlen(controllen)));

	for (cmsg = (struct cmsghdr *)control;
	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
	    cmsg = CMSG_NEXT(cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS) {
			len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
		} else {
			len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
		}
		optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
		    sizeof (struct T_opthdr));
	}
	dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
	    controllen, oldflg, optlen));
	return (optlen);
}

/*
 * Copy options from control to the mblk. Skip any file descriptor options.
 *
 * Each remaining cmsg is appended to mp as a T_opthdr followed by its
 * content, and b_wptr is then advanced over the alignment padding.
 * Nothing is copied when control is NULL or oldflg is set.
 */
void
so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
{
	struct T_opthdr toh;
	struct cmsghdr *cmsg;

	if (control == NULL)
		return;

	if (oldflg) {
		/* No real options - caller has handled file descriptors */
		return;
	}
	for (cmsg = (struct cmsghdr *)control;
	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
	    cmsg = CMSG_NEXT(cmsg)) {
		/*
		 * Note: The caller handles file descriptors prior
		 * to calling this function.
		 */
		t_uscalar_t len;

		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS)
			continue;

		len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
		toh.level = cmsg->cmsg_level;
		toh.name = cmsg->cmsg_type;
		toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
		toh.status = 0;

		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, CMSG_CONTENT(cmsg), len);
		/* Skip the padding so the next header is aligned. */
		mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	}
}

/*
 * Return the length of the control message derived from the options.
 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
 * When oldflg is set only include SO_FILEP.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 *
 * mp is used only to verify SO_FILEP options via fdbuf_verify(); an
 * option that fails verification contributes nothing to the length.
 */
t_uscalar_t
so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
{
	t_uscalar_t cmsglen = 0;
	struct T_opthdr *tohp;
	t_uscalar_t len;
	t_uscalar_t last_roundup = 0;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			struct fdbuf *fdbuf;
			int fdbuflen;

			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				continue;
			if (oldflg) {
				/* Old style: bare descriptors, no cmsghdr. */
				cmsglen += fdbuf_cmsglen(fdbuflen);
				continue;
			}
			len = fdbuf_cmsglen(fdbuflen);
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			if (oldflg)
				continue;

			/* Sized for the caller's data model. */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				len = sizeof (struct timeval);
			} else {
				len = sizeof (struct timeval32);
			}
		} else {
			if (oldflg)
				continue;
			len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
		}
		/*
		 * Exclude roundup for last option to not set
		 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
		 */
		last_roundup = (t_uscalar_t)
		    (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
		    (len + (int)sizeof (struct cmsghdr)));
		cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
		    last_roundup;
	}
	/* Drop the padding that was added after the final cmsg. */
	cmsglen -= last_roundup;
	dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
	    optlen, oldflg, cmsglen));
	return (cmsglen);
}

/*
 * Copy options from options to the control. Convert SO_FILEP to
 * file descriptors.
 * Returns errno or zero.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
	/*
	 * cend marks the rounded-up end of the control buffer. It is only
	 * declared for DEBUG/lint builds and is referenced below only from
	 * dprint() and ASSERT().
	 */
#if defined(DEBUG) || defined(__lint)
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These two options never produce a cmsg. */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			/*
			 * Unlike so_cmsglen, which skips an fdbuf that fails
			 * verification, this is a hard error here.
			 */
			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * With oldflg the descriptors are extracted
				 * straight into the start of control, with
				 * no cmsghdr, and cmsg is not advanced.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* SO_FILEP is presented as SCM_RIGHTS. */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestruc_t is read from the first
			 * intptr_t-aligned address after the option header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			/*
			 * Convert to a timeval of the caller's data model;
			 * nanoseconds become microseconds.
			 */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			/* Start from the header size and add the payload. */
			cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr);
			if (tohp->level == IPPROTO_IP &&
			    (tohp->name == IP_RECVTOS ||
			    tohp->name == IP_RECVTTL)) {
				/*
				 * The data for these is a uint8_t but, in
				 * order to maintain alignment for any
				 * following TPI primitives in the message,
				 * there will be some trailing padding bytes
				 * which are included in the TPI_TOPT_DATALEN.
				 * For these types, we set the cmsg_len
				 * explicitly to the correct value.
				 */
				cmsg->cmsg_len += (socklen_t)sizeof (uint8_t);
			} else {
				cmsg->cmsg_len +=
				    (socklen_t)(_TPI_TOPT_DATALEN(tohp));
			}

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}

/*
 * Extract the SO_SRCADDR option value if present.
 *
 * *srcp and *srclenp are set to NULL and 0 first, so that is what the
 * caller sees when no SOL_SOCKET/SO_SRCADDR option is found. *srcp points
 * into the opt buffer; nothing is copied.
 */
void
so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
    t_uscalar_t *srclenp)
{
	struct T_opthdr *tohp;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	ASSERT(srcp != NULL && srclenp != NULL);
	*srcp = NULL;
	*srclenp = 0;

	/*
	 * The walk does not stop at the first match, so if the option
	 * appears more than once the last occurrence is reported.
	 */
	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));
		if (tohp->level == SOL_SOCKET &&
		    tohp->name == SO_SRCADDR) {
			*srcp = _TPI_TOPT_DATA(tohp);
			*srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
		}
	}
}

/*
 * Verify if the SO_UNIX_CLOSE option is present.
 *
 * Returns 1 as soon as a SOL_SOCKET/SO_UNIX_CLOSE option is found in the
 * opt buffer, otherwise 0.
 */
int
so_getopt_unix_close(void *opt, t_uscalar_t optlen)
{
	struct T_opthdr *tohp;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1,
		    ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));
		if (tohp->level == SOL_SOCKET &&
		    tohp->name == SO_UNIX_CLOSE)
			return (1);
	}
	return (0);
}

/*
 * Allocate an M_PROTO message.
 *
 * If allocation fails the behavior depends on sleepflg:
 *	_ALLOC_NOSLEEP	fail immediately
 *	_ALLOC_INTR	sleep for memory until a signal is caught
 *	_ALLOC_SLEEP	sleep forever. Don't return NULL.
1404 */ 1405 mblk_t * 1406 soallocproto(size_t size, int sleepflg, cred_t *cr) 1407 { 1408 mblk_t *mp; 1409 1410 /* Round up size for reuse */ 1411 size = MAX(size, 64); 1412 if (cr != NULL) 1413 mp = allocb_cred(size, cr, curproc->p_pid); 1414 else 1415 mp = allocb(size, BPRI_MED); 1416 1417 if (mp == NULL) { 1418 int error; /* Dummy - error not returned to caller */ 1419 1420 switch (sleepflg) { 1421 case _ALLOC_SLEEP: 1422 if (cr != NULL) { 1423 mp = allocb_cred_wait(size, STR_NOSIG, &error, 1424 cr, curproc->p_pid); 1425 } else { 1426 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, 1427 &error); 1428 } 1429 ASSERT(mp); 1430 break; 1431 case _ALLOC_INTR: 1432 if (cr != NULL) { 1433 mp = allocb_cred_wait(size, 0, &error, cr, 1434 curproc->p_pid); 1435 } else { 1436 mp = allocb_wait(size, BPRI_MED, 0, &error); 1437 } 1438 if (mp == NULL) { 1439 /* Caught signal while sleeping for memory */ 1440 eprintline(ENOBUFS); 1441 return (NULL); 1442 } 1443 break; 1444 case _ALLOC_NOSLEEP: 1445 default: 1446 eprintline(ENOBUFS); 1447 return (NULL); 1448 } 1449 } 1450 DB_TYPE(mp) = M_PROTO; 1451 return (mp); 1452 } 1453 1454 /* 1455 * Allocate an M_PROTO message with a single component. 1456 * len is the length of buf. size is the amount to allocate. 1457 * 1458 * buf can be NULL with a non-zero len. 1459 * This results in a bzero'ed chunk being placed the message. 1460 */ 1461 mblk_t * 1462 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg, 1463 cred_t *cr) 1464 { 1465 mblk_t *mp; 1466 1467 if (size == 0) 1468 size = len; 1469 1470 ASSERT(size >= len); 1471 /* Round up size for reuse */ 1472 size = MAX(size, 64); 1473 mp = soallocproto(size, sleepflg, cr); 1474 if (mp == NULL) 1475 return (NULL); 1476 mp->b_datap->db_type = M_PROTO; 1477 if (len != 0) { 1478 if (buf != NULL) 1479 bcopy(buf, mp->b_wptr, len); 1480 else 1481 bzero(mp->b_wptr, len); 1482 mp->b_wptr += len; 1483 } 1484 return (mp); 1485 } 1486 1487 /* 1488 * Append buf/len to mp. 
1489 * The caller has to ensure that there is enough room in the mblk. 1490 * 1491 * buf can be NULL with a non-zero len. 1492 * This results in a bzero'ed chunk being placed the message. 1493 */ 1494 void 1495 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1496 { 1497 ASSERT(mp); 1498 1499 if (len != 0) { 1500 /* Assert for room left */ 1501 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1502 if (buf != NULL) 1503 bcopy(buf, mp->b_wptr, len); 1504 else 1505 bzero(mp->b_wptr, len); 1506 } 1507 mp->b_wptr += len; 1508 } 1509 1510 /* 1511 * Create a message using two kernel buffers. 1512 * If size is set that will determine the allocation size (e.g. for future 1513 * soappendmsg calls). If size is zero it is derived from the buffer 1514 * lengths. 1515 */ 1516 mblk_t * 1517 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1518 ssize_t size, int sleepflg, cred_t *cr) 1519 { 1520 mblk_t *mp; 1521 1522 if (size == 0) 1523 size = len1 + len2; 1524 ASSERT(size >= len1 + len2); 1525 1526 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1527 if (mp) 1528 soappendmsg(mp, buf2, len2); 1529 return (mp); 1530 } 1531 1532 /* 1533 * Create a message using three kernel buffers. 1534 * If size is set that will determine the allocation size (for future 1535 * soappendmsg calls). If size is zero it is derived from the buffer 1536 * lengths. 
1537 */ 1538 mblk_t * 1539 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1540 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr) 1541 { 1542 mblk_t *mp; 1543 1544 if (size == 0) 1545 size = len1 + len2 +len3; 1546 ASSERT(size >= len1 + len2 + len3); 1547 1548 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1549 if (mp != NULL) { 1550 soappendmsg(mp, buf2, len2); 1551 soappendmsg(mp, buf3, len3); 1552 } 1553 return (mp); 1554 } 1555 1556 #ifdef DEBUG 1557 char * 1558 pr_state(uint_t state, uint_t mode) 1559 { 1560 static char buf[1024]; 1561 1562 buf[0] = 0; 1563 if (state & SS_ISCONNECTED) 1564 (void) strcat(buf, "ISCONNECTED "); 1565 if (state & SS_ISCONNECTING) 1566 (void) strcat(buf, "ISCONNECTING "); 1567 if (state & SS_ISDISCONNECTING) 1568 (void) strcat(buf, "ISDISCONNECTING "); 1569 if (state & SS_CANTSENDMORE) 1570 (void) strcat(buf, "CANTSENDMORE "); 1571 1572 if (state & SS_CANTRCVMORE) 1573 (void) strcat(buf, "CANTRCVMORE "); 1574 if (state & SS_ISBOUND) 1575 (void) strcat(buf, "ISBOUND "); 1576 if (state & SS_NDELAY) 1577 (void) strcat(buf, "NDELAY "); 1578 if (state & SS_NONBLOCK) 1579 (void) strcat(buf, "NONBLOCK "); 1580 1581 if (state & SS_ASYNC) 1582 (void) strcat(buf, "ASYNC "); 1583 if (state & SS_ACCEPTCONN) 1584 (void) strcat(buf, "ACCEPTCONN "); 1585 if (state & SS_SAVEDEOR) 1586 (void) strcat(buf, "SAVEDEOR "); 1587 1588 if (state & SS_RCVATMARK) 1589 (void) strcat(buf, "RCVATMARK "); 1590 if (state & SS_OOBPEND) 1591 (void) strcat(buf, "OOBPEND "); 1592 if (state & SS_HAVEOOBDATA) 1593 (void) strcat(buf, "HAVEOOBDATA "); 1594 if (state & SS_HADOOBDATA) 1595 (void) strcat(buf, "HADOOBDATA "); 1596 1597 if (mode & SM_PRIV) 1598 (void) strcat(buf, "PRIV "); 1599 if (mode & SM_ATOMIC) 1600 (void) strcat(buf, "ATOMIC "); 1601 if (mode & SM_ADDR) 1602 (void) strcat(buf, "ADDR "); 1603 if (mode & SM_CONNREQUIRED) 1604 (void) strcat(buf, "CONNREQUIRED "); 1605 1606 if (mode & SM_FDPASSING) 
1607 (void) strcat(buf, "FDPASSING "); 1608 if (mode & SM_EXDATA) 1609 (void) strcat(buf, "EXDATA "); 1610 if (mode & SM_OPTDATA) 1611 (void) strcat(buf, "OPTDATA "); 1612 if (mode & SM_BYTESTREAM) 1613 (void) strcat(buf, "BYTESTREAM "); 1614 return (buf); 1615 } 1616 1617 char * 1618 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 1619 { 1620 static char buf[1024]; 1621 1622 if (addr == NULL || addrlen == 0) { 1623 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr); 1624 return (buf); 1625 } 1626 switch (family) { 1627 case AF_INET: { 1628 struct sockaddr_in sin; 1629 1630 bcopy(addr, &sin, sizeof (sin)); 1631 1632 (void) sprintf(buf, "(len %d) %x/%d", 1633 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 1634 break; 1635 } 1636 case AF_INET6: { 1637 struct sockaddr_in6 sin6; 1638 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 1639 1640 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 1641 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 1642 addrlen, 1643 ntohs(piece[0]), ntohs(piece[1]), 1644 ntohs(piece[2]), ntohs(piece[3]), 1645 ntohs(piece[4]), ntohs(piece[5]), 1646 ntohs(piece[6]), ntohs(piece[7]), 1647 ntohs(sin6.sin6_port)); 1648 break; 1649 } 1650 case AF_UNIX: { 1651 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 1652 1653 (void) sprintf(buf, "(len %d) %s", addrlen, 1654 (soun == NULL) ? "(none)" : soun->sun_path); 1655 break; 1656 } 1657 default: 1658 (void) sprintf(buf, "(unknown af %d)", family); 1659 break; 1660 } 1661 return (buf); 1662 } 1663 1664 /* The logical equivalence operator (a if-and-only-if b) */ 1665 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b)))) 1666 1667 /* 1668 * Verify limitations and invariants on oob state. 
1669 * Return 1 if OK, otherwise 0 so that it can be used as 1670 * ASSERT(verify_oobstate(so)); 1671 */ 1672 int 1673 so_verify_oobstate(struct sonode *so) 1674 { 1675 boolean_t havemark; 1676 1677 ASSERT(MUTEX_HELD(&so->so_lock)); 1678 1679 /* 1680 * The possible state combinations are: 1681 * 0 1682 * SS_OOBPEND 1683 * SS_OOBPEND|SS_HAVEOOBDATA 1684 * SS_OOBPEND|SS_HADOOBDATA 1685 * SS_HADOOBDATA 1686 */ 1687 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 1688 case 0: 1689 case SS_OOBPEND: 1690 case SS_OOBPEND|SS_HAVEOOBDATA: 1691 case SS_OOBPEND|SS_HADOOBDATA: 1692 case SS_HADOOBDATA: 1693 break; 1694 default: 1695 printf("Bad oob state 1 (%p): state %s\n", 1696 (void *)so, pr_state(so->so_state, so->so_mode)); 1697 return (0); 1698 } 1699 1700 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 1701 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 1702 printf("Bad oob state 2 (%p): state %s\n", 1703 (void *)so, pr_state(so->so_state, so->so_mode)); 1704 return (0); 1705 } 1706 1707 /* 1708 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND 1709 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt. 1710 */ 1711 havemark = (SOCK_IS_NONSTR(so)) ? 
so->so_oobmark > 0 : 1712 SOTOTPI(so)->sti_oobsigcnt > 0; 1713 1714 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK), 1715 so->so_state & SS_OOBPEND)) { 1716 printf("Bad oob state 3 (%p): state %s\n", 1717 (void *)so, pr_state(so->so_state, so->so_mode)); 1718 return (0); 1719 } 1720 1721 /* 1722 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 1723 */ 1724 if (!(so->so_options & SO_OOBINLINE) && 1725 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 1726 printf("Bad oob state 4 (%p): state %s\n", 1727 (void *)so, pr_state(so->so_state, so->so_mode)); 1728 return (0); 1729 } 1730 1731 if (!SOCK_IS_NONSTR(so) && 1732 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) { 1733 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 1734 (void *)so, SOTOTPI(so)->sti_oobsigcnt, 1735 SOTOTPI(so)->sti_oobcnt, 1736 pr_state(so->so_state, so->so_mode)); 1737 return (0); 1738 } 1739 1740 return (1); 1741 } 1742 #undef EQUIVALENT 1743 #endif /* DEBUG */ 1744 1745 /* initialize sockfs zone specific kstat related items */ 1746 void * 1747 sock_kstat_init(zoneid_t zoneid) 1748 { 1749 kstat_t *ksp; 1750 1751 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 1752 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 1753 1754 if (ksp != NULL) { 1755 ksp->ks_update = sockfs_update; 1756 ksp->ks_snapshot = sockfs_snapshot; 1757 ksp->ks_lock = &socklist.sl_lock; 1758 ksp->ks_private = (void *)(uintptr_t)zoneid; 1759 kstat_install(ksp); 1760 } 1761 1762 return (ksp); 1763 } 1764 1765 /* tear down sockfs zone specific kstat related items */ 1766 /*ARGSUSED*/ 1767 void 1768 sock_kstat_fini(zoneid_t zoneid, void *arg) 1769 { 1770 kstat_t *ksp = (kstat_t *)arg; 1771 1772 if (ksp != NULL) { 1773 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 1774 kstat_delete(ksp); 1775 } 1776 } 1777 1778 /* 1779 * Zones: 1780 * Note that nactive is going to be different for each zone. 
1781 * This means we require kstat to call sockfs_update and then sockfs_snapshot 1782 * for the same zone, or sockfs_snapshot will be taken into the wrong size 1783 * buffer. This is safe, but if the buffer is too small, user will not be 1784 * given details of all sockets. However, as this kstat has a ks_lock, kstat 1785 * driver will keep it locked between the update and the snapshot, so no 1786 * other process (zone) can currently get inbetween resulting in a wrong size 1787 * buffer allocation. 1788 */ 1789 static int 1790 sockfs_update(kstat_t *ksp, int rw) 1791 { 1792 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 1793 struct sonode *so; /* current sonode on socklist */ 1794 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1795 1796 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1797 1798 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1799 return (EACCES); 1800 } 1801 1802 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1803 if (so->so_count != 0 && so->so_zoneid == myzoneid) { 1804 nactive++; 1805 } 1806 } 1807 ksp->ks_ndata = nactive; 1808 ksp->ks_data_size = nactive * sizeof (struct sockinfo); 1809 1810 return (0); 1811 } 1812 1813 static int 1814 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 1815 { 1816 int ns; /* # of sonodes we've copied */ 1817 struct sonode *so; /* current sonode on socklist */ 1818 struct sockinfo *psi; /* where we put sockinfo data */ 1819 t_uscalar_t sn_len; /* soa_len */ 1820 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1821 sotpi_info_t *sti; 1822 1823 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1824 1825 ksp->ks_snaptime = gethrtime(); 1826 1827 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1828 return (EACCES); 1829 } 1830 1831 /* 1832 * For each sonode on the socklist, we massage the important 1833 * info into buf, in sockinfo format. 
1834 */ 1835 psi = (struct sockinfo *)buf; 1836 ns = 0; 1837 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1838 vattr_t attr; 1839 1840 /* only stuff active sonodes and the same zone: */ 1841 if (so->so_count == 0 || so->so_zoneid != myzoneid) { 1842 continue; 1843 } 1844 1845 /* 1846 * If the sonode was activated between the update and the 1847 * snapshot, we're done - as this is only a snapshot. 1848 */ 1849 if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) { 1850 break; 1851 } 1852 1853 sti = SOTOTPI(so); 1854 /* copy important info into buf: */ 1855 psi->si_size = sizeof (struct sockinfo); 1856 psi->si_family = so->so_family; 1857 psi->si_type = so->so_type; 1858 psi->si_flag = so->so_flag; 1859 psi->si_state = so->so_state; 1860 psi->si_serv_type = sti->sti_serv_type; 1861 psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic; 1862 psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic; 1863 psi->si_laddr_soa_len = sti->sti_laddr.soa_len; 1864 psi->si_faddr_soa_len = sti->sti_faddr.soa_len; 1865 psi->si_szoneid = so->so_zoneid; 1866 psi->si_faddr_noxlate = sti->sti_faddr_noxlate; 1867 1868 /* 1869 * Grab the inode, if possible. 1870 * This must be done before entering so_lock as VOP_GETATTR 1871 * will acquire it. 
1872 */ 1873 if (so->so_vnode == NULL || 1874 VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0) 1875 attr.va_nodeid = 0; 1876 1877 psi->si_inode = attr.va_nodeid; 1878 1879 mutex_enter(&so->so_lock); 1880 1881 if (sti->sti_laddr_sa != NULL) { 1882 ASSERT(sti->sti_laddr_sa->sa_data != NULL); 1883 sn_len = sti->sti_laddr_len; 1884 ASSERT(sn_len <= sizeof (short) + 1885 sizeof (psi->si_laddr_sun_path)); 1886 1887 psi->si_laddr_family = 1888 sti->sti_laddr_sa->sa_family; 1889 if (sn_len != 0) { 1890 /* AF_UNIX socket names are NULL terminated */ 1891 (void) strncpy(psi->si_laddr_sun_path, 1892 sti->sti_laddr_sa->sa_data, 1893 sizeof (psi->si_laddr_sun_path)); 1894 sn_len = strlen(psi->si_laddr_sun_path); 1895 } 1896 psi->si_laddr_sun_path[sn_len] = 0; 1897 } 1898 1899 if (sti->sti_faddr_sa != NULL) { 1900 ASSERT(sti->sti_faddr_sa->sa_data != NULL); 1901 sn_len = sti->sti_faddr_len; 1902 ASSERT(sn_len <= sizeof (short) + 1903 sizeof (psi->si_faddr_sun_path)); 1904 1905 psi->si_faddr_family = 1906 sti->sti_faddr_sa->sa_family; 1907 if (sn_len != 0) { 1908 (void) strncpy(psi->si_faddr_sun_path, 1909 sti->sti_faddr_sa->sa_data, 1910 sizeof (psi->si_faddr_sun_path)); 1911 sn_len = strlen(psi->si_faddr_sun_path); 1912 } 1913 psi->si_faddr_sun_path[sn_len] = 0; 1914 } 1915 1916 mutex_exit(&so->so_lock); 1917 1918 (void) snprintf(psi->si_son_straddr, 1919 sizeof (psi->si_son_straddr), "%p", (void *)so); 1920 (void) snprintf(psi->si_lvn_straddr, 1921 sizeof (psi->si_lvn_straddr), "%p", 1922 (void *)sti->sti_ux_laddr.soua_vp); 1923 (void) snprintf(psi->si_fvn_straddr, 1924 sizeof (psi->si_fvn_straddr), "%p", 1925 (void *)sti->sti_ux_faddr.soua_vp); 1926 1927 ns++; 1928 psi++; 1929 } 1930 1931 ksp->ks_ndata = ns; 1932 return (0); 1933 } 1934 1935 ssize_t 1936 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 1937 { 1938 struct uio auio; 1939 struct iovec aiov[1]; 1940 register vnode_t *vp; 1941 int ioflag, rwflag; 1942 ssize_t cnt; 1943 int 
error = 0; 1944 int iovcnt = 0; 1945 short fflag; 1946 1947 vp = fp->f_vnode; 1948 fflag = fp->f_flag; 1949 1950 rwflag = 0; 1951 aiov[0].iov_base = (caddr_t)buf; 1952 aiov[0].iov_len = size; 1953 iovcnt = 1; 1954 cnt = (ssize_t)size; 1955 (void) VOP_RWLOCK(vp, rwflag, NULL); 1956 1957 auio.uio_loffset = fileoff; 1958 auio.uio_iov = aiov; 1959 auio.uio_iovcnt = iovcnt; 1960 auio.uio_resid = cnt; 1961 auio.uio_segflg = UIO_SYSSPACE; 1962 auio.uio_llimit = MAXOFFSET_T; 1963 auio.uio_fmode = fflag; 1964 auio.uio_extflg = UIO_COPY_CACHED; 1965 1966 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1967 1968 /* If read sync is not asked for, filter sync flags */ 1969 if ((ioflag & FRSYNC) == 0) 1970 ioflag &= ~(FSYNC|FDSYNC); 1971 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1972 cnt -= auio.uio_resid; 1973 1974 VOP_RWUNLOCK(vp, rwflag, NULL); 1975 1976 if (error == EINTR && cnt != 0) 1977 error = 0; 1978 1979 if (error != 0) { 1980 *err = error; 1981 return (0); 1982 } else { 1983 *err = 0; 1984 return (cnt); 1985 } 1986 } 1987 1988 int 1989 so_copyin(const void *from, void *to, size_t size, int fromkernel) 1990 { 1991 if (fromkernel) { 1992 bcopy(from, to, size); 1993 return (0); 1994 } 1995 return (xcopyin(from, to, size)); 1996 } 1997 1998 int 1999 so_copyout(const void *from, void *to, size_t size, int tokernel) 2000 { 2001 if (tokernel) { 2002 bcopy(from, to, size); 2003 return (0); 2004 } 2005 return (xcopyout(from, to, size)); 2006 } 2007