1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 25 * Copyright 2015, Joyent, Inc. All rights reserved. 26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 27 */ 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/open.h> 46 #include <sys/user.h> 47 #include <sys/termios.h> 48 #include <sys/stream.h> 49 #include <sys/strsubr.h> 50 #include <sys/strsun.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/cmn_err.h> 55 #include <sys/mkdev.h> 56 #include <sys/pathname.h> 57 #include <sys/ddi.h> 58 #include <sys/stat.h> 59 #include <sys/fs/snode.h> 60 #include <sys/fs/dv_node.h> 61 #include <sys/zone.h> 62 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <netinet/in.h> 66 #include <sys/un.h> 67 #include <sys/ucred.h> 68 69 #include <sys/tiuser.h> 70 #define _SUN_TPI_VERSION 2 71 #include <sys/tihdr.h> 72 73 #include <c2/audit.h> 74 75 #include <fs/sockfs/nl7c.h> 76 #include <fs/sockfs/sockcommon.h> 77 #include <fs/sockfs/sockfilter_impl.h> 78 #include <fs/sockfs/socktpi.h> 79 #include <fs/sockfs/socktpi_impl.h> 80 #include <fs/sockfs/sodirect.h> 81 82 /* 83 * Macros that operate on struct cmsghdr. 84 * The CMSG_VALID macro does not assume that the last option buffer is padded. 85 */ 86 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 87 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 88 #define CMSG_VALID(cmsg, start, end) \ 89 (ISALIGNED_cmsghdr(cmsg) && \ 90 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 91 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 92 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 93 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 94 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 95 96 dev_t sockdev; /* For fsid in getattr */ 97 int sockfs_defer_nl7c_init = 0; 98 99 struct socklist socklist; 100 101 struct kmem_cache *socket_cache; 102 103 /* 104 * sockconf_lock protects the socket configuration (socket types and 105 * socket filters) which is changed via the sockconfig system call. 106 */ 107 krwlock_t sockconf_lock; 108 109 static int sockfs_update(kstat_t *, int); 110 static int sockfs_snapshot(kstat_t *, void *, int); 111 extern smod_info_t *sotpi_smod_create(void); 112 113 extern void sendfile_init(); 114 115 extern void nl7c_init(void); 116 117 extern int modrootloaded; 118 119 /* 120 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 121 * Returns with the vnode held. 122 */ 123 int 124 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 125 { 126 struct snode *csp; 127 vnode_t *vp, *dvp; 128 major_t maj; 129 int error; 130 131 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 132 133 /* 134 * Lookup the underlying filesystem vnode. 135 */ 136 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 137 if (error) 138 return (error); 139 140 /* Check that it is the correct vnode */ 141 if (vp->v_type != VCHR) { 142 VN_RELE(vp); 143 return (ENOTSOCK); 144 } 145 146 /* 147 * If devpath went through devfs, the device should already 148 * be configured. If devpath is a mknod file, however, we 149 * need to make sure the device is properly configured. 150 * To do this, we do something similar to spec_open() 151 * except that we resolve to the minor/leaf level since 152 * we need to return a vnode. 153 */ 154 csp = VTOS(VTOS(vp)->s_commonvp); 155 if (!(csp->s_flag & SDIPSET)) { 156 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 157 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 158 if (error == 0) 159 error = devfs_lookupname(pathname, NULLVPP, &dvp); 160 VN_RELE(vp); 161 kmem_free(pathname, MAXPATHLEN); 162 if (error != 0) 163 return (ENXIO); 164 vp = dvp; /* use the devfs vp */ 165 } 166 167 /* device is configured at this point */ 168 maj = getmajor(vp->v_rdev); 169 if (!STREAMSTAB(maj)) { 170 VN_RELE(vp); 171 return (ENOSTR); 172 } 173 174 *vpp = vp; 175 return (0); 176 } 177 178 /* 179 * Update the accessed, updated, or changed times in an sonode 180 * with the current time. 181 * 182 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 183 * attributes in a fstat call. (They return the current time and 0 for 184 * all timestamps, respectively.) We maintain the current timestamps 185 * here primarily so that should sockmod be popped the resulting 186 * file descriptor will behave like a stream w.r.t. the timestamps. 187 */ 188 void 189 so_update_attrs(struct sonode *so, int flag) 190 { 191 time_t now = gethrestime_sec(); 192 193 if (SOCK_IS_NONSTR(so)) 194 return; 195 196 mutex_enter(&so->so_lock); 197 so->so_flag |= flag; 198 if (flag & SOACC) 199 SOTOTPI(so)->sti_atime = now; 200 if (flag & SOMOD) 201 SOTOTPI(so)->sti_mtime = now; 202 mutex_exit(&so->so_lock); 203 } 204 205 extern so_create_func_t sock_comm_create_function; 206 extern so_destroy_func_t sock_comm_destroy_function; 207 /* 208 * Init function called when sockfs is loaded. 209 */ 210 int 211 sockinit(int fstype, char *name) 212 { 213 static const fs_operation_def_t sock_vfsops_template[] = { 214 NULL, NULL 215 }; 216 int error; 217 major_t dev; 218 char *err_str; 219 220 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 221 if (error != 0) { 222 zcmn_err(GLOBAL_ZONEID, CE_WARN, 223 "sockinit: bad vfs ops template"); 224 return (error); 225 } 226 227 error = vn_make_ops(name, socket_vnodeops_template, 228 &socket_vnodeops); 229 if (error != 0) { 230 err_str = "sockinit: bad socket vnode ops template"; 231 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 232 socket_vnodeops = NULL; 233 goto failure; 234 } 235 236 socket_cache = kmem_cache_create("socket_cache", 237 sizeof (struct sonode), 0, sonode_constructor, 238 sonode_destructor, NULL, NULL, NULL, 0); 239 240 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL); 241 242 error = socktpi_init(); 243 if (error != 0) { 244 err_str = NULL; 245 goto failure; 246 } 247 248 error = sod_init(); 249 if (error != 0) { 250 err_str = NULL; 251 goto failure; 252 } 253 254 /* 255 * Set up the default create and destroy functions 256 */ 257 sock_comm_create_function = socket_sonode_create; 258 sock_comm_destroy_function = socket_sonode_destroy; 259 260 /* 261 * Build initial list mapping socket parameters to vnode. 262 */ 263 smod_init(); 264 smod_add(sotpi_smod_create()); 265 266 sockparams_init(); 267 268 /* 269 * If sockets are needed before init runs /sbin/soconfig 270 * it is possible to preload the sockparams list here using 271 * calls like: 272 * sockconfig(1,2,3, "/dev/tcp", 0); 273 */ 274 275 /* 276 * Create a unique dev_t for use in so_fsid. 277 */ 278 279 if ((dev = getudev()) == (major_t)-1) 280 dev = 0; 281 sockdev = makedevice(dev, 0); 282 283 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 284 sendfile_init(); 285 if (!modrootloaded) { 286 sockfs_defer_nl7c_init = 1; 287 } else { 288 nl7c_init(); 289 } 290 291 /* Initialize socket filters */ 292 sof_init(); 293 294 return (0); 295 296 failure: 297 (void) vfs_freevfsops_by_type(fstype); 298 if (socket_vnodeops != NULL) 299 vn_freevnodeops(socket_vnodeops); 300 if (err_str != NULL) 301 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 302 return (error); 303 } 304 305 /* 306 * Caller must hold the mutex. Used to set SOLOCKED. 307 */ 308 void 309 so_lock_single(struct sonode *so) 310 { 311 ASSERT(MUTEX_HELD(&so->so_lock)); 312 313 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 314 cv_wait_stop(&so->so_single_cv, &so->so_lock, 315 SO_LOCK_WAKEUP_TIME); 316 } 317 so->so_flag |= SOLOCKED; 318 } 319 320 /* 321 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 322 * Used to clear SOLOCKED or SOASYNC_UNBIND. 323 */ 324 void 325 so_unlock_single(struct sonode *so, int flag) 326 { 327 ASSERT(MUTEX_HELD(&so->so_lock)); 328 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 329 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 330 ASSERT(so->so_flag & flag); 331 /* 332 * Process the T_DISCON_IND on sti_discon_ind_mp. 333 * 334 * Call to so_drain_discon_ind will result in so_lock 335 * being dropped and re-acquired later. 336 */ 337 if (!SOCK_IS_NONSTR(so)) { 338 sotpi_info_t *sti = SOTOTPI(so); 339 340 if (sti->sti_discon_ind_mp != NULL) 341 so_drain_discon_ind(so); 342 } 343 344 cv_signal(&so->so_single_cv); 345 so->so_flag &= ~flag; 346 } 347 348 /* 349 * Caller must hold the mutex. Used to set SOREADLOCKED. 350 * If the caller wants nonblocking behavior it should set fmode. 351 */ 352 int 353 so_lock_read(struct sonode *so, int fmode) 354 { 355 ASSERT(MUTEX_HELD(&so->so_lock)); 356 357 while (so->so_flag & SOREADLOCKED) { 358 if (fmode & (FNDELAY|FNONBLOCK)) 359 return (EWOULDBLOCK); 360 cv_wait_stop(&so->so_read_cv, &so->so_lock, 361 SO_LOCK_WAKEUP_TIME); 362 } 363 so->so_flag |= SOREADLOCKED; 364 return (0); 365 } 366 367 /* 368 * Like so_lock_read above but allows signals. 369 */ 370 int 371 so_lock_read_intr(struct sonode *so, int fmode) 372 { 373 ASSERT(MUTEX_HELD(&so->so_lock)); 374 375 while (so->so_flag & SOREADLOCKED) { 376 if (fmode & (FNDELAY|FNONBLOCK)) 377 return (EWOULDBLOCK); 378 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock)) 379 return (EINTR); 380 } 381 so->so_flag |= SOREADLOCKED; 382 return (0); 383 } 384 385 /* 386 * Caller must hold the mutex. Used to clear SOREADLOCKED, 387 * set in so_lock_read() or so_lock_read_intr(). 388 */ 389 void 390 so_unlock_read(struct sonode *so) 391 { 392 ASSERT(MUTEX_HELD(&so->so_lock)); 393 ASSERT(so->so_flag & SOREADLOCKED); 394 395 cv_signal(&so->so_read_cv); 396 so->so_flag &= ~SOREADLOCKED; 397 } 398 399 /* 400 * Verify that the specified offset falls within the mblk and 401 * that the resulting pointer is aligned. 402 * Returns NULL if not. 403 */ 404 void * 405 sogetoff(mblk_t *mp, t_uscalar_t offset, 406 t_uscalar_t length, uint_t align_size) 407 { 408 uintptr_t ptr1, ptr2; 409 410 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 411 ptr1 = (uintptr_t)mp->b_rptr + offset; 412 ptr2 = (uintptr_t)ptr1 + length; 413 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 414 eprintline(0); 415 return (NULL); 416 } 417 if ((ptr1 & (align_size - 1)) != 0) { 418 eprintline(0); 419 return (NULL); 420 } 421 return ((void *)ptr1); 422 } 423 424 /* 425 * Return the AF_UNIX underlying filesystem vnode matching a given name. 426 * Makes sure the sending and the destination sonodes are compatible. 427 * The vnode is returned held. 428 * 429 * The underlying filesystem VSOCK vnode has a v_stream pointer that 430 * references the actual stream head (hence indirectly the actual sonode). 431 */ 432 static int 433 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 434 vnode_t **vpp) 435 { 436 vnode_t *vp; /* Underlying filesystem vnode */ 437 vnode_t *rvp; /* real vnode */ 438 vnode_t *svp; /* sockfs vnode */ 439 struct sonode *so2; 440 int error; 441 442 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so, 443 soun->sun_path)); 444 445 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 446 if (error) { 447 eprintsoline(so, error); 448 return (error); 449 } 450 451 /* 452 * Traverse lofs mounts get the real vnode 453 */ 454 if (VOP_REALVP(vp, &rvp, NULL) == 0) { 455 VN_HOLD(rvp); /* hold the real vnode */ 456 VN_RELE(vp); /* release hold from lookup */ 457 vp = rvp; 458 } 459 460 if (vp->v_type != VSOCK) { 461 error = ENOTSOCK; 462 eprintsoline(so, error); 463 goto done2; 464 } 465 466 if (checkaccess) { 467 /* 468 * Check that we have permissions to access the destination 469 * vnode. This check is not done in BSD but it is required 470 * by X/Open. 471 */ 472 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) { 473 eprintsoline(so, error); 474 goto done2; 475 } 476 } 477 478 /* 479 * Check if the remote socket has been closed. 480 * 481 * Synchronize with vn_rele_stream by holding v_lock while traversing 482 * v_stream->sd_vnode. 483 */ 484 mutex_enter(&vp->v_lock); 485 if (vp->v_stream == NULL) { 486 mutex_exit(&vp->v_lock); 487 if (so->so_type == SOCK_DGRAM) 488 error = EDESTADDRREQ; 489 else 490 error = ECONNREFUSED; 491 492 eprintsoline(so, error); 493 goto done2; 494 } 495 ASSERT(vp->v_stream->sd_vnode); 496 svp = vp->v_stream->sd_vnode; 497 /* 498 * holding v_lock on underlying filesystem vnode and acquiring 499 * it on sockfs vnode. Assumes that no code ever attempts to 500 * acquire these locks in the reverse order. 501 */ 502 VN_HOLD(svp); 503 mutex_exit(&vp->v_lock); 504 505 if (svp->v_type != VSOCK) { 506 error = ENOTSOCK; 507 eprintsoline(so, error); 508 goto done; 509 } 510 511 so2 = VTOSO(svp); 512 513 if (so->so_type != so2->so_type) { 514 error = EPROTOTYPE; 515 eprintsoline(so, error); 516 goto done; 517 } 518 519 VN_RELE(svp); 520 *vpp = vp; 521 return (0); 522 523 done: 524 VN_RELE(svp); 525 done2: 526 VN_RELE(vp); 527 return (error); 528 } 529 530 /* 531 * Verify peer address for connect and sendto/sendmsg. 532 * Since sendto/sendmsg would not get synchronous errors from the transport 533 * provider we have to do these ugly checks in the socket layer to 534 * preserve compatibility with SunOS 4.X. 535 */ 536 int 537 so_addr_verify(struct sonode *so, const struct sockaddr *name, 538 socklen_t namelen) 539 { 540 int family; 541 542 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", 543 (void *)so, (void *)name, namelen)); 544 545 ASSERT(name != NULL); 546 547 family = so->so_family; 548 switch (family) { 549 case AF_INET: 550 if (name->sa_family != family) { 551 eprintsoline(so, EAFNOSUPPORT); 552 return (EAFNOSUPPORT); 553 } 554 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 555 eprintsoline(so, EINVAL); 556 return (EINVAL); 557 } 558 break; 559 case AF_INET6: { 560 #ifdef DEBUG 561 struct sockaddr_in6 *sin6; 562 #endif /* DEBUG */ 563 564 if (name->sa_family != family) { 565 eprintsoline(so, EAFNOSUPPORT); 566 return (EAFNOSUPPORT); 567 } 568 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 569 eprintsoline(so, EINVAL); 570 return (EINVAL); 571 } 572 #ifdef DEBUG 573 /* Verify that apps don't forget to clear sin6_scope_id etc */ 574 sin6 = (struct sockaddr_in6 *)name; 575 if (sin6->sin6_scope_id != 0 && 576 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 577 zcmn_err(getzoneid(), CE_WARN, 578 "connect/send* with uninitialized sin6_scope_id " 579 "(%d) on socket. Pid = %d\n", 580 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 581 } 582 #endif /* DEBUG */ 583 break; 584 } 585 case AF_UNIX: 586 if (SOTOTPI(so)->sti_faddr_noxlate) { 587 return (0); 588 } 589 if (namelen < (socklen_t)sizeof (short)) { 590 eprintsoline(so, ENOENT); 591 return (ENOENT); 592 } 593 if (name->sa_family != family) { 594 eprintsoline(so, EAFNOSUPPORT); 595 return (EAFNOSUPPORT); 596 } 597 /* MAXPATHLEN + soun_family + nul termination */ 598 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 599 eprintsoline(so, ENAMETOOLONG); 600 return (ENAMETOOLONG); 601 } 602 603 break; 604 605 default: 606 /* 607 * Default is don't do any length or sa_family check 608 * to allow non-sockaddr style addresses. 609 */ 610 break; 611 } 612 613 return (0); 614 } 615 616 617 /* 618 * Translate an AF_UNIX sockaddr_un to the transport internal name. 619 * Assumes caller has called so_addr_verify first. The translated 620 * (internal form) address is stored in sti->sti_ux_taddr. 621 */ 622 /*ARGSUSED*/ 623 int 624 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 625 socklen_t namelen, int checkaccess, 626 void **addrp, socklen_t *addrlenp) 627 { 628 int error; 629 struct sockaddr_un *soun; 630 vnode_t *vp; 631 void *addr; 632 socklen_t addrlen; 633 sotpi_info_t *sti = SOTOTPI(so); 634 635 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 636 (void *)so, (void *)name, namelen, checkaccess)); 637 638 ASSERT(name != NULL); 639 ASSERT(so->so_family == AF_UNIX); 640 ASSERT(!sti->sti_faddr_noxlate); 641 ASSERT(namelen >= (socklen_t)sizeof (short)); 642 ASSERT(name->sa_family == AF_UNIX); 643 soun = (struct sockaddr_un *)name; 644 /* 645 * Lookup vnode for the specified path name and verify that 646 * it is a socket. 647 */ 648 error = so_ux_lookup(so, soun, checkaccess, &vp); 649 if (error) { 650 eprintsoline(so, error); 651 return (error); 652 } 653 /* 654 * Use the address of the peer vnode as the address to send 655 * to. We release the peer vnode here. In case it has been 656 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the 657 * transport the message will get an error or be dropped. 658 * Note that that soua_vp is never dereferenced; it's just a 659 * convenient value by which we can identify the peer. 660 */ 661 sti->sti_ux_taddr.soua_vp = vp; 662 sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT; 663 addr = &sti->sti_ux_taddr; 664 addrlen = (socklen_t)sizeof (sti->sti_ux_taddr); 665 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 666 addrlen, (void *)vp)); 667 VN_RELE(vp); 668 *addrp = addr; 669 *addrlenp = (socklen_t)addrlen; 670 return (0); 671 } 672 673 /* 674 * Esballoc free function for messages that contain SO_FILEP option. 675 * Decrement the reference count on the file pointers using closef. 676 */ 677 void 678 fdbuf_free(struct fdbuf *fdbuf) 679 { 680 int i; 681 struct file *fp; 682 683 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 684 for (i = 0; i < fdbuf->fd_numfd; i++) { 685 /* 686 * We need pointer size alignment for fd_fds. On a LP64 687 * kernel, the required alignment is 8 bytes while 688 * the option headers and values are only 4 bytes 689 * aligned. So its safer to do a bcopy compared to 690 * assigning fdbuf->fd_fds[i] to fp. 691 */ 692 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 693 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp)); 694 (void) closef(fp); 695 } 696 if (fdbuf->fd_ebuf != NULL) 697 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 698 kmem_free(fdbuf, fdbuf->fd_size); 699 } 700 701 /* 702 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 703 * Waits if memory is not available. 704 */ 705 mblk_t * 706 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 707 { 708 uchar_t *buf; 709 mblk_t *mp; 710 711 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 712 buf = kmem_alloc(size, KM_SLEEP); 713 fdbuf->fd_ebuf = (caddr_t)buf; 714 fdbuf->fd_ebuflen = size; 715 fdbuf->fd_frtn.free_func = fdbuf_free; 716 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 717 718 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 719 mp->b_datap->db_type = M_PROTO; 720 return (mp); 721 } 722 723 /* 724 * Extract file descriptors from a fdbuf. 725 * Return list in rights/rightslen. 726 */ 727 /*ARGSUSED*/ 728 static int 729 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 730 { 731 int i, fd; 732 int *rp; 733 struct file *fp; 734 int numfd; 735 736 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 737 fdbuf->fd_numfd, rightslen)); 738 739 numfd = fdbuf->fd_numfd; 740 ASSERT(rightslen == numfd * (int)sizeof (int)); 741 742 /* 743 * Allocate a file descriptor and increment the f_count. 744 * The latter is needed since we always call fdbuf_free 745 * which performs a closef. 746 */ 747 rp = (int *)rights; 748 for (i = 0; i < numfd; i++) { 749 if ((fd = ufalloc(0)) == -1) 750 goto cleanup; 751 /* 752 * We need pointer size alignment for fd_fds. On a LP64 753 * kernel, the required alignment is 8 bytes while 754 * the option headers and values are only 4 bytes 755 * aligned. So its safer to do a bcopy compared to 756 * assigning fdbuf->fd_fds[i] to fp. 757 */ 758 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 759 mutex_enter(&fp->f_tlock); 760 fp->f_count++; 761 mutex_exit(&fp->f_tlock); 762 setf(fd, fp); 763 *rp++ = fd; 764 if (AU_AUDITING()) 765 audit_fdrecv(fd, fp); 766 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 767 i, fd, (void *)fp, fp->f_count)); 768 } 769 return (0); 770 771 cleanup: 772 /* 773 * Undo whatever partial work the loop above has done. 774 */ 775 { 776 int j; 777 778 rp = (int *)rights; 779 for (j = 0; j < i; j++) { 780 dprint(0, 781 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 782 (void) closeandsetf(*rp++, NULL); 783 } 784 } 785 786 return (EMFILE); 787 } 788 789 /* 790 * Insert file descriptors into an fdbuf. 791 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 792 * by calling fdbuf_free(). 793 */ 794 int 795 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 796 { 797 int numfd, i; 798 int *fds; 799 struct file *fp; 800 struct fdbuf *fdbuf; 801 int fdbufsize; 802 803 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 804 805 numfd = rightslen / (int)sizeof (int); 806 807 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 808 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 809 fdbuf->fd_size = fdbufsize; 810 fdbuf->fd_numfd = 0; 811 fdbuf->fd_ebuf = NULL; 812 fdbuf->fd_ebuflen = 0; 813 fds = (int *)rights; 814 for (i = 0; i < numfd; i++) { 815 if ((fp = getf(fds[i])) == NULL) { 816 fdbuf_free(fdbuf); 817 return (EBADF); 818 } 819 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 820 i, fds[i], (void *)fp, fp->f_count)); 821 mutex_enter(&fp->f_tlock); 822 fp->f_count++; 823 mutex_exit(&fp->f_tlock); 824 /* 825 * The maximum alignment for fdbuf (or any option header 826 * and its value) it 4 bytes. On a LP64 kernel, the alignment 827 * is not sufficient for pointers (fd_fds in this case). Since 828 * we just did a kmem_alloc (we get a double word alignment), 829 * we don't need to do anything on the send side (we loose 830 * the double word alignment because fdbuf goes after an 831 * option header (eg T_unitdata_req) which is only 4 byte 832 * aligned). We take care of this when we extract the file 833 * descriptor in fdbuf_extract or fdbuf_free. 834 */ 835 fdbuf->fd_fds[i] = fp; 836 fdbuf->fd_numfd++; 837 releasef(fds[i]); 838 if (AU_AUDITING()) 839 audit_fdsend(fds[i], fp, 0); 840 } 841 *fdbufp = fdbuf; 842 return (0); 843 } 844 845 static int 846 fdbuf_optlen(int rightslen) 847 { 848 int numfd; 849 850 numfd = rightslen / (int)sizeof (int); 851 852 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 853 } 854 855 static t_uscalar_t 856 fdbuf_cmsglen(int fdbuflen) 857 { 858 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 859 (int)sizeof (struct file *) * (int)sizeof (int)); 860 } 861 862 863 /* 864 * Return non-zero if the mblk and fdbuf are consistent. 865 */ 866 static int 867 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 868 { 869 if (fdbuflen >= FDBUF_HDRSIZE && 870 fdbuflen == fdbuf->fd_size) { 871 frtn_t *frp = mp->b_datap->db_frtnp; 872 /* 873 * Check that the SO_FILEP portion of the 874 * message has not been modified by 875 * the loopback transport. The sending sockfs generates 876 * a message that is esballoc'ed with the free function 877 * being fdbuf_free() and where free_arg contains the 878 * identical information as the SO_FILEP content. 879 * 880 * If any of these constraints are not satisfied we 881 * silently ignore the option. 882 */ 883 ASSERT(mp); 884 if (frp != NULL && 885 frp->free_func == fdbuf_free && 886 frp->free_arg != NULL && 887 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 888 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 889 (void *)fdbuf, fdbuflen)); 890 return (1); 891 } else { 892 zcmn_err(getzoneid(), CE_WARN, 893 "sockfs: mismatched fdbuf content (%p)", 894 (void *)mp); 895 return (0); 896 } 897 } else { 898 zcmn_err(getzoneid(), CE_WARN, 899 "sockfs: mismatched fdbuf len %d, %d\n", 900 fdbuflen, fdbuf->fd_size); 901 return (0); 902 } 903 } 904 905 /* 906 * When the file descriptors returned by sorecvmsg can not be passed 907 * to the application this routine will cleanup the references on 908 * the files. Start at startoff bytes into the buffer. 909 */ 910 static void 911 close_fds(void *fdbuf, int fdbuflen, int startoff) 912 { 913 int *fds = (int *)fdbuf; 914 int numfd = fdbuflen / (int)sizeof (int); 915 int i; 916 917 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 918 919 for (i = 0; i < numfd; i++) { 920 if (startoff < 0) 921 startoff = 0; 922 if (startoff < (int)sizeof (int)) { 923 /* 924 * This file descriptor is partially or fully after 925 * the offset 926 */ 927 dprint(0, 928 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 929 (void) closeandsetf(fds[i], NULL); 930 } 931 startoff -= (int)sizeof (int); 932 } 933 } 934 935 /* 936 * Close all file descriptors contained in the control part starting at 937 * the startoffset. 938 */ 939 void 940 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 941 int startoff) 942 { 943 struct cmsghdr *cmsg; 944 945 if (control == NULL) 946 return; 947 948 if (oldflg) { 949 close_fds(control, controllen, startoff); 950 return; 951 } 952 /* Scan control part for file descriptors. */ 953 for (cmsg = (struct cmsghdr *)control; 954 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 955 cmsg = CMSG_NEXT(cmsg)) { 956 if (cmsg->cmsg_level == SOL_SOCKET && 957 cmsg->cmsg_type == SCM_RIGHTS) { 958 close_fds(CMSG_CONTENT(cmsg), 959 (int)CMSG_CONTENTLEN(cmsg), 960 startoff - (int)sizeof (struct cmsghdr)); 961 } 962 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len); 963 } 964 } 965 966 /* 967 * Handle truncation of a cmsg when the receive buffer is not big enough. 968 * Adjust the cmsg_len header field in the last cmsg that will be included in 969 * the buffer to reflect the number of bytes included. 970 */ 971 void 972 so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen) 973 { 974 struct cmsghdr *cmsg; 975 uint_t len = 0; 976 977 if (control == NULL) 978 return; 979 980 for (cmsg = control; 981 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 982 cmsg = CMSG_NEXT(cmsg)) { 983 984 len += ROUNDUP_cmsglen(cmsg->cmsg_len); 985 986 if (len > maxlen) { 987 /* 988 * This cmsg is the last one that will be included in 989 * the truncated buffer. 990 */ 991 socklen_t diff = len - maxlen; 992 993 if (diff < CMSG_CONTENTLEN(cmsg)) { 994 dprint(1, ("so_truncatecmsg: %d -> %d\n", 995 cmsg->cmsg_len, cmsg->cmsg_len - diff)); 996 cmsg->cmsg_len -= diff; 997 } else { 998 cmsg->cmsg_len = sizeof (struct cmsghdr); 999 } 1000 break; 1001 } 1002 } 1003 } 1004 1005 /* 1006 * Returns a pointer/length for the file descriptors contained 1007 * in the control buffer. Returns with *fdlenp == -1 if there are no 1008 * file descriptor options present. This is different than there being 1009 * a zero-length file descriptor option. 1010 * Fail if there are multiple SCM_RIGHT cmsgs. 1011 */ 1012 int 1013 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1014 void **fdsp, int *fdlenp) 1015 { 1016 struct cmsghdr *cmsg; 1017 void *fds; 1018 int fdlen; 1019 1020 if (control == NULL) { 1021 *fdsp = NULL; 1022 *fdlenp = -1; 1023 return (0); 1024 } 1025 1026 if (oldflg) { 1027 *fdsp = control; 1028 if (controllen == 0) 1029 *fdlenp = -1; 1030 else 1031 *fdlenp = controllen; 1032 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1033 return (0); 1034 } 1035 1036 fds = NULL; 1037 fdlen = 0; 1038 1039 for (cmsg = (struct cmsghdr *)control; 1040 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1041 cmsg = CMSG_NEXT(cmsg)) { 1042 if (cmsg->cmsg_level == SOL_SOCKET && 1043 cmsg->cmsg_type == SCM_RIGHTS) { 1044 if (fds != NULL) 1045 return (EINVAL); 1046 fds = CMSG_CONTENT(cmsg); 1047 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1048 dprint(1, ("so_getfdopt: new %lu\n", 1049 (size_t)CMSG_CONTENTLEN(cmsg))); 1050 } 1051 } 1052 if (fds == NULL) { 1053 dprint(1, ("so_getfdopt: NONE\n")); 1054 *fdlenp = -1; 1055 } else 1056 *fdlenp = fdlen; 1057 *fdsp = fds; 1058 return (0); 1059 } 1060 1061 /* 1062 * Return the length of the options including any file descriptor options. 1063 */ 1064 t_uscalar_t 1065 so_optlen(void *control, t_uscalar_t controllen, int oldflg) 1066 { 1067 struct cmsghdr *cmsg; 1068 t_uscalar_t optlen = 0; 1069 t_uscalar_t len; 1070 1071 if (control == NULL) 1072 return (0); 1073 1074 if (oldflg) 1075 return ((t_uscalar_t)(sizeof (struct T_opthdr) + 1076 fdbuf_optlen(controllen))); 1077 1078 for (cmsg = (struct cmsghdr *)control; 1079 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1080 cmsg = CMSG_NEXT(cmsg)) { 1081 if (cmsg->cmsg_level == SOL_SOCKET && 1082 cmsg->cmsg_type == SCM_RIGHTS) { 1083 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg)); 1084 } else { 1085 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1086 } 1087 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) + 1088 sizeof (struct T_opthdr)); 1089 } 1090 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n", 1091 controllen, oldflg, optlen)); 1092 return (optlen); 1093 } 1094 1095 /* 1096 * Copy options from control to the mblk. Skip any file descriptor options. 1097 */ 1098 void 1099 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp) 1100 { 1101 struct T_opthdr toh; 1102 struct cmsghdr *cmsg; 1103 1104 if (control == NULL) 1105 return; 1106 1107 if (oldflg) { 1108 /* No real options - caller has handled file descriptors */ 1109 return; 1110 } 1111 for (cmsg = (struct cmsghdr *)control; 1112 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1113 cmsg = CMSG_NEXT(cmsg)) { 1114 /* 1115 * Note: The caller handles file descriptors prior 1116 * to calling this function. 1117 */ 1118 t_uscalar_t len; 1119 1120 if (cmsg->cmsg_level == SOL_SOCKET && 1121 cmsg->cmsg_type == SCM_RIGHTS) 1122 continue; 1123 1124 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1125 toh.level = cmsg->cmsg_level; 1126 toh.name = cmsg->cmsg_type; 1127 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr); 1128 toh.status = 0; 1129 1130 soappendmsg(mp, &toh, sizeof (toh)); 1131 soappendmsg(mp, CMSG_CONTENT(cmsg), len); 1132 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len; 1133 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 1134 } 1135 } 1136 1137 /* 1138 * Return the length of the control message derived from the options. 1139 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP. 1140 * When oldflg is set only include SO_FILEP. 1141 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1142 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1143 * also be checked for any possible impacts. 1144 */ 1145 t_uscalar_t 1146 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) 1147 { 1148 t_uscalar_t cmsglen = 0; 1149 struct T_opthdr *tohp; 1150 t_uscalar_t len; 1151 t_uscalar_t last_roundup = 0; 1152 1153 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1154 1155 for (tohp = (struct T_opthdr *)opt; 1156 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1157 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1158 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n", 1159 tohp->level, tohp->name, tohp->len)); 1160 if (tohp->level == SOL_SOCKET && 1161 (tohp->name == SO_SRCADDR || 1162 tohp->name == SO_UNIX_CLOSE)) { 1163 continue; 1164 } 1165 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1166 struct fdbuf *fdbuf; 1167 int fdbuflen; 1168 1169 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1170 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1171 1172 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1173 continue; 1174 if (oldflg) { 1175 cmsglen += fdbuf_cmsglen(fdbuflen); 1176 continue; 1177 } 1178 len = fdbuf_cmsglen(fdbuflen); 1179 } else if (tohp->level == SOL_SOCKET && 1180 tohp->name == SCM_TIMESTAMP) { 1181 if (oldflg) 1182 continue; 1183 1184 if (get_udatamodel() == DATAMODEL_NATIVE) { 1185 len = sizeof (struct timeval); 1186 } else { 1187 len = sizeof (struct timeval32); 1188 } 1189 } else { 1190 if (oldflg) 1191 continue; 1192 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1193 } 1194 /* 1195 * Exclude roundup for last option to not set 1196 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit. 1197 */ 1198 last_roundup = (t_uscalar_t) 1199 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) - 1200 (len + (int)sizeof (struct cmsghdr))); 1201 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) + 1202 last_roundup; 1203 } 1204 cmsglen -= last_roundup; 1205 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n", 1206 optlen, oldflg, cmsglen)); 1207 return (cmsglen); 1208 } 1209 1210 /* 1211 * Copy options from options to the control. Convert SO_FILEP to 1212 * file descriptors. 1213 * Returns errno or zero. 1214 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1215 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1216 * also be checked for any possible impacts. 1217 */ 1218 int 1219 so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, 1220 void *control, t_uscalar_t controllen) 1221 { 1222 struct T_opthdr *tohp; 1223 struct cmsghdr *cmsg; 1224 struct fdbuf *fdbuf; 1225 int fdbuflen; 1226 int error; 1227 #if defined(DEBUG) || defined(__lint) 1228 struct cmsghdr *cend = (struct cmsghdr *) 1229 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen)); 1230 #endif 1231 cmsg = (struct cmsghdr *)control; 1232 1233 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1234 1235 for (tohp = (struct T_opthdr *)opt; 1236 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1237 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1238 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n", 1239 tohp->level, tohp->name, tohp->len)); 1240 1241 if (tohp->level == SOL_SOCKET && 1242 (tohp->name == SO_SRCADDR || 1243 tohp->name == SO_UNIX_CLOSE)) { 1244 continue; 1245 } 1246 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen); 1247 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1248 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1249 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1250 1251 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1252 return (EPROTO); 1253 if (oldflg) { 1254 error = fdbuf_extract(fdbuf, control, 1255 (int)controllen); 1256 if (error != 0) 1257 return (error); 1258 continue; 1259 } else { 1260 int fdlen; 1261 1262 fdlen = (int)fdbuf_cmsglen( 1263 (int)_TPI_TOPT_DATALEN(tohp)); 1264 1265 cmsg->cmsg_level = tohp->level; 1266 cmsg->cmsg_type = SCM_RIGHTS; 1267 cmsg->cmsg_len = (socklen_t)(fdlen + 1268 sizeof (struct cmsghdr)); 1269 1270 error = fdbuf_extract(fdbuf, 1271 CMSG_CONTENT(cmsg), fdlen); 1272 if (error != 0) 1273 return (error); 1274 } 1275 } else if (tohp->level == SOL_SOCKET && 1276 tohp->name == SCM_TIMESTAMP) { 1277 timestruc_t *timestamp; 1278 1279 if (oldflg) 1280 continue; 1281 1282 cmsg->cmsg_level = tohp->level; 1283 cmsg->cmsg_type = tohp->name; 1284 1285 timestamp = 1286 (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1], 1287 sizeof (intptr_t)); 1288 1289 if (get_udatamodel() == DATAMODEL_NATIVE) { 1290 struct timeval tv; 1291 1292 cmsg->cmsg_len = sizeof (struct timeval) + 1293 sizeof (struct cmsghdr); 1294 tv.tv_sec = timestamp->tv_sec; 1295 tv.tv_usec = timestamp->tv_nsec / 1296 (NANOSEC / MICROSEC); 1297 /* 1298 * on LP64 systems, the struct timeval in 1299 * the destination will not be 8-byte aligned, 1300 * so use bcopy to avoid alignment trouble 1301 */ 1302 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv)); 1303 } else { 1304 struct timeval32 *time32; 1305 1306 cmsg->cmsg_len = sizeof (struct timeval32) + 1307 sizeof (struct cmsghdr); 1308 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg); 1309 time32->tv_sec = (time32_t)timestamp->tv_sec; 1310 time32->tv_usec = 1311 (int32_t)(timestamp->tv_nsec / 1312 (NANOSEC / MICROSEC)); 1313 } 1314 1315 } else { 1316 if (oldflg) 1317 continue; 1318 1319 cmsg->cmsg_level = tohp->level; 1320 cmsg->cmsg_type = tohp->name; 1321 cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr); 1322 if (tohp->level == IPPROTO_IP && 1323 (tohp->name == IP_RECVTOS || 1324 tohp->name == IP_RECVTTL)) { 1325 /* 1326 * The data for these is a uint8_t but, in 1327 * order to maintain alignment for any 1328 * following TPI primitives in the message, 1329 * there will be some trailing padding bytes 1330 * which are included in the TPI_TOPT_DATALEN. 1331 * For these types, we set the cmsg_len 1332 * explicitly to the correct value. 1333 */ 1334 cmsg->cmsg_len += (socklen_t)sizeof (uint8_t); 1335 } else { 1336 cmsg->cmsg_len += 1337 (socklen_t)(_TPI_TOPT_DATALEN(tohp)); 1338 } 1339 1340 /* copy content to control data part */ 1341 bcopy(&tohp[1], CMSG_CONTENT(cmsg), 1342 CMSG_CONTENTLEN(cmsg)); 1343 } 1344 /* move to next CMSG structure! */ 1345 cmsg = CMSG_NEXT(cmsg); 1346 } 1347 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n", 1348 control, controllen, (void *)cend, (void *)cmsg)); 1349 ASSERT(cmsg <= cend); 1350 return (0); 1351 } 1352 1353 /* 1354 * Extract the SO_SRCADDR option value if present. 1355 */ 1356 void 1357 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1358 t_uscalar_t *srclenp) 1359 { 1360 struct T_opthdr *tohp; 1361 1362 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1363 1364 ASSERT(srcp != NULL && srclenp != NULL); 1365 *srcp = NULL; 1366 *srclenp = 0; 1367 1368 for (tohp = (struct T_opthdr *)opt; 1369 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1370 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1371 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1372 tohp->level, tohp->name, tohp->len)); 1373 if (tohp->level == SOL_SOCKET && 1374 tohp->name == SO_SRCADDR) { 1375 *srcp = _TPI_TOPT_DATA(tohp); 1376 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1377 } 1378 } 1379 } 1380 1381 /* 1382 * Verify if the SO_UNIX_CLOSE option is present. 1383 */ 1384 int 1385 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1386 { 1387 struct T_opthdr *tohp; 1388 1389 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1390 1391 for (tohp = (struct T_opthdr *)opt; 1392 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1393 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1394 dprint(1, 1395 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1396 tohp->level, tohp->name, tohp->len)); 1397 if (tohp->level == SOL_SOCKET && 1398 tohp->name == SO_UNIX_CLOSE) 1399 return (1); 1400 } 1401 return (0); 1402 } 1403 1404 /* 1405 * Allocate an M_PROTO message. 1406 * 1407 * If allocation fails the behavior depends on sleepflg: 1408 * _ALLOC_NOSLEEP fail immediately 1409 * _ALLOC_INTR sleep for memory until a signal is caught 1410 * _ALLOC_SLEEP sleep forever. Don't return NULL. 1411 */ 1412 mblk_t * 1413 soallocproto(size_t size, int sleepflg, cred_t *cr) 1414 { 1415 mblk_t *mp; 1416 1417 /* Round up size for reuse */ 1418 size = MAX(size, 64); 1419 if (cr != NULL) 1420 mp = allocb_cred(size, cr, curproc->p_pid); 1421 else 1422 mp = allocb(size, BPRI_MED); 1423 1424 if (mp == NULL) { 1425 int error; /* Dummy - error not returned to caller */ 1426 1427 switch (sleepflg) { 1428 case _ALLOC_SLEEP: 1429 if (cr != NULL) { 1430 mp = allocb_cred_wait(size, STR_NOSIG, &error, 1431 cr, curproc->p_pid); 1432 } else { 1433 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, 1434 &error); 1435 } 1436 ASSERT(mp); 1437 break; 1438 case _ALLOC_INTR: 1439 if (cr != NULL) { 1440 mp = allocb_cred_wait(size, 0, &error, cr, 1441 curproc->p_pid); 1442 } else { 1443 mp = allocb_wait(size, BPRI_MED, 0, &error); 1444 } 1445 if (mp == NULL) { 1446 /* Caught signal while sleeping for memory */ 1447 eprintline(ENOBUFS); 1448 return (NULL); 1449 } 1450 break; 1451 case _ALLOC_NOSLEEP: 1452 default: 1453 eprintline(ENOBUFS); 1454 return (NULL); 1455 } 1456 } 1457 DB_TYPE(mp) = M_PROTO; 1458 return (mp); 1459 } 1460 1461 /* 1462 * Allocate an M_PROTO message with a single component. 1463 * len is the length of buf. size is the amount to allocate. 1464 * 1465 * buf can be NULL with a non-zero len. 1466 * This results in a bzero'ed chunk being placed the message. 1467 */ 1468 mblk_t * 1469 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg, 1470 cred_t *cr) 1471 { 1472 mblk_t *mp; 1473 1474 if (size == 0) 1475 size = len; 1476 1477 ASSERT(size >= len); 1478 /* Round up size for reuse */ 1479 size = MAX(size, 64); 1480 mp = soallocproto(size, sleepflg, cr); 1481 if (mp == NULL) 1482 return (NULL); 1483 mp->b_datap->db_type = M_PROTO; 1484 if (len != 0) { 1485 if (buf != NULL) 1486 bcopy(buf, mp->b_wptr, len); 1487 else 1488 bzero(mp->b_wptr, len); 1489 mp->b_wptr += len; 1490 } 1491 return (mp); 1492 } 1493 1494 /* 1495 * Append buf/len to mp. 1496 * The caller has to ensure that there is enough room in the mblk. 1497 * 1498 * buf can be NULL with a non-zero len. 1499 * This results in a bzero'ed chunk being placed the message. 1500 */ 1501 void 1502 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1503 { 1504 ASSERT(mp); 1505 1506 if (len != 0) { 1507 /* Assert for room left */ 1508 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1509 if (buf != NULL) 1510 bcopy(buf, mp->b_wptr, len); 1511 else 1512 bzero(mp->b_wptr, len); 1513 } 1514 mp->b_wptr += len; 1515 } 1516 1517 /* 1518 * Create a message using two kernel buffers. 1519 * If size is set that will determine the allocation size (e.g. for future 1520 * soappendmsg calls). If size is zero it is derived from the buffer 1521 * lengths. 1522 */ 1523 mblk_t * 1524 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1525 ssize_t size, int sleepflg, cred_t *cr) 1526 { 1527 mblk_t *mp; 1528 1529 if (size == 0) 1530 size = len1 + len2; 1531 ASSERT(size >= len1 + len2); 1532 1533 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1534 if (mp) 1535 soappendmsg(mp, buf2, len2); 1536 return (mp); 1537 } 1538 1539 /* 1540 * Create a message using three kernel buffers. 1541 * If size is set that will determine the allocation size (for future 1542 * soappendmsg calls). If size is zero it is derived from the buffer 1543 * lengths. 1544 */ 1545 mblk_t * 1546 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1547 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr) 1548 { 1549 mblk_t *mp; 1550 1551 if (size == 0) 1552 size = len1 + len2 +len3; 1553 ASSERT(size >= len1 + len2 + len3); 1554 1555 mp = soallocproto1(buf1, len1, size, sleepflg, cr); 1556 if (mp != NULL) { 1557 soappendmsg(mp, buf2, len2); 1558 soappendmsg(mp, buf3, len3); 1559 } 1560 return (mp); 1561 } 1562 1563 #ifdef DEBUG 1564 char * 1565 pr_state(uint_t state, uint_t mode) 1566 { 1567 static char buf[1024]; 1568 1569 buf[0] = 0; 1570 if (state & SS_ISCONNECTED) 1571 (void) strcat(buf, "ISCONNECTED "); 1572 if (state & SS_ISCONNECTING) 1573 (void) strcat(buf, "ISCONNECTING "); 1574 if (state & SS_ISDISCONNECTING) 1575 (void) strcat(buf, "ISDISCONNECTING "); 1576 if (state & SS_CANTSENDMORE) 1577 (void) strcat(buf, "CANTSENDMORE "); 1578 1579 if (state & SS_CANTRCVMORE) 1580 (void) strcat(buf, "CANTRCVMORE "); 1581 if (state & SS_ISBOUND) 1582 (void) strcat(buf, "ISBOUND "); 1583 if (state & SS_NDELAY) 1584 (void) strcat(buf, "NDELAY "); 1585 if (state & SS_NONBLOCK) 1586 (void) strcat(buf, "NONBLOCK "); 1587 1588 if (state & SS_ASYNC) 1589 (void) strcat(buf, "ASYNC "); 1590 if (state & SS_ACCEPTCONN) 1591 (void) strcat(buf, "ACCEPTCONN "); 1592 if (state & SS_SAVEDEOR) 1593 (void) strcat(buf, "SAVEDEOR "); 1594 1595 if (state & SS_RCVATMARK) 1596 (void) strcat(buf, "RCVATMARK "); 1597 if (state & SS_OOBPEND) 1598 (void) strcat(buf, "OOBPEND "); 1599 if (state & SS_HAVEOOBDATA) 1600 (void) strcat(buf, "HAVEOOBDATA "); 1601 if (state & SS_HADOOBDATA) 1602 (void) strcat(buf, "HADOOBDATA "); 1603 1604 if (mode & SM_PRIV) 1605 (void) strcat(buf, "PRIV "); 1606 if (mode & SM_ATOMIC) 1607 (void) strcat(buf, "ATOMIC "); 1608 if (mode & SM_ADDR) 1609 (void) strcat(buf, "ADDR "); 1610 if (mode & SM_CONNREQUIRED) 1611 (void) strcat(buf, "CONNREQUIRED "); 1612 1613 if (mode & SM_FDPASSING) 1614 (void) strcat(buf, "FDPASSING "); 1615 if (mode & SM_EXDATA) 1616 (void) strcat(buf, "EXDATA "); 1617 if (mode & SM_OPTDATA) 1618 (void) strcat(buf, "OPTDATA "); 1619 if (mode & SM_BYTESTREAM) 1620 (void) strcat(buf, "BYTESTREAM "); 1621 return (buf); 1622 } 1623 1624 char * 1625 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 1626 { 1627 static char buf[1024]; 1628 1629 if (addr == NULL || addrlen == 0) { 1630 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr); 1631 return (buf); 1632 } 1633 switch (family) { 1634 case AF_INET: { 1635 struct sockaddr_in sin; 1636 1637 bcopy(addr, &sin, sizeof (sin)); 1638 1639 (void) sprintf(buf, "(len %d) %x/%d", 1640 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 1641 break; 1642 } 1643 case AF_INET6: { 1644 struct sockaddr_in6 sin6; 1645 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 1646 1647 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 1648 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 1649 addrlen, 1650 ntohs(piece[0]), ntohs(piece[1]), 1651 ntohs(piece[2]), ntohs(piece[3]), 1652 ntohs(piece[4]), ntohs(piece[5]), 1653 ntohs(piece[6]), ntohs(piece[7]), 1654 ntohs(sin6.sin6_port)); 1655 break; 1656 } 1657 case AF_UNIX: { 1658 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 1659 1660 (void) sprintf(buf, "(len %d) %s", addrlen, 1661 (soun == NULL) ? "(none)" : soun->sun_path); 1662 break; 1663 } 1664 default: 1665 (void) sprintf(buf, "(unknown af %d)", family); 1666 break; 1667 } 1668 return (buf); 1669 } 1670 1671 /* The logical equivalence operator (a if-and-only-if b) */ 1672 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b)))) 1673 1674 /* 1675 * Verify limitations and invariants on oob state. 1676 * Return 1 if OK, otherwise 0 so that it can be used as 1677 * ASSERT(verify_oobstate(so)); 1678 */ 1679 int 1680 so_verify_oobstate(struct sonode *so) 1681 { 1682 boolean_t havemark; 1683 1684 ASSERT(MUTEX_HELD(&so->so_lock)); 1685 1686 /* 1687 * The possible state combinations are: 1688 * 0 1689 * SS_OOBPEND 1690 * SS_OOBPEND|SS_HAVEOOBDATA 1691 * SS_OOBPEND|SS_HADOOBDATA 1692 * SS_HADOOBDATA 1693 */ 1694 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 1695 case 0: 1696 case SS_OOBPEND: 1697 case SS_OOBPEND|SS_HAVEOOBDATA: 1698 case SS_OOBPEND|SS_HADOOBDATA: 1699 case SS_HADOOBDATA: 1700 break; 1701 default: 1702 printf("Bad oob state 1 (%p): state %s\n", 1703 (void *)so, pr_state(so->so_state, so->so_mode)); 1704 return (0); 1705 } 1706 1707 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 1708 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 1709 printf("Bad oob state 2 (%p): state %s\n", 1710 (void *)so, pr_state(so->so_state, so->so_mode)); 1711 return (0); 1712 } 1713 1714 /* 1715 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND 1716 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt. 1717 */ 1718 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 : 1719 SOTOTPI(so)->sti_oobsigcnt > 0; 1720 1721 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK), 1722 so->so_state & SS_OOBPEND)) { 1723 printf("Bad oob state 3 (%p): state %s\n", 1724 (void *)so, pr_state(so->so_state, so->so_mode)); 1725 return (0); 1726 } 1727 1728 /* 1729 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 1730 */ 1731 if (!(so->so_options & SO_OOBINLINE) && 1732 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 1733 printf("Bad oob state 4 (%p): state %s\n", 1734 (void *)so, pr_state(so->so_state, so->so_mode)); 1735 return (0); 1736 } 1737 1738 if (!SOCK_IS_NONSTR(so) && 1739 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) { 1740 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 1741 (void *)so, SOTOTPI(so)->sti_oobsigcnt, 1742 SOTOTPI(so)->sti_oobcnt, 1743 pr_state(so->so_state, so->so_mode)); 1744 return (0); 1745 } 1746 1747 return (1); 1748 } 1749 #undef EQUIVALENT 1750 #endif /* DEBUG */ 1751 1752 /* initialize sockfs zone specific kstat related items */ 1753 void * 1754 sock_kstat_init(zoneid_t zoneid) 1755 { 1756 kstat_t *ksp; 1757 1758 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 1759 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 1760 1761 if (ksp != NULL) { 1762 ksp->ks_update = sockfs_update; 1763 ksp->ks_snapshot = sockfs_snapshot; 1764 ksp->ks_lock = &socklist.sl_lock; 1765 ksp->ks_private = (void *)(uintptr_t)zoneid; 1766 kstat_install(ksp); 1767 } 1768 1769 return (ksp); 1770 } 1771 1772 /* tear down sockfs zone specific kstat related items */ 1773 /*ARGSUSED*/ 1774 void 1775 sock_kstat_fini(zoneid_t zoneid, void *arg) 1776 { 1777 kstat_t *ksp = (kstat_t *)arg; 1778 1779 if (ksp != NULL) { 1780 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 1781 kstat_delete(ksp); 1782 } 1783 } 1784 1785 /* 1786 * Zones: 1787 * Note that nactive is going to be different for each zone. 1788 * This means we require kstat to call sockfs_update and then sockfs_snapshot 1789 * for the same zone, or sockfs_snapshot will be taken into the wrong size 1790 * buffer. This is safe, but if the buffer is too small, user will not be 1791 * given details of all sockets. However, as this kstat has a ks_lock, kstat 1792 * driver will keep it locked between the update and the snapshot, so no 1793 * other process (zone) can currently get inbetween resulting in a wrong size 1794 * buffer allocation. 1795 */ 1796 static int 1797 sockfs_update(kstat_t *ksp, int rw) 1798 { 1799 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 1800 struct sonode *so; /* current sonode on socklist */ 1801 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1802 1803 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1804 1805 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1806 return (EACCES); 1807 } 1808 1809 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1810 if (so->so_count != 0 && so->so_zoneid == myzoneid) { 1811 nactive++; 1812 } 1813 } 1814 ksp->ks_ndata = nactive; 1815 ksp->ks_data_size = nactive * sizeof (struct sockinfo); 1816 1817 return (0); 1818 } 1819 1820 static int 1821 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 1822 { 1823 int ns; /* # of sonodes we've copied */ 1824 struct sonode *so; /* current sonode on socklist */ 1825 struct sockinfo *psi; /* where we put sockinfo data */ 1826 t_uscalar_t sn_len; /* soa_len */ 1827 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 1828 sotpi_info_t *sti; 1829 1830 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 1831 1832 ksp->ks_snaptime = gethrtime(); 1833 1834 if (rw == KSTAT_WRITE) { /* bounce all writes */ 1835 return (EACCES); 1836 } 1837 1838 /* 1839 * For each sonode on the socklist, we massage the important 1840 * info into buf, in sockinfo format. 1841 */ 1842 psi = (struct sockinfo *)buf; 1843 ns = 0; 1844 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) { 1845 vattr_t attr; 1846 1847 /* only stuff active sonodes and the same zone: */ 1848 if (so->so_count == 0 || so->so_zoneid != myzoneid) { 1849 continue; 1850 } 1851 1852 /* 1853 * If the sonode was activated between the update and the 1854 * snapshot, we're done - as this is only a snapshot. 1855 */ 1856 if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) { 1857 break; 1858 } 1859 1860 sti = SOTOTPI(so); 1861 /* copy important info into buf: */ 1862 psi->si_size = sizeof (struct sockinfo); 1863 psi->si_family = so->so_family; 1864 psi->si_type = so->so_type; 1865 psi->si_flag = so->so_flag; 1866 psi->si_state = so->so_state; 1867 psi->si_serv_type = sti->sti_serv_type; 1868 psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic; 1869 psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic; 1870 psi->si_laddr_soa_len = sti->sti_laddr.soa_len; 1871 psi->si_faddr_soa_len = sti->sti_faddr.soa_len; 1872 psi->si_szoneid = so->so_zoneid; 1873 psi->si_faddr_noxlate = sti->sti_faddr_noxlate; 1874 1875 /* 1876 * Grab the inode, if possible. 1877 * This must be done before entering so_lock as VOP_GETATTR 1878 * will acquire it. 1879 */ 1880 if (so->so_vnode == NULL || 1881 VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0) 1882 attr.va_nodeid = 0; 1883 1884 psi->si_inode = attr.va_nodeid; 1885 1886 mutex_enter(&so->so_lock); 1887 1888 if (sti->sti_laddr_sa != NULL) { 1889 ASSERT(sti->sti_laddr_sa->sa_data != NULL); 1890 sn_len = sti->sti_laddr_len; 1891 ASSERT(sn_len <= sizeof (short) + 1892 sizeof (psi->si_laddr_sun_path)); 1893 1894 psi->si_laddr_family = 1895 sti->sti_laddr_sa->sa_family; 1896 if (sn_len != 0) { 1897 /* AF_UNIX socket names are NULL terminated */ 1898 (void) strncpy(psi->si_laddr_sun_path, 1899 sti->sti_laddr_sa->sa_data, 1900 sizeof (psi->si_laddr_sun_path)); 1901 sn_len = strlen(psi->si_laddr_sun_path); 1902 } 1903 psi->si_laddr_sun_path[sn_len] = 0; 1904 } 1905 1906 if (sti->sti_faddr_sa != NULL) { 1907 ASSERT(sti->sti_faddr_sa->sa_data != NULL); 1908 sn_len = sti->sti_faddr_len; 1909 ASSERT(sn_len <= sizeof (short) + 1910 sizeof (psi->si_faddr_sun_path)); 1911 1912 psi->si_faddr_family = 1913 sti->sti_faddr_sa->sa_family; 1914 if (sn_len != 0) { 1915 (void) strncpy(psi->si_faddr_sun_path, 1916 sti->sti_faddr_sa->sa_data, 1917 sizeof (psi->si_faddr_sun_path)); 1918 sn_len = strlen(psi->si_faddr_sun_path); 1919 } 1920 psi->si_faddr_sun_path[sn_len] = 0; 1921 } 1922 1923 mutex_exit(&so->so_lock); 1924 1925 (void) snprintf(psi->si_son_straddr, 1926 sizeof (psi->si_son_straddr), "%p", (void *)so); 1927 (void) snprintf(psi->si_lvn_straddr, 1928 sizeof (psi->si_lvn_straddr), "%p", 1929 (void *)sti->sti_ux_laddr.soua_vp); 1930 (void) snprintf(psi->si_fvn_straddr, 1931 sizeof (psi->si_fvn_straddr), "%p", 1932 (void *)sti->sti_ux_faddr.soua_vp); 1933 1934 ns++; 1935 psi++; 1936 } 1937 1938 ksp->ks_ndata = ns; 1939 return (0); 1940 } 1941 1942 ssize_t 1943 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 1944 { 1945 struct uio auio; 1946 struct iovec aiov[1]; 1947 register vnode_t *vp; 1948 int ioflag, rwflag; 1949 ssize_t cnt; 1950 int error = 0; 1951 int iovcnt = 0; 1952 short fflag; 1953 1954 vp = fp->f_vnode; 1955 fflag = fp->f_flag; 1956 1957 rwflag = 0; 1958 aiov[0].iov_base = (caddr_t)buf; 1959 aiov[0].iov_len = size; 1960 iovcnt = 1; 1961 cnt = (ssize_t)size; 1962 (void) VOP_RWLOCK(vp, rwflag, NULL); 1963 1964 auio.uio_loffset = fileoff; 1965 auio.uio_iov = aiov; 1966 auio.uio_iovcnt = iovcnt; 1967 auio.uio_resid = cnt; 1968 auio.uio_segflg = UIO_SYSSPACE; 1969 auio.uio_llimit = MAXOFFSET_T; 1970 auio.uio_fmode = fflag; 1971 auio.uio_extflg = UIO_COPY_CACHED; 1972 1973 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1974 1975 /* If read sync is not asked for, filter sync flags */ 1976 if ((ioflag & FRSYNC) == 0) 1977 ioflag &= ~(FSYNC|FDSYNC); 1978 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1979 cnt -= auio.uio_resid; 1980 1981 VOP_RWUNLOCK(vp, rwflag, NULL); 1982 1983 if (error == EINTR && cnt != 0) 1984 error = 0; 1985 out: 1986 if (error != 0) { 1987 *err = error; 1988 return (0); 1989 } else { 1990 *err = 0; 1991 return (cnt); 1992 } 1993 } 1994 1995 int 1996 so_copyin(const void *from, void *to, size_t size, int fromkernel) 1997 { 1998 if (fromkernel) { 1999 bcopy(from, to, size); 2000 return (0); 2001 } 2002 return (xcopyin(from, to, size)); 2003 } 2004 2005 int 2006 so_copyout(const void *from, void *to, size_t size, int tokernel) 2007 { 2008 if (tokernel) { 2009 bcopy(from, to, size); 2010 return (0); 2011 } 2012 return (xcopyout(from, to, size)); 2013 } 2014