/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vfs_opreg.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/termios.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/cmn_err.h>
#include <sys/mkdev.h>
#include <sys/pathname.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <sys/un.h>

#include <sys/ucred.h>

#include <sys/tiuser.h>
#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>

#include <c2/audit.h>

#include <fs/sockfs/nl7c.h>

/*
 * Macros that operate on struct cmsghdr.
 * The CMSG_VALID macro does not assume that the last option buffer is padded.
 *
 * CMSG_CONTENT yields the address immediately following the header,
 * CMSG_CONTENTLEN is cmsg_len less the size of the header, and CMSG_VALID
 * checks that the header is aligned, lies inside [start, end), is at least
 * one header long and does not extend past end.
 */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
/* Timeout handed to cv_wait_stop() by so_lock_single() and so_lock_read(). */
#define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */

/*
 * kmem caches for sonodes; AF_UNIX sonodes are allocated from
 * socktpi_unix_cache, all others from socktpi_cache (see makesockvp()).
 */
static struct kmem_cache *socktpi_cache, *socktpi_unix_cache;
/* Cache that sockfree() returns so_direct (sodirect_t) objects to. */
struct kmem_cache *socktpi_sod_cache;

dev_t sockdev;	/* For fsid in getattr */

/* Head of the sockparams list; walked and modified under splist_lock. */
struct sockparams *sphead;
krwlock_t splist_lock;

/*
 * List of sonodes living in socktpi_unix_cache; linked and unlinked by the
 * AF_UNIX cache constructor/destructor under socklist.sl_lock.
 */
struct socklist socklist;

static int sockfs_update(kstat_t *, int);
static int sockfs_snapshot(kstat_t *, void *, int);

extern void sendfile_init();

extern void nl7c_init(void);

extern int sostr_init();

/*
 * Size of one address string in k_sockinfo.
 * NOTE(review): presumably two hex digits per pointer byte plus a
 * terminating NUL -- confirm against sockfs_snapshot().
 */
#define	ADRSTRLEN (2 * sizeof (void *) + 1)
/*
 * kernel structure for passing the sockinfo data back up to the user.
 * the strings array allows us to convert AF_UNIX addresses into strings
 * with a common method regardless of which n-bit kernel we're running.
 */
struct k_sockinfo {
	struct sockinfo	ks_si;
	char		ks_straddr[3][ADRSTRLEN];
};

/*
 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
 * Returns with the vnode held.
124 */ 125 static int 126 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 127 { 128 struct snode *csp; 129 vnode_t *vp, *dvp; 130 major_t maj; 131 int error; 132 133 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 134 /* 135 * Lookup the underlying filesystem vnode. 136 */ 137 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 138 if (error) 139 return (error); 140 141 /* Check that it is the correct vnode */ 142 if (vp->v_type != VCHR) { 143 VN_RELE(vp); 144 return (ENOTSOCK); 145 } 146 147 /* 148 * If devpath went through devfs, the device should already 149 * be configured. If devpath is a mknod file, however, we 150 * need to make sure the device is properly configured. 151 * To do this, we do something similar to spec_open() 152 * except that we resolve to the minor/leaf level since 153 * we need to return a vnode. 154 */ 155 csp = VTOS(VTOS(vp)->s_commonvp); 156 if (!(csp->s_flag & SDIPSET)) { 157 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 158 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 159 if (error == 0) 160 error = devfs_lookupname(pathname, NULLVPP, &dvp); 161 VN_RELE(vp); 162 kmem_free(pathname, MAXPATHLEN); 163 if (error != 0) 164 return (ENXIO); 165 vp = dvp; /* use the devfs vp */ 166 } 167 168 /* device is configured at this point */ 169 maj = getmajor(vp->v_rdev); 170 if (!STREAMSTAB(maj)) { 171 VN_RELE(vp); 172 return (ENOSTR); 173 } 174 175 *vpp = vp; 176 return (0); 177 } 178 179 /* 180 * Add or delete (latter if devpath is NULL) an enter to the sockparams 181 * table. If devpathlen is zero the devpath with not be kmem_freed. Otherwise 182 * this routine assumes that the caller has kmem_alloced devpath/devpathlen 183 * for this routine to consume. 
184 * The zero devpathlen could be used if the kernel wants to create entries 185 * itself by calling sockconfig(1,2,3, "/dev/tcp", 0); 186 */ 187 int 188 soconfig(int domain, int type, int protocol, 189 char *devpath, int devpathlen) 190 { 191 struct sockparams **spp; 192 struct sockparams *sp; 193 int error = 0; 194 195 dprint(0, ("soconfig(%d,%d,%d,%s,%d)\n", 196 domain, type, protocol, devpath, devpathlen)); 197 198 /* 199 * Look for an existing match. 200 */ 201 rw_enter(&splist_lock, RW_WRITER); 202 for (spp = &sphead; (sp = *spp) != NULL; spp = &sp->sp_next) { 203 if (sp->sp_domain == domain && 204 sp->sp_type == type && 205 sp->sp_protocol == protocol) { 206 break; 207 } 208 } 209 if (devpath == NULL) { 210 ASSERT(devpathlen == 0); 211 212 /* Delete existing entry */ 213 if (sp == NULL) { 214 error = ENXIO; 215 goto done; 216 } 217 /* Unlink and free existing entry */ 218 *spp = sp->sp_next; 219 ASSERT(sp->sp_vnode); 220 VN_RELE(sp->sp_vnode); 221 if (sp->sp_devpathlen != 0) 222 kmem_free(sp->sp_devpath, sp->sp_devpathlen); 223 kmem_free(sp, sizeof (*sp)); 224 } else { 225 vnode_t *vp; 226 227 /* Add new entry */ 228 if (sp != NULL) { 229 error = EEXIST; 230 goto done; 231 } 232 233 error = sogetvp(devpath, &vp, UIO_SYSSPACE); 234 if (error) { 235 dprint(0, ("soconfig: vp %s failed with %d\n", 236 devpath, error)); 237 goto done; 238 } 239 240 dprint(0, ("soconfig: %s => vp %p, dev 0x%lx\n", 241 devpath, (void *)vp, vp->v_rdev)); 242 243 sp = kmem_alloc(sizeof (*sp), KM_SLEEP); 244 sp->sp_domain = domain; 245 sp->sp_type = type; 246 sp->sp_protocol = protocol; 247 sp->sp_devpath = devpath; 248 sp->sp_devpathlen = devpathlen; 249 sp->sp_vnode = vp; 250 sp->sp_next = NULL; 251 *spp = sp; 252 } 253 done: 254 rw_exit(&splist_lock); 255 if (error) { 256 if (devpath != NULL) 257 kmem_free(devpath, devpathlen); 258 #ifdef SOCK_DEBUG 259 eprintline(error); 260 #endif /* SOCK_DEBUG */ 261 } 262 return (error); 263 } 264 265 /* 266 * Lookup an entry in the sockparams 
list based on the triple. 267 * If no entry is found and devpath is not NULL translate devpath to a 268 * vnode. Note that devpath is a pointer to a user address! 269 * Returns with the vnode held. 270 * 271 * When this routine uses devpath it does not create an entry in the sockparams 272 * list since this routine can run on behalf of any user and one user 273 * should not be able to effect the transport used by another user. 274 * 275 * In order to return the correct error this routine has to do wildcard scans 276 * of the list. The errors are (in decreasing precedence): 277 * EAFNOSUPPORT - address family not in list 278 * EPROTONOSUPPORT - address family supported but not protocol. 279 * EPROTOTYPE - address family and protocol supported but not socket type. 280 */ 281 vnode_t * 282 solookup(int domain, int type, int protocol, char *devpath, int *errorp) 283 { 284 struct sockparams *sp; 285 int error; 286 vnode_t *vp; 287 288 rw_enter(&splist_lock, RW_READER); 289 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 290 if (sp->sp_domain == domain && 291 sp->sp_type == type && 292 sp->sp_protocol == protocol) { 293 break; 294 } 295 } 296 if (sp == NULL) { 297 dprint(0, ("solookup(%d,%d,%d) not found\n", 298 domain, type, protocol)); 299 if (devpath == NULL) { 300 /* Determine correct error code */ 301 int found = 0; 302 303 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 304 if (sp->sp_domain == domain && found < 1) 305 found = 1; 306 if (sp->sp_domain == domain && 307 sp->sp_protocol == protocol && found < 2) 308 found = 2; 309 } 310 rw_exit(&splist_lock); 311 switch (found) { 312 case 0: 313 *errorp = EAFNOSUPPORT; 314 break; 315 case 1: 316 *errorp = EPROTONOSUPPORT; 317 break; 318 case 2: 319 *errorp = EPROTOTYPE; 320 break; 321 } 322 return (NULL); 323 } 324 rw_exit(&splist_lock); 325 326 /* 327 * Return vp based on devpath. 328 * Do not enter into table to avoid random users 329 * modifying the sockparams list. 
330 */ 331 error = sogetvp(devpath, &vp, UIO_USERSPACE); 332 if (error) { 333 dprint(0, ("solookup: vp %p failed with %d\n", 334 (void *)devpath, error)); 335 *errorp = EPROTONOSUPPORT; 336 return (NULL); 337 } 338 dprint(0, ("solookup: %p => vp %p, dev 0x%lx\n", 339 (void *)devpath, (void *)vp, vp->v_rdev)); 340 341 return (vp); 342 } 343 dprint(0, ("solookup(%d,%d,%d) vp %p devpath %s\n", 344 domain, type, protocol, (void *)sp->sp_vnode, sp->sp_devpath)); 345 346 vp = sp->sp_vnode; 347 VN_HOLD(vp); 348 rw_exit(&splist_lock); 349 return (vp); 350 } 351 352 /* 353 * Return a socket vnode. 354 * 355 * Assumes that the caller is "passing" an VN_HOLD for accessvp i.e. 356 * when the socket is freed a VN_RELE will take place. 357 * 358 * Note that sockets assume that the driver will clone (either itself 359 * or by using the clone driver) i.e. a socket() call will always 360 * result in a new vnode being created. 361 */ 362 struct vnode * 363 makesockvp(struct vnode *accessvp, int domain, int type, int protocol) 364 { 365 kmem_cache_t *cp; 366 struct sonode *so; 367 struct vnode *vp; 368 time_t now; 369 dev_t dev; 370 371 cp = (domain == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 372 so = kmem_cache_alloc(cp, KM_SLEEP); 373 so->so_cache = cp; 374 so->so_obj = so; 375 vp = SOTOV(so); 376 now = gethrestime_sec(); 377 378 so->so_flag = 0; 379 ASSERT(so->so_accessvp == NULL); 380 so->so_accessvp = accessvp; 381 dev = accessvp->v_rdev; 382 383 /* 384 * Record in so_flag that it is a clone. 
385 */ 386 if (getmajor(dev) == clone_major) { 387 so->so_flag |= SOCLONE; 388 } 389 so->so_dev = dev; 390 391 so->so_state = 0; 392 so->so_mode = 0; 393 394 so->so_fsid = sockdev; 395 so->so_atime = now; 396 so->so_mtime = now; 397 so->so_ctime = now; /* Never modified */ 398 so->so_count = 0; 399 400 so->so_family = (short)domain; 401 so->so_type = (short)type; 402 so->so_protocol = (short)protocol; 403 so->so_pushcnt = 0; 404 405 so->so_options = 0; 406 so->so_linger.l_onoff = 0; 407 so->so_linger.l_linger = 0; 408 so->so_sndbuf = 0; 409 so->so_rcvbuf = 0; 410 so->so_sndlowat = 0; 411 so->so_rcvlowat = 0; 412 #ifdef notyet 413 so->so_sndtimeo = 0; 414 so->so_rcvtimeo = 0; 415 #endif /* notyet */ 416 so->so_error = 0; 417 so->so_delayed_error = 0; 418 419 ASSERT(so->so_oobmsg == NULL); 420 so->so_oobcnt = 0; 421 so->so_oobsigcnt = 0; 422 so->so_pgrp = 0; 423 so->so_provinfo = NULL; 424 425 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 426 so->so_laddr_len = so->so_faddr_len = 0; 427 so->so_laddr_maxlen = so->so_faddr_maxlen = 0; 428 so->so_eaddr_mp = NULL; 429 so->so_priv = NULL; 430 431 so->so_peercred = NULL; 432 433 ASSERT(so->so_ack_mp == NULL); 434 ASSERT(so->so_conn_ind_head == NULL); 435 ASSERT(so->so_conn_ind_tail == NULL); 436 ASSERT(so->so_ux_bound_vp == NULL); 437 ASSERT(so->so_unbind_mp == NULL); 438 439 vn_reinit(vp); 440 vp->v_vfsp = rootvfs; 441 vp->v_type = VSOCK; 442 vp->v_rdev = so->so_dev; 443 vn_exists(vp); 444 445 return (vp); 446 } 447 448 void 449 sockfree(struct sonode *so) 450 { 451 mblk_t *mp; 452 vnode_t *vp; 453 454 ASSERT(so->so_count == 0); 455 ASSERT(so->so_accessvp); 456 ASSERT(so->so_discon_ind_mp == NULL); 457 458 vp = so->so_accessvp; 459 VN_RELE(vp); 460 461 /* 462 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 463 * indirect them. It also uses so_accessvp as a validity test. 
464 */ 465 mutex_enter(&so->so_lock); 466 467 so->so_accessvp = NULL; 468 469 if (so->so_laddr_sa) { 470 ASSERT((caddr_t)so->so_faddr_sa == 471 (caddr_t)so->so_laddr_sa + so->so_laddr_maxlen); 472 ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen); 473 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 474 kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); 475 so->so_laddr_sa = NULL; 476 so->so_laddr_len = so->so_laddr_maxlen = 0; 477 so->so_faddr_sa = NULL; 478 so->so_faddr_len = so->so_faddr_maxlen = 0; 479 } 480 481 mutex_exit(&so->so_lock); 482 483 if ((mp = so->so_eaddr_mp) != NULL) { 484 freemsg(mp); 485 so->so_eaddr_mp = NULL; 486 so->so_delayed_error = 0; 487 } 488 if ((mp = so->so_ack_mp) != NULL) { 489 freemsg(mp); 490 so->so_ack_mp = NULL; 491 } 492 if ((mp = so->so_conn_ind_head) != NULL) { 493 mblk_t *mp1; 494 495 while (mp) { 496 mp1 = mp->b_next; 497 mp->b_next = NULL; 498 freemsg(mp); 499 mp = mp1; 500 } 501 so->so_conn_ind_head = so->so_conn_ind_tail = NULL; 502 so->so_state &= ~SS_HASCONNIND; 503 } 504 #ifdef DEBUG 505 mutex_enter(&so->so_lock); 506 ASSERT(so_verify_oobstate(so)); 507 mutex_exit(&so->so_lock); 508 #endif /* DEBUG */ 509 if ((mp = so->so_oobmsg) != NULL) { 510 freemsg(mp); 511 so->so_oobmsg = NULL; 512 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA); 513 } 514 515 if ((mp = so->so_nl7c_rcv_mp) != NULL) { 516 so->so_nl7c_rcv_mp = NULL; 517 freemsg(mp); 518 } 519 so->so_nl7c_rcv_rval = 0; 520 if (so->so_nl7c_uri != NULL) { 521 nl7c_urifree(so); 522 /* urifree() cleared nl7c_uri */ 523 } 524 if (so->so_nl7c_flags) { 525 so->so_nl7c_flags = 0; 526 } 527 528 if (so->so_direct != NULL) { 529 sodirect_t *sodp = so->so_direct; 530 531 ASSERT(sodp->sod_uioafh == NULL); 532 533 so->so_direct = NULL; 534 kmem_cache_free(socktpi_sod_cache, sodp); 535 } 536 537 ASSERT(so->so_ux_bound_vp == NULL); 538 if ((mp = so->so_unbind_mp) != NULL) { 539 freemsg(mp); 540 so->so_unbind_mp = NULL; 541 } 542 vn_invalid(SOTOV(so)); 543 544 if 
(so->so_peercred != NULL) 545 crfree(so->so_peercred); 546 547 kmem_cache_free(so->so_cache, so->so_obj); 548 } 549 550 /* 551 * Update the accessed, updated, or changed times in an sonode 552 * with the current time. 553 * 554 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 555 * attributes in a fstat call. (They return the current time and 0 for 556 * all timestamps, respectively.) We maintain the current timestamps 557 * here primarily so that should sockmod be popped the resulting 558 * file descriptor will behave like a stream w.r.t. the timestamps. 559 */ 560 void 561 so_update_attrs(struct sonode *so, int flag) 562 { 563 time_t now = gethrestime_sec(); 564 565 mutex_enter(&so->so_lock); 566 so->so_flag |= flag; 567 if (flag & SOACC) 568 so->so_atime = now; 569 if (flag & SOMOD) 570 so->so_mtime = now; 571 mutex_exit(&so->so_lock); 572 } 573 574 /*ARGSUSED*/ 575 static int 576 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 577 { 578 struct sonode *so = buf; 579 struct vnode *vp; 580 581 vp = so->so_vnode = vn_alloc(kmflags); 582 if (vp == NULL) { 583 return (-1); 584 } 585 vn_setops(vp, socktpi_vnodeops); 586 vp->v_data = so; 587 588 so->so_direct = NULL; 589 590 so->so_nl7c_flags = 0; 591 so->so_nl7c_uri = NULL; 592 so->so_nl7c_rcv_mp = NULL; 593 594 so->so_oobmsg = NULL; 595 so->so_ack_mp = NULL; 596 so->so_conn_ind_head = NULL; 597 so->so_conn_ind_tail = NULL; 598 so->so_discon_ind_mp = NULL; 599 so->so_ux_bound_vp = NULL; 600 so->so_unbind_mp = NULL; 601 so->so_accessvp = NULL; 602 so->so_laddr_sa = NULL; 603 so->so_faddr_sa = NULL; 604 so->so_ops = &sotpi_sonodeops; 605 606 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 607 mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 608 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 609 cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL); 610 cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL); 611 cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 612 613 return 
(0); 614 } 615 616 /*ARGSUSED1*/ 617 static void 618 socktpi_destructor(void *buf, void *cdrarg) 619 { 620 struct sonode *so = buf; 621 struct vnode *vp = SOTOV(so); 622 623 ASSERT(so->so_direct == NULL); 624 625 ASSERT(so->so_nl7c_flags == 0); 626 ASSERT(so->so_nl7c_uri == NULL); 627 ASSERT(so->so_nl7c_rcv_mp == NULL); 628 629 ASSERT(so->so_oobmsg == NULL); 630 ASSERT(so->so_ack_mp == NULL); 631 ASSERT(so->so_conn_ind_head == NULL); 632 ASSERT(so->so_conn_ind_tail == NULL); 633 ASSERT(so->so_discon_ind_mp == NULL); 634 ASSERT(so->so_ux_bound_vp == NULL); 635 ASSERT(so->so_unbind_mp == NULL); 636 ASSERT(so->so_ops == &sotpi_sonodeops); 637 638 ASSERT(vn_matchops(vp, socktpi_vnodeops)); 639 ASSERT(vp->v_data == so); 640 641 vn_free(vp); 642 643 mutex_destroy(&so->so_lock); 644 mutex_destroy(&so->so_plumb_lock); 645 cv_destroy(&so->so_state_cv); 646 cv_destroy(&so->so_ack_cv); 647 cv_destroy(&so->so_connind_cv); 648 cv_destroy(&so->so_want_cv); 649 } 650 651 static int 652 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 653 { 654 int retval; 655 656 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 657 struct sonode *so = (struct sonode *)buf; 658 659 mutex_enter(&socklist.sl_lock); 660 661 so->so_next = socklist.sl_list; 662 so->so_prev = NULL; 663 if (so->so_next != NULL) 664 so->so_next->so_prev = so; 665 socklist.sl_list = so; 666 667 mutex_exit(&socklist.sl_lock); 668 669 } 670 return (retval); 671 } 672 673 static void 674 socktpi_unix_destructor(void *buf, void *cdrarg) 675 { 676 struct sonode *so = (struct sonode *)buf; 677 678 mutex_enter(&socklist.sl_lock); 679 680 if (so->so_next != NULL) 681 so->so_next->so_prev = so->so_prev; 682 if (so->so_prev != NULL) 683 so->so_prev->so_next = so->so_next; 684 else 685 socklist.sl_list = so->so_next; 686 687 mutex_exit(&socklist.sl_lock); 688 689 socktpi_destructor(buf, cdrarg); 690 } 691 692 /* 693 * Init function called when sockfs is loaded. 
694 */ 695 int 696 sockinit(int fstype, char *name) 697 { 698 static const fs_operation_def_t sock_vfsops_template[] = { 699 NULL, NULL 700 }; 701 int error; 702 major_t dev; 703 char *err_str; 704 705 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 706 if (error != 0) { 707 zcmn_err(GLOBAL_ZONEID, CE_WARN, 708 "sockinit: bad vfs ops template"); 709 return (error); 710 } 711 712 error = vn_make_ops(name, socktpi_vnodeops_template, &socktpi_vnodeops); 713 if (error != 0) { 714 err_str = "sockinit: bad sock vnode ops template"; 715 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 716 socktpi_vnodeops = NULL; 717 goto failure; 718 } 719 720 error = sosctp_init(); 721 if (error != 0) { 722 err_str = NULL; 723 goto failure; 724 } 725 726 error = sosdp_init(); 727 if (error != 0) { 728 err_str = NULL; 729 goto failure; 730 } 731 732 error = sostr_init(); 733 if (error != 0) { 734 err_str = NULL; 735 goto failure; 736 } 737 738 /* 739 * Create sonode caches. We create a special one for AF_UNIX so 740 * that we can track them for netstat(1m). 741 */ 742 socktpi_cache = kmem_cache_create("socktpi_cache", 743 sizeof (struct sonode), 0, socktpi_constructor, 744 socktpi_destructor, NULL, NULL, NULL, 0); 745 746 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 747 sizeof (struct sonode), 0, socktpi_unix_constructor, 748 socktpi_unix_destructor, NULL, NULL, NULL, 0); 749 750 /* 751 * Build initial list mapping socket parameters to vnode. 752 */ 753 rw_init(&splist_lock, NULL, RW_DEFAULT, NULL); 754 755 /* 756 * If sockets are needed before init runs /sbin/soconfig 757 * it is possible to preload the sockparams list here using 758 * calls like: 759 * sockconfig(1,2,3, "/dev/tcp", 0); 760 */ 761 762 /* 763 * Create a unique dev_t for use in so_fsid. 
764 */ 765 766 if ((dev = getudev()) == (major_t)-1) 767 dev = 0; 768 sockdev = makedevice(dev, 0); 769 770 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 771 sendfile_init(); 772 nl7c_init(); 773 774 return (0); 775 776 failure: 777 (void) vfs_freevfsops_by_type(fstype); 778 if (socktpi_vnodeops != NULL) 779 vn_freevnodeops(socktpi_vnodeops); 780 if (err_str != NULL) 781 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 782 return (error); 783 } 784 785 /* 786 * Caller must hold the mutex. Used to set SOLOCKED. 787 */ 788 void 789 so_lock_single(struct sonode *so) 790 { 791 ASSERT(MUTEX_HELD(&so->so_lock)); 792 793 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 794 so->so_flag |= SOWANT; 795 cv_wait_stop(&so->so_want_cv, &so->so_lock, 796 SO_LOCK_WAKEUP_TIME); 797 } 798 so->so_flag |= SOLOCKED; 799 } 800 801 /* 802 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 803 * Used to clear SOLOCKED or SOASYNC_UNBIND. 804 */ 805 void 806 so_unlock_single(struct sonode *so, int flag) 807 { 808 ASSERT(MUTEX_HELD(&so->so_lock)); 809 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 810 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 811 ASSERT(so->so_flag & flag); 812 813 /* 814 * Process the T_DISCON_IND on so_discon_ind_mp. 815 * 816 * Call to so_drain_discon_ind will result in so_lock 817 * being dropped and re-acquired later. 818 */ 819 if (so->so_discon_ind_mp != NULL) 820 so_drain_discon_ind(so); 821 822 if (so->so_flag & SOWANT) 823 cv_broadcast(&so->so_want_cv); 824 so->so_flag &= ~(SOWANT|flag); 825 } 826 827 /* 828 * Caller must hold the mutex. Used to set SOREADLOCKED. 829 * If the caller wants nonblocking behavior it should set fmode. 
830 */ 831 int 832 so_lock_read(struct sonode *so, int fmode) 833 { 834 ASSERT(MUTEX_HELD(&so->so_lock)); 835 836 while (so->so_flag & SOREADLOCKED) { 837 if (fmode & (FNDELAY|FNONBLOCK)) 838 return (EWOULDBLOCK); 839 so->so_flag |= SOWANT; 840 cv_wait_stop(&so->so_want_cv, &so->so_lock, 841 SO_LOCK_WAKEUP_TIME); 842 } 843 so->so_flag |= SOREADLOCKED; 844 return (0); 845 } 846 847 /* 848 * Like so_lock_read above but allows signals. 849 */ 850 int 851 so_lock_read_intr(struct sonode *so, int fmode) 852 { 853 ASSERT(MUTEX_HELD(&so->so_lock)); 854 855 while (so->so_flag & SOREADLOCKED) { 856 if (fmode & (FNDELAY|FNONBLOCK)) 857 return (EWOULDBLOCK); 858 so->so_flag |= SOWANT; 859 if (!cv_wait_sig(&so->so_want_cv, &so->so_lock)) 860 return (EINTR); 861 } 862 so->so_flag |= SOREADLOCKED; 863 return (0); 864 } 865 866 /* 867 * Caller must hold the mutex. Used to clear SOREADLOCKED, 868 * set in so_lock_read() or so_lock_read_intr(). 869 */ 870 void 871 so_unlock_read(struct sonode *so) 872 { 873 ASSERT(MUTEX_HELD(&so->so_lock)); 874 ASSERT(so->so_flag & SOREADLOCKED); 875 876 if (so->so_flag & SOWANT) 877 cv_broadcast(&so->so_want_cv); 878 so->so_flag &= ~(SOWANT|SOREADLOCKED); 879 } 880 881 /* 882 * Verify that the specified offset falls within the mblk and 883 * that the resulting pointer is aligned. 884 * Returns NULL if not. 885 */ 886 void * 887 sogetoff(mblk_t *mp, t_uscalar_t offset, 888 t_uscalar_t length, uint_t align_size) 889 { 890 uintptr_t ptr1, ptr2; 891 892 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 893 ptr1 = (uintptr_t)mp->b_rptr + offset; 894 ptr2 = (uintptr_t)ptr1 + length; 895 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 896 eprintline(0); 897 return (NULL); 898 } 899 if ((ptr1 & (align_size - 1)) != 0) { 900 eprintline(0); 901 return (NULL); 902 } 903 return ((void *)ptr1); 904 } 905 906 /* 907 * Return the AF_UNIX underlying filesystem vnode matching a given name. 
908 * Makes sure the sending and the destination sonodes are compatible. 909 * The vnode is returned held. 910 * 911 * The underlying filesystem VSOCK vnode has a v_stream pointer that 912 * references the actual stream head (hence indirectly the actual sonode). 913 */ 914 static int 915 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 916 vnode_t **vpp) 917 { 918 vnode_t *vp; /* Underlying filesystem vnode */ 919 vnode_t *rvp; /* real vnode */ 920 vnode_t *svp; /* sockfs vnode */ 921 struct sonode *so2; 922 int error; 923 924 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so, 925 soun->sun_path)); 926 927 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 928 if (error) { 929 eprintsoline(so, error); 930 return (error); 931 } 932 933 /* 934 * Traverse lofs mounts get the real vnode 935 */ 936 if (VOP_REALVP(vp, &rvp, NULL) == 0) { 937 VN_HOLD(rvp); /* hold the real vnode */ 938 VN_RELE(vp); /* release hold from lookup */ 939 vp = rvp; 940 } 941 942 if (vp->v_type != VSOCK) { 943 error = ENOTSOCK; 944 eprintsoline(so, error); 945 goto done2; 946 } 947 948 if (checkaccess) { 949 /* 950 * Check that we have permissions to access the destination 951 * vnode. This check is not done in BSD but it is required 952 * by X/Open. 953 */ 954 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) { 955 eprintsoline(so, error); 956 goto done2; 957 } 958 } 959 960 /* 961 * Check if the remote socket has been closed. 962 * 963 * Synchronize with vn_rele_stream by holding v_lock while traversing 964 * v_stream->sd_vnode. 
965 */ 966 mutex_enter(&vp->v_lock); 967 if (vp->v_stream == NULL) { 968 mutex_exit(&vp->v_lock); 969 if (so->so_type == SOCK_DGRAM) 970 error = EDESTADDRREQ; 971 else 972 error = ECONNREFUSED; 973 974 eprintsoline(so, error); 975 goto done2; 976 } 977 ASSERT(vp->v_stream->sd_vnode); 978 svp = vp->v_stream->sd_vnode; 979 /* 980 * holding v_lock on underlying filesystem vnode and acquiring 981 * it on sockfs vnode. Assumes that no code ever attempts to 982 * acquire these locks in the reverse order. 983 */ 984 VN_HOLD(svp); 985 mutex_exit(&vp->v_lock); 986 987 if (svp->v_type != VSOCK) { 988 error = ENOTSOCK; 989 eprintsoline(so, error); 990 goto done; 991 } 992 993 so2 = VTOSO(svp); 994 995 if (so->so_type != so2->so_type) { 996 error = EPROTOTYPE; 997 eprintsoline(so, error); 998 goto done; 999 } 1000 1001 VN_RELE(svp); 1002 *vpp = vp; 1003 return (0); 1004 1005 done: 1006 VN_RELE(svp); 1007 done2: 1008 VN_RELE(vp); 1009 return (error); 1010 } 1011 1012 /* 1013 * Verify peer address for connect and sendto/sendmsg. 1014 * Since sendto/sendmsg would not get synchronous errors from the transport 1015 * provider we have to do these ugly checks in the socket layer to 1016 * preserve compatibility with SunOS 4.X. 
1017 */ 1018 int 1019 so_addr_verify(struct sonode *so, const struct sockaddr *name, 1020 socklen_t namelen) 1021 { 1022 int family; 1023 1024 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", 1025 (void *)so, (void *)name, namelen)); 1026 1027 ASSERT(name != NULL); 1028 1029 family = so->so_family; 1030 switch (family) { 1031 case AF_INET: 1032 if (name->sa_family != family) { 1033 eprintsoline(so, EAFNOSUPPORT); 1034 return (EAFNOSUPPORT); 1035 } 1036 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 1037 eprintsoline(so, EINVAL); 1038 return (EINVAL); 1039 } 1040 break; 1041 case AF_INET6: { 1042 #ifdef DEBUG 1043 struct sockaddr_in6 *sin6; 1044 #endif /* DEBUG */ 1045 1046 if (name->sa_family != family) { 1047 eprintsoline(so, EAFNOSUPPORT); 1048 return (EAFNOSUPPORT); 1049 } 1050 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 1051 eprintsoline(so, EINVAL); 1052 return (EINVAL); 1053 } 1054 #ifdef DEBUG 1055 /* Verify that apps don't forget to clear sin6_scope_id etc */ 1056 sin6 = (struct sockaddr_in6 *)name; 1057 if (sin6->sin6_scope_id != 0 && 1058 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 1059 zcmn_err(getzoneid(), CE_WARN, 1060 "connect/send* with uninitialized sin6_scope_id " 1061 "(%d) on socket. 
Pid = %d\n", 1062 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 1063 } 1064 #endif /* DEBUG */ 1065 break; 1066 } 1067 case AF_UNIX: 1068 if (so->so_state & SS_FADDR_NOXLATE) { 1069 return (0); 1070 } 1071 if (namelen < (socklen_t)sizeof (short)) { 1072 eprintsoline(so, ENOENT); 1073 return (ENOENT); 1074 } 1075 if (name->sa_family != family) { 1076 eprintsoline(so, EAFNOSUPPORT); 1077 return (EAFNOSUPPORT); 1078 } 1079 /* MAXPATHLEN + soun_family + nul termination */ 1080 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 1081 eprintsoline(so, ENAMETOOLONG); 1082 return (ENAMETOOLONG); 1083 } 1084 1085 break; 1086 1087 default: 1088 /* 1089 * Default is don't do any length or sa_family check 1090 * to allow non-sockaddr style addresses. 1091 */ 1092 break; 1093 } 1094 1095 return (0); 1096 } 1097 1098 1099 /* 1100 * Translate an AF_UNIX sockaddr_un to the transport internal name. 1101 * Assumes caller has called so_addr_verify first. 1102 */ 1103 /*ARGSUSED*/ 1104 int 1105 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 1106 socklen_t namelen, int checkaccess, 1107 void **addrp, socklen_t *addrlenp) 1108 { 1109 int error; 1110 struct sockaddr_un *soun; 1111 vnode_t *vp; 1112 void *addr; 1113 socklen_t addrlen; 1114 1115 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 1116 (void *)so, (void *)name, namelen, checkaccess)); 1117 1118 ASSERT(name != NULL); 1119 ASSERT(so->so_family == AF_UNIX); 1120 ASSERT(!(so->so_state & SS_FADDR_NOXLATE)); 1121 ASSERT(namelen >= (socklen_t)sizeof (short)); 1122 ASSERT(name->sa_family == AF_UNIX); 1123 soun = (struct sockaddr_un *)name; 1124 /* 1125 * Lookup vnode for the specified path name and verify that 1126 * it is a socket. 1127 */ 1128 error = so_ux_lookup(so, soun, checkaccess, &vp); 1129 if (error) { 1130 eprintsoline(so, error); 1131 return (error); 1132 } 1133 /* 1134 * Use the address of the peer vnode as the address to send 1135 * to. We release the peer vnode here. 
In case it has been 1136 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the 1137 * transport the message will get an error or be dropped. 1138 */ 1139 so->so_ux_faddr.soua_vp = vp; 1140 so->so_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT; 1141 addr = &so->so_ux_faddr; 1142 addrlen = (socklen_t)sizeof (so->so_ux_faddr); 1143 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 1144 addrlen, (void *)vp)); 1145 VN_RELE(vp); 1146 *addrp = addr; 1147 *addrlenp = (socklen_t)addrlen; 1148 return (0); 1149 } 1150 1151 /* 1152 * Esballoc free function for messages that contain SO_FILEP option. 1153 * Decrement the reference count on the file pointers using closef. 1154 */ 1155 void 1156 fdbuf_free(struct fdbuf *fdbuf) 1157 { 1158 int i; 1159 struct file *fp; 1160 1161 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 1162 for (i = 0; i < fdbuf->fd_numfd; i++) { 1163 /* 1164 * We need pointer size alignment for fd_fds. On a LP64 1165 * kernel, the required alignment is 8 bytes while 1166 * the option headers and values are only 4 bytes 1167 * aligned. So its safer to do a bcopy compared to 1168 * assigning fdbuf->fd_fds[i] to fp. 1169 */ 1170 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1171 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp)); 1172 (void) closef(fp); 1173 } 1174 if (fdbuf->fd_ebuf != NULL) 1175 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 1176 kmem_free(fdbuf, fdbuf->fd_size); 1177 } 1178 1179 /* 1180 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 1181 * Waits if memory is not available. 
1182 */ 1183 mblk_t * 1184 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 1185 { 1186 uchar_t *buf; 1187 mblk_t *mp; 1188 1189 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 1190 buf = kmem_alloc(size, KM_SLEEP); 1191 fdbuf->fd_ebuf = (caddr_t)buf; 1192 fdbuf->fd_ebuflen = size; 1193 fdbuf->fd_frtn.free_func = fdbuf_free; 1194 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 1195 1196 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 1197 mp->b_datap->db_type = M_PROTO; 1198 return (mp); 1199 } 1200 1201 /* 1202 * Extract file descriptors from a fdbuf. 1203 * Return list in rights/rightslen. 1204 */ 1205 /*ARGSUSED*/ 1206 static int 1207 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 1208 { 1209 int i, fd; 1210 int *rp; 1211 struct file *fp; 1212 int numfd; 1213 1214 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 1215 fdbuf->fd_numfd, rightslen)); 1216 1217 numfd = fdbuf->fd_numfd; 1218 ASSERT(rightslen == numfd * (int)sizeof (int)); 1219 1220 /* 1221 * Allocate a file descriptor and increment the f_count. 1222 * The latter is needed since we always call fdbuf_free 1223 * which performs a closef. 1224 */ 1225 rp = (int *)rights; 1226 for (i = 0; i < numfd; i++) { 1227 if ((fd = ufalloc(0)) == -1) 1228 goto cleanup; 1229 /* 1230 * We need pointer size alignment for fd_fds. On a LP64 1231 * kernel, the required alignment is 8 bytes while 1232 * the option headers and values are only 4 bytes 1233 * aligned. So its safer to do a bcopy compared to 1234 * assigning fdbuf->fd_fds[i] to fp. 
1235 */ 1236 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1237 mutex_enter(&fp->f_tlock); 1238 fp->f_count++; 1239 mutex_exit(&fp->f_tlock); 1240 setf(fd, fp); 1241 *rp++ = fd; 1242 if (audit_active) 1243 audit_fdrecv(fd, fp); 1244 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 1245 i, fd, (void *)fp, fp->f_count)); 1246 } 1247 return (0); 1248 1249 cleanup: 1250 /* 1251 * Undo whatever partial work the loop above has done. 1252 */ 1253 { 1254 int j; 1255 1256 rp = (int *)rights; 1257 for (j = 0; j < i; j++) { 1258 dprint(0, 1259 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 1260 (void) closeandsetf(*rp++, NULL); 1261 } 1262 } 1263 1264 return (EMFILE); 1265 } 1266 1267 /* 1268 * Insert file descriptors into an fdbuf. 1269 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 1270 * by calling fdbuf_free(). 1271 */ 1272 int 1273 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 1274 { 1275 int numfd, i; 1276 int *fds; 1277 struct file *fp; 1278 struct fdbuf *fdbuf; 1279 int fdbufsize; 1280 1281 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 1282 1283 numfd = rightslen / (int)sizeof (int); 1284 1285 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 1286 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 1287 fdbuf->fd_size = fdbufsize; 1288 fdbuf->fd_numfd = 0; 1289 fdbuf->fd_ebuf = NULL; 1290 fdbuf->fd_ebuflen = 0; 1291 fds = (int *)rights; 1292 for (i = 0; i < numfd; i++) { 1293 if ((fp = getf(fds[i])) == NULL) { 1294 fdbuf_free(fdbuf); 1295 return (EBADF); 1296 } 1297 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 1298 i, fds[i], (void *)fp, fp->f_count)); 1299 mutex_enter(&fp->f_tlock); 1300 fp->f_count++; 1301 mutex_exit(&fp->f_tlock); 1302 /* 1303 * The maximum alignment for fdbuf (or any option header 1304 * and its value) it 4 bytes. On a LP64 kernel, the alignment 1305 * is not sufficient for pointers (fd_fds in this case). 
Since 1306 * we just did a kmem_alloc (we get a double word alignment), 1307 * we don't need to do anything on the send side (we loose 1308 * the double word alignment because fdbuf goes after an 1309 * option header (eg T_unitdata_req) which is only 4 byte 1310 * aligned). We take care of this when we extract the file 1311 * descriptor in fdbuf_extract or fdbuf_free. 1312 */ 1313 fdbuf->fd_fds[i] = fp; 1314 fdbuf->fd_numfd++; 1315 releasef(fds[i]); 1316 if (audit_active) 1317 audit_fdsend(fds[i], fp, 0); 1318 } 1319 *fdbufp = fdbuf; 1320 return (0); 1321 } 1322 1323 static int 1324 fdbuf_optlen(int rightslen) 1325 { 1326 int numfd; 1327 1328 numfd = rightslen / (int)sizeof (int); 1329 1330 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 1331 } 1332 1333 static t_uscalar_t 1334 fdbuf_cmsglen(int fdbuflen) 1335 { 1336 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 1337 (int)sizeof (struct file *) * (int)sizeof (int)); 1338 } 1339 1340 1341 /* 1342 * Return non-zero if the mblk and fdbuf are consistent. 1343 */ 1344 static int 1345 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 1346 { 1347 if (fdbuflen >= FDBUF_HDRSIZE && 1348 fdbuflen == fdbuf->fd_size) { 1349 frtn_t *frp = mp->b_datap->db_frtnp; 1350 /* 1351 * Check that the SO_FILEP portion of the 1352 * message has not been modified by 1353 * the loopback transport. The sending sockfs generates 1354 * a message that is esballoc'ed with the free function 1355 * being fdbuf_free() and where free_arg contains the 1356 * identical information as the SO_FILEP content. 1357 * 1358 * If any of these constraints are not satisfied we 1359 * silently ignore the option. 
1360 */ 1361 ASSERT(mp); 1362 if (frp != NULL && 1363 frp->free_func == fdbuf_free && 1364 frp->free_arg != NULL && 1365 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 1366 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 1367 (void *)fdbuf, fdbuflen)); 1368 return (1); 1369 } else { 1370 zcmn_err(getzoneid(), CE_WARN, 1371 "sockfs: mismatched fdbuf content (%p)", 1372 (void *)mp); 1373 return (0); 1374 } 1375 } else { 1376 zcmn_err(getzoneid(), CE_WARN, 1377 "sockfs: mismatched fdbuf len %d, %d\n", 1378 fdbuflen, fdbuf->fd_size); 1379 return (0); 1380 } 1381 } 1382 1383 /* 1384 * When the file descriptors returned by sorecvmsg can not be passed 1385 * to the application this routine will cleanup the references on 1386 * the files. Start at startoff bytes into the buffer. 1387 */ 1388 static void 1389 close_fds(void *fdbuf, int fdbuflen, int startoff) 1390 { 1391 int *fds = (int *)fdbuf; 1392 int numfd = fdbuflen / (int)sizeof (int); 1393 int i; 1394 1395 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 1396 1397 for (i = 0; i < numfd; i++) { 1398 if (startoff < 0) 1399 startoff = 0; 1400 if (startoff < (int)sizeof (int)) { 1401 /* 1402 * This file descriptor is partially or fully after 1403 * the offset 1404 */ 1405 dprint(0, 1406 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 1407 (void) closeandsetf(fds[i], NULL); 1408 } 1409 startoff -= (int)sizeof (int); 1410 } 1411 } 1412 1413 /* 1414 * Close all file descriptors contained in the control part starting at 1415 * the startoffset. 1416 */ 1417 void 1418 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 1419 int startoff) 1420 { 1421 struct cmsghdr *cmsg; 1422 1423 if (control == NULL) 1424 return; 1425 1426 if (oldflg) { 1427 close_fds(control, controllen, startoff); 1428 return; 1429 } 1430 /* Scan control part for file descriptors. 
*/ 1431 for (cmsg = (struct cmsghdr *)control; 1432 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1433 cmsg = CMSG_NEXT(cmsg)) { 1434 if (cmsg->cmsg_level == SOL_SOCKET && 1435 cmsg->cmsg_type == SCM_RIGHTS) { 1436 close_fds(CMSG_CONTENT(cmsg), 1437 (int)CMSG_CONTENTLEN(cmsg), 1438 startoff - (int)sizeof (struct cmsghdr)); 1439 } 1440 startoff -= cmsg->cmsg_len; 1441 } 1442 } 1443 1444 /* 1445 * Returns a pointer/length for the file descriptors contained 1446 * in the control buffer. Returns with *fdlenp == -1 if there are no 1447 * file descriptor options present. This is different than there being 1448 * a zero-length file descriptor option. 1449 * Fail if there are multiple SCM_RIGHT cmsgs. 1450 */ 1451 int 1452 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1453 void **fdsp, int *fdlenp) 1454 { 1455 struct cmsghdr *cmsg; 1456 void *fds; 1457 int fdlen; 1458 1459 if (control == NULL) { 1460 *fdsp = NULL; 1461 *fdlenp = -1; 1462 return (0); 1463 } 1464 1465 if (oldflg) { 1466 *fdsp = control; 1467 if (controllen == 0) 1468 *fdlenp = -1; 1469 else 1470 *fdlenp = controllen; 1471 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1472 return (0); 1473 } 1474 1475 fds = NULL; 1476 fdlen = 0; 1477 1478 for (cmsg = (struct cmsghdr *)control; 1479 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1480 cmsg = CMSG_NEXT(cmsg)) { 1481 if (cmsg->cmsg_level == SOL_SOCKET && 1482 cmsg->cmsg_type == SCM_RIGHTS) { 1483 if (fds != NULL) 1484 return (EINVAL); 1485 fds = CMSG_CONTENT(cmsg); 1486 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1487 dprint(1, ("so_getfdopt: new %lu\n", 1488 (size_t)CMSG_CONTENTLEN(cmsg))); 1489 } 1490 } 1491 if (fds == NULL) { 1492 dprint(1, ("so_getfdopt: NONE\n")); 1493 *fdlenp = -1; 1494 } else 1495 *fdlenp = fdlen; 1496 *fdsp = fds; 1497 return (0); 1498 } 1499 1500 /* 1501 * Return the length of the options including any file descriptor options. 
1502 */ 1503 t_uscalar_t 1504 so_optlen(void *control, t_uscalar_t controllen, int oldflg) 1505 { 1506 struct cmsghdr *cmsg; 1507 t_uscalar_t optlen = 0; 1508 t_uscalar_t len; 1509 1510 if (control == NULL) 1511 return (0); 1512 1513 if (oldflg) 1514 return ((t_uscalar_t)(sizeof (struct T_opthdr) + 1515 fdbuf_optlen(controllen))); 1516 1517 for (cmsg = (struct cmsghdr *)control; 1518 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1519 cmsg = CMSG_NEXT(cmsg)) { 1520 if (cmsg->cmsg_level == SOL_SOCKET && 1521 cmsg->cmsg_type == SCM_RIGHTS) { 1522 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg)); 1523 } else { 1524 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1525 } 1526 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) + 1527 sizeof (struct T_opthdr)); 1528 } 1529 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n", 1530 controllen, oldflg, optlen)); 1531 return (optlen); 1532 } 1533 1534 /* 1535 * Copy options from control to the mblk. Skip any file descriptor options. 1536 */ 1537 void 1538 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp) 1539 { 1540 struct T_opthdr toh; 1541 struct cmsghdr *cmsg; 1542 1543 if (control == NULL) 1544 return; 1545 1546 if (oldflg) { 1547 /* No real options - caller has handled file descriptors */ 1548 return; 1549 } 1550 for (cmsg = (struct cmsghdr *)control; 1551 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1552 cmsg = CMSG_NEXT(cmsg)) { 1553 /* 1554 * Note: The caller handles file descriptors prior 1555 * to calling this function. 
1556 */ 1557 t_uscalar_t len; 1558 1559 if (cmsg->cmsg_level == SOL_SOCKET && 1560 cmsg->cmsg_type == SCM_RIGHTS) 1561 continue; 1562 1563 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1564 toh.level = cmsg->cmsg_level; 1565 toh.name = cmsg->cmsg_type; 1566 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr); 1567 toh.status = 0; 1568 1569 soappendmsg(mp, &toh, sizeof (toh)); 1570 soappendmsg(mp, CMSG_CONTENT(cmsg), len); 1571 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len; 1572 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 1573 } 1574 } 1575 1576 /* 1577 * Return the length of the control message derived from the options. 1578 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP. 1579 * When oldflg is set only include SO_FILEP. 1580 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1581 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1582 * also be checked for any possible impacts. 1583 */ 1584 t_uscalar_t 1585 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) 1586 { 1587 t_uscalar_t cmsglen = 0; 1588 struct T_opthdr *tohp; 1589 t_uscalar_t len; 1590 t_uscalar_t last_roundup = 0; 1591 1592 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1593 1594 for (tohp = (struct T_opthdr *)opt; 1595 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1596 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1597 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n", 1598 tohp->level, tohp->name, tohp->len)); 1599 if (tohp->level == SOL_SOCKET && 1600 (tohp->name == SO_SRCADDR || 1601 tohp->name == SO_UNIX_CLOSE)) { 1602 continue; 1603 } 1604 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1605 struct fdbuf *fdbuf; 1606 int fdbuflen; 1607 1608 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1609 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1610 1611 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1612 continue; 1613 if (oldflg) { 1614 cmsglen += fdbuf_cmsglen(fdbuflen); 1615 continue; 1616 } 1617 len = fdbuf_cmsglen(fdbuflen); 
1618 } else if (tohp->level == SOL_SOCKET && 1619 tohp->name == SCM_TIMESTAMP) { 1620 if (oldflg) 1621 continue; 1622 1623 if (get_udatamodel() == DATAMODEL_NATIVE) { 1624 len = sizeof (struct timeval); 1625 } else { 1626 len = sizeof (struct timeval32); 1627 } 1628 } else { 1629 if (oldflg) 1630 continue; 1631 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1632 } 1633 /* 1634 * Exclude roundup for last option to not set 1635 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit. 1636 */ 1637 last_roundup = (t_uscalar_t) 1638 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) - 1639 (len + (int)sizeof (struct cmsghdr))); 1640 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) + 1641 last_roundup; 1642 } 1643 cmsglen -= last_roundup; 1644 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n", 1645 optlen, oldflg, cmsglen)); 1646 return (cmsglen); 1647 } 1648 1649 /* 1650 * Copy options from options to the control. Convert SO_FILEP to 1651 * file descriptors. 1652 * Returns errno or zero. 1653 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1654 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1655 * also be checked for any possible impacts. 
1656 */ 1657 int 1658 so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, 1659 void *control, t_uscalar_t controllen) 1660 { 1661 struct T_opthdr *tohp; 1662 struct cmsghdr *cmsg; 1663 struct fdbuf *fdbuf; 1664 int fdbuflen; 1665 int error; 1666 #if defined(DEBUG) || defined(__lint) 1667 struct cmsghdr *cend = (struct cmsghdr *) 1668 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen)); 1669 #endif 1670 cmsg = (struct cmsghdr *)control; 1671 1672 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1673 1674 for (tohp = (struct T_opthdr *)opt; 1675 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1676 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1677 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n", 1678 tohp->level, tohp->name, tohp->len)); 1679 1680 if (tohp->level == SOL_SOCKET && 1681 (tohp->name == SO_SRCADDR || 1682 tohp->name == SO_UNIX_CLOSE)) { 1683 continue; 1684 } 1685 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen); 1686 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1687 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1688 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1689 1690 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1691 return (EPROTO); 1692 if (oldflg) { 1693 error = fdbuf_extract(fdbuf, control, 1694 (int)controllen); 1695 if (error != 0) 1696 return (error); 1697 continue; 1698 } else { 1699 int fdlen; 1700 1701 fdlen = (int)fdbuf_cmsglen( 1702 (int)_TPI_TOPT_DATALEN(tohp)); 1703 1704 cmsg->cmsg_level = tohp->level; 1705 cmsg->cmsg_type = SCM_RIGHTS; 1706 cmsg->cmsg_len = (socklen_t)(fdlen + 1707 sizeof (struct cmsghdr)); 1708 1709 error = fdbuf_extract(fdbuf, 1710 CMSG_CONTENT(cmsg), fdlen); 1711 if (error != 0) 1712 return (error); 1713 } 1714 } else if (tohp->level == SOL_SOCKET && 1715 tohp->name == SCM_TIMESTAMP) { 1716 timestruc_t *timestamp; 1717 1718 if (oldflg) 1719 continue; 1720 1721 cmsg->cmsg_level = tohp->level; 1722 cmsg->cmsg_type = tohp->name; 1723 1724 timestamp = 1725 (timestruc_t 
*)P2ROUNDUP((intptr_t)&tohp[1], 1726 sizeof (intptr_t)); 1727 1728 if (get_udatamodel() == DATAMODEL_NATIVE) { 1729 struct timeval tv; 1730 1731 cmsg->cmsg_len = sizeof (struct timeval) + 1732 sizeof (struct cmsghdr); 1733 tv.tv_sec = timestamp->tv_sec; 1734 tv.tv_usec = timestamp->tv_nsec / 1735 (NANOSEC / MICROSEC); 1736 /* 1737 * on LP64 systems, the struct timeval in 1738 * the destination will not be 8-byte aligned, 1739 * so use bcopy to avoid alignment trouble 1740 */ 1741 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv)); 1742 } else { 1743 struct timeval32 *time32; 1744 1745 cmsg->cmsg_len = sizeof (struct timeval32) + 1746 sizeof (struct cmsghdr); 1747 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg); 1748 time32->tv_sec = (time32_t)timestamp->tv_sec; 1749 time32->tv_usec = 1750 (int32_t)(timestamp->tv_nsec / 1751 (NANOSEC / MICROSEC)); 1752 } 1753 1754 } else { 1755 if (oldflg) 1756 continue; 1757 1758 cmsg->cmsg_level = tohp->level; 1759 cmsg->cmsg_type = tohp->name; 1760 cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) + 1761 sizeof (struct cmsghdr)); 1762 1763 /* copy content to control data part */ 1764 bcopy(&tohp[1], CMSG_CONTENT(cmsg), 1765 CMSG_CONTENTLEN(cmsg)); 1766 } 1767 /* move to next CMSG structure! */ 1768 cmsg = CMSG_NEXT(cmsg); 1769 } 1770 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n", 1771 control, controllen, (void *)cend, (void *)cmsg)); 1772 ASSERT(cmsg <= cend); 1773 return (0); 1774 } 1775 1776 /* 1777 * Extract the SO_SRCADDR option value if present. 
1778 */ 1779 void 1780 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1781 t_uscalar_t *srclenp) 1782 { 1783 struct T_opthdr *tohp; 1784 1785 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1786 1787 ASSERT(srcp != NULL && srclenp != NULL); 1788 *srcp = NULL; 1789 *srclenp = 0; 1790 1791 for (tohp = (struct T_opthdr *)opt; 1792 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1793 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1794 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1795 tohp->level, tohp->name, tohp->len)); 1796 if (tohp->level == SOL_SOCKET && 1797 tohp->name == SO_SRCADDR) { 1798 *srcp = _TPI_TOPT_DATA(tohp); 1799 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1800 } 1801 } 1802 } 1803 1804 /* 1805 * Verify if the SO_UNIX_CLOSE option is present. 1806 */ 1807 int 1808 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1809 { 1810 struct T_opthdr *tohp; 1811 1812 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1813 1814 for (tohp = (struct T_opthdr *)opt; 1815 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1816 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1817 dprint(1, 1818 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1819 tohp->level, tohp->name, tohp->len)); 1820 if (tohp->level == SOL_SOCKET && 1821 tohp->name == SO_UNIX_CLOSE) 1822 return (1); 1823 } 1824 return (0); 1825 } 1826 1827 /* 1828 * Allocate an M_PROTO message. 1829 * 1830 * If allocation fails the behavior depends on sleepflg: 1831 * _ALLOC_NOSLEEP fail immediately 1832 * _ALLOC_INTR sleep for memory until a signal is caught 1833 * _ALLOC_SLEEP sleep forever. Don't return NULL. 
1834 */ 1835 mblk_t * 1836 soallocproto(size_t size, int sleepflg) 1837 { 1838 mblk_t *mp; 1839 1840 /* Round up size for reuse */ 1841 size = MAX(size, 64); 1842 mp = allocb(size, BPRI_MED); 1843 if (mp == NULL) { 1844 int error; /* Dummy - error not returned to caller */ 1845 1846 switch (sleepflg) { 1847 case _ALLOC_SLEEP: 1848 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, &error); 1849 ASSERT(mp); 1850 break; 1851 case _ALLOC_INTR: 1852 mp = allocb_wait(size, BPRI_MED, 0, &error); 1853 if (mp == NULL) { 1854 /* Caught signal while sleeping for memory */ 1855 eprintline(ENOBUFS); 1856 return (NULL); 1857 } 1858 break; 1859 case _ALLOC_NOSLEEP: 1860 default: 1861 eprintline(ENOBUFS); 1862 return (NULL); 1863 } 1864 } 1865 DB_TYPE(mp) = M_PROTO; 1866 return (mp); 1867 } 1868 1869 /* 1870 * Allocate an M_PROTO message with a single component. 1871 * len is the length of buf. size is the amount to allocate. 1872 * 1873 * buf can be NULL with a non-zero len. 1874 * This results in a bzero'ed chunk being placed the message. 1875 */ 1876 mblk_t * 1877 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg) 1878 { 1879 mblk_t *mp; 1880 1881 if (size == 0) 1882 size = len; 1883 1884 ASSERT(size >= len); 1885 /* Round up size for reuse */ 1886 size = MAX(size, 64); 1887 mp = soallocproto(size, sleepflg); 1888 if (mp == NULL) 1889 return (NULL); 1890 mp->b_datap->db_type = M_PROTO; 1891 if (len != 0) { 1892 if (buf != NULL) 1893 bcopy(buf, mp->b_wptr, len); 1894 else 1895 bzero(mp->b_wptr, len); 1896 mp->b_wptr += len; 1897 } 1898 return (mp); 1899 } 1900 1901 /* 1902 * Append buf/len to mp. 1903 * The caller has to ensure that there is enough room in the mblk. 1904 * 1905 * buf can be NULL with a non-zero len. 1906 * This results in a bzero'ed chunk being placed the message. 
1907 */ 1908 void 1909 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1910 { 1911 ASSERT(mp); 1912 1913 if (len != 0) { 1914 /* Assert for room left */ 1915 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1916 if (buf != NULL) 1917 bcopy(buf, mp->b_wptr, len); 1918 else 1919 bzero(mp->b_wptr, len); 1920 } 1921 mp->b_wptr += len; 1922 } 1923 1924 /* 1925 * Create a message using two kernel buffers. 1926 * If size is set that will determine the allocation size (e.g. for future 1927 * soappendmsg calls). If size is zero it is derived from the buffer 1928 * lengths. 1929 */ 1930 mblk_t * 1931 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1932 ssize_t size, int sleepflg) 1933 { 1934 mblk_t *mp; 1935 1936 if (size == 0) 1937 size = len1 + len2; 1938 ASSERT(size >= len1 + len2); 1939 1940 mp = soallocproto1(buf1, len1, size, sleepflg); 1941 if (mp) 1942 soappendmsg(mp, buf2, len2); 1943 return (mp); 1944 } 1945 1946 /* 1947 * Create a message using three kernel buffers. 1948 * If size is set that will determine the allocation size (for future 1949 * soappendmsg calls). If size is zero it is derived from the buffer 1950 * lengths. 
1951 */ 1952 mblk_t * 1953 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1954 const void *buf3, ssize_t len3, ssize_t size, int sleepflg) 1955 { 1956 mblk_t *mp; 1957 1958 if (size == 0) 1959 size = len1 + len2 +len3; 1960 ASSERT(size >= len1 + len2 + len3); 1961 1962 mp = soallocproto1(buf1, len1, size, sleepflg); 1963 if (mp != NULL) { 1964 soappendmsg(mp, buf2, len2); 1965 soappendmsg(mp, buf3, len3); 1966 } 1967 return (mp); 1968 } 1969 1970 #ifdef DEBUG 1971 char * 1972 pr_state(uint_t state, uint_t mode) 1973 { 1974 static char buf[1024]; 1975 1976 buf[0] = 0; 1977 if (state & SS_ISCONNECTED) 1978 (void) strcat(buf, "ISCONNECTED "); 1979 if (state & SS_ISCONNECTING) 1980 (void) strcat(buf, "ISCONNECTING "); 1981 if (state & SS_ISDISCONNECTING) 1982 (void) strcat(buf, "ISDISCONNECTING "); 1983 if (state & SS_CANTSENDMORE) 1984 (void) strcat(buf, "CANTSENDMORE "); 1985 1986 if (state & SS_CANTRCVMORE) 1987 (void) strcat(buf, "CANTRCVMORE "); 1988 if (state & SS_ISBOUND) 1989 (void) strcat(buf, "ISBOUND "); 1990 if (state & SS_NDELAY) 1991 (void) strcat(buf, "NDELAY "); 1992 if (state & SS_NONBLOCK) 1993 (void) strcat(buf, "NONBLOCK "); 1994 1995 if (state & SS_ASYNC) 1996 (void) strcat(buf, "ASYNC "); 1997 if (state & SS_ACCEPTCONN) 1998 (void) strcat(buf, "ACCEPTCONN "); 1999 if (state & SS_HASCONNIND) 2000 (void) strcat(buf, "HASCONNIND "); 2001 if (state & SS_SAVEDEOR) 2002 (void) strcat(buf, "SAVEDEOR "); 2003 2004 if (state & SS_RCVATMARK) 2005 (void) strcat(buf, "RCVATMARK "); 2006 if (state & SS_OOBPEND) 2007 (void) strcat(buf, "OOBPEND "); 2008 if (state & SS_HAVEOOBDATA) 2009 (void) strcat(buf, "HAVEOOBDATA "); 2010 if (state & SS_HADOOBDATA) 2011 (void) strcat(buf, "HADOOBDATA "); 2012 2013 if (state & SS_FADDR_NOXLATE) 2014 (void) strcat(buf, "FADDR_NOXLATE "); 2015 2016 if (mode & SM_PRIV) 2017 (void) strcat(buf, "PRIV "); 2018 if (mode & SM_ATOMIC) 2019 (void) strcat(buf, "ATOMIC "); 2020 if (mode & SM_ADDR) 2021 
(void) strcat(buf, "ADDR "); 2022 if (mode & SM_CONNREQUIRED) 2023 (void) strcat(buf, "CONNREQUIRED "); 2024 2025 if (mode & SM_FDPASSING) 2026 (void) strcat(buf, "FDPASSING "); 2027 if (mode & SM_EXDATA) 2028 (void) strcat(buf, "EXDATA "); 2029 if (mode & SM_OPTDATA) 2030 (void) strcat(buf, "OPTDATA "); 2031 if (mode & SM_BYTESTREAM) 2032 (void) strcat(buf, "BYTESTREAM "); 2033 return (buf); 2034 } 2035 2036 char * 2037 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 2038 { 2039 static char buf[1024]; 2040 2041 if (addr == NULL || addrlen == 0) { 2042 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr); 2043 return (buf); 2044 } 2045 switch (family) { 2046 case AF_INET: { 2047 struct sockaddr_in sin; 2048 2049 bcopy(addr, &sin, sizeof (sin)); 2050 2051 (void) sprintf(buf, "(len %d) %x/%d", 2052 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 2053 break; 2054 } 2055 case AF_INET6: { 2056 struct sockaddr_in6 sin6; 2057 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 2058 2059 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 2060 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 2061 addrlen, 2062 ntohs(piece[0]), ntohs(piece[1]), 2063 ntohs(piece[2]), ntohs(piece[3]), 2064 ntohs(piece[4]), ntohs(piece[5]), 2065 ntohs(piece[6]), ntohs(piece[7]), 2066 ntohs(sin6.sin6_port)); 2067 break; 2068 } 2069 case AF_UNIX: { 2070 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 2071 2072 (void) sprintf(buf, "(len %d) %s", addrlen, 2073 (soun == NULL) ? "(none)" : soun->sun_path); 2074 break; 2075 } 2076 default: 2077 (void) sprintf(buf, "(unknown af %d)", family); 2078 break; 2079 } 2080 return (buf); 2081 } 2082 2083 /* The logical equivalence operator (a if-and-only-if b) */ 2084 #define EQUIV(a, b) (((a) && (b)) || (!(a) && (!(b)))) 2085 2086 /* 2087 * Verify limitations and invariants on oob state. 
2088 * Return 1 if OK, otherwise 0 so that it can be used as 2089 * ASSERT(verify_oobstate(so)); 2090 */ 2091 int 2092 so_verify_oobstate(struct sonode *so) 2093 { 2094 ASSERT(MUTEX_HELD(&so->so_lock)); 2095 2096 /* 2097 * The possible state combinations are: 2098 * 0 2099 * SS_OOBPEND 2100 * SS_OOBPEND|SS_HAVEOOBDATA 2101 * SS_OOBPEND|SS_HADOOBDATA 2102 * SS_HADOOBDATA 2103 */ 2104 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 2105 case 0: 2106 case SS_OOBPEND: 2107 case SS_OOBPEND|SS_HAVEOOBDATA: 2108 case SS_OOBPEND|SS_HADOOBDATA: 2109 case SS_HADOOBDATA: 2110 break; 2111 default: 2112 printf("Bad oob state 1 (%p): counts %d/%d state %s\n", 2113 (void *)so, so->so_oobsigcnt, 2114 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2115 return (0); 2116 } 2117 2118 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 2119 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 2120 printf("Bad oob state 2 (%p): counts %d/%d state %s\n", 2121 (void *)so, so->so_oobsigcnt, 2122 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2123 return (0); 2124 } 2125 2126 /* 2127 * (so_oobsigcnt != 0 or SS_RCVATMARK) iff SS_OOBPEND 2128 */ 2129 if (!EQUIV((so->so_oobsigcnt != 0) || (so->so_state & SS_RCVATMARK), 2130 so->so_state & SS_OOBPEND)) { 2131 printf("Bad oob state 3 (%p): counts %d/%d state %s\n", 2132 (void *)so, so->so_oobsigcnt, 2133 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2134 return (0); 2135 } 2136 2137 /* 2138 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 2139 */ 2140 if (!(so->so_options & SO_OOBINLINE) && 2141 !EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 2142 printf("Bad oob state 4 (%p): counts %d/%d state %s\n", 2143 (void *)so, so->so_oobsigcnt, 2144 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2145 return (0); 2146 } 2147 if (so->so_oobsigcnt < so->so_oobcnt) { 2148 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 2149 (void *)so, 
so->so_oobsigcnt, 2150 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2151 return (0); 2152 } 2153 return (1); 2154 } 2155 #undef EQUIV 2156 2157 #endif /* DEBUG */ 2158 2159 /* initialize sockfs zone specific kstat related items */ 2160 void * 2161 sock_kstat_init(zoneid_t zoneid) 2162 { 2163 kstat_t *ksp; 2164 2165 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 2166 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 2167 2168 if (ksp != NULL) { 2169 ksp->ks_update = sockfs_update; 2170 ksp->ks_snapshot = sockfs_snapshot; 2171 ksp->ks_lock = &socklist.sl_lock; 2172 ksp->ks_private = (void *)(uintptr_t)zoneid; 2173 kstat_install(ksp); 2174 } 2175 2176 return (ksp); 2177 } 2178 2179 /* tear down sockfs zone specific kstat related items */ 2180 /*ARGSUSED*/ 2181 void 2182 sock_kstat_fini(zoneid_t zoneid, void *arg) 2183 { 2184 kstat_t *ksp = (kstat_t *)arg; 2185 2186 if (ksp != NULL) { 2187 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 2188 kstat_delete(ksp); 2189 } 2190 } 2191 2192 /* 2193 * Zones: 2194 * Note that nactive is going to be different for each zone. 2195 * This means we require kstat to call sockfs_update and then sockfs_snapshot 2196 * for the same zone, or sockfs_snapshot will be taken into the wrong size 2197 * buffer. This is safe, but if the buffer is too small, user will not be 2198 * given details of all sockets. However, as this kstat has a ks_lock, kstat 2199 * driver will keep it locked between the update and the snapshot, so no 2200 * other process (zone) can currently get inbetween resulting in a wrong size 2201 * buffer allocation. 
2202 */ 2203 static int 2204 sockfs_update(kstat_t *ksp, int rw) 2205 { 2206 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 2207 struct sonode *so; /* current sonode on socklist */ 2208 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2209 2210 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2211 2212 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2213 return (EACCES); 2214 } 2215 2216 for (so = socklist.sl_list; so != NULL; so = so->so_next) { 2217 if (so->so_accessvp != NULL && so->so_zoneid == myzoneid) { 2218 nactive++; 2219 } 2220 } 2221 ksp->ks_ndata = nactive; 2222 ksp->ks_data_size = nactive * sizeof (struct k_sockinfo); 2223 2224 return (0); 2225 } 2226 2227 static int 2228 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 2229 { 2230 int ns; /* # of sonodes we've copied */ 2231 struct sonode *so; /* current sonode on socklist */ 2232 struct k_sockinfo *pksi; /* where we put sockinfo data */ 2233 t_uscalar_t sn_len; /* soa_len */ 2234 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2235 2236 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2237 2238 ksp->ks_snaptime = gethrtime(); 2239 2240 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2241 return (EACCES); 2242 } 2243 2244 /* 2245 * for each sonode on the socklist, we massage the important 2246 * info into buf, in k_sockinfo format. 2247 */ 2248 pksi = (struct k_sockinfo *)buf; 2249 for (ns = 0, so = socklist.sl_list; so != NULL; so = so->so_next) { 2250 /* only stuff active sonodes and the same zone: */ 2251 if (so->so_accessvp == NULL || so->so_zoneid != myzoneid) { 2252 continue; 2253 } 2254 2255 /* 2256 * If the sonode was activated between the update and the 2257 * snapshot, we're done - as this is only a snapshot. 
2258 */ 2259 if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) { 2260 break; 2261 } 2262 2263 /* copy important info into buf: */ 2264 pksi->ks_si.si_size = sizeof (struct k_sockinfo); 2265 pksi->ks_si.si_family = so->so_family; 2266 pksi->ks_si.si_type = so->so_type; 2267 pksi->ks_si.si_flag = so->so_flag; 2268 pksi->ks_si.si_state = so->so_state; 2269 pksi->ks_si.si_serv_type = so->so_serv_type; 2270 pksi->ks_si.si_ux_laddr_sou_magic = so->so_ux_laddr.soua_magic; 2271 pksi->ks_si.si_ux_faddr_sou_magic = so->so_ux_faddr.soua_magic; 2272 pksi->ks_si.si_laddr_soa_len = so->so_laddr.soa_len; 2273 pksi->ks_si.si_faddr_soa_len = so->so_faddr.soa_len; 2274 pksi->ks_si.si_szoneid = so->so_zoneid; 2275 2276 mutex_enter(&so->so_lock); 2277 2278 if (so->so_laddr_sa != NULL) { 2279 ASSERT(so->so_laddr_sa->sa_data != NULL); 2280 sn_len = so->so_laddr_len; 2281 ASSERT(sn_len <= sizeof (short) + 2282 sizeof (pksi->ks_si.si_laddr_sun_path)); 2283 2284 pksi->ks_si.si_laddr_family = 2285 so->so_laddr_sa->sa_family; 2286 if (sn_len != 0) { 2287 /* AF_UNIX socket names are NULL terminated */ 2288 (void) strncpy(pksi->ks_si.si_laddr_sun_path, 2289 so->so_laddr_sa->sa_data, 2290 sizeof (pksi->ks_si.si_laddr_sun_path)); 2291 sn_len = strlen(pksi->ks_si.si_laddr_sun_path); 2292 } 2293 pksi->ks_si.si_laddr_sun_path[sn_len] = 0; 2294 } 2295 2296 if (so->so_faddr_sa != NULL) { 2297 ASSERT(so->so_faddr_sa->sa_data != NULL); 2298 sn_len = so->so_faddr_len; 2299 ASSERT(sn_len <= sizeof (short) + 2300 sizeof (pksi->ks_si.si_faddr_sun_path)); 2301 2302 pksi->ks_si.si_faddr_family = 2303 so->so_faddr_sa->sa_family; 2304 if (sn_len != 0) { 2305 (void) strncpy(pksi->ks_si.si_faddr_sun_path, 2306 so->so_faddr_sa->sa_data, 2307 sizeof (pksi->ks_si.si_faddr_sun_path)); 2308 sn_len = strlen(pksi->ks_si.si_faddr_sun_path); 2309 } 2310 pksi->ks_si.si_faddr_sun_path[sn_len] = 0; 2311 } 2312 2313 mutex_exit(&so->so_lock); 2314 2315 (void) sprintf(pksi->ks_straddr[0], "%p", (void *)so); 2316 (void) 
sprintf(pksi->ks_straddr[1], "%p", 2317 (void *)so->so_ux_laddr.soua_vp); 2318 (void) sprintf(pksi->ks_straddr[2], "%p", 2319 (void *)so->so_ux_faddr.soua_vp); 2320 2321 ns++; 2322 pksi++; 2323 } 2324 2325 ksp->ks_ndata = ns; 2326 return (0); 2327 } 2328 2329 ssize_t 2330 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 2331 { 2332 struct uio auio; 2333 struct iovec aiov[MSG_MAXIOVLEN]; 2334 register vnode_t *vp; 2335 int ioflag, rwflag; 2336 ssize_t cnt; 2337 int error = 0; 2338 int iovcnt = 0; 2339 short fflag; 2340 2341 vp = fp->f_vnode; 2342 fflag = fp->f_flag; 2343 2344 rwflag = 0; 2345 aiov[0].iov_base = (caddr_t)buf; 2346 aiov[0].iov_len = size; 2347 iovcnt = 1; 2348 cnt = (ssize_t)size; 2349 (void) VOP_RWLOCK(vp, rwflag, NULL); 2350 2351 auio.uio_loffset = fileoff; 2352 auio.uio_iov = aiov; 2353 auio.uio_iovcnt = iovcnt; 2354 auio.uio_resid = cnt; 2355 auio.uio_segflg = UIO_SYSSPACE; 2356 auio.uio_llimit = MAXOFFSET_T; 2357 auio.uio_fmode = fflag; 2358 auio.uio_extflg = UIO_COPY_CACHED; 2359 2360 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 2361 2362 /* If read sync is not asked for, filter sync flags */ 2363 if ((ioflag & FRSYNC) == 0) 2364 ioflag &= ~(FSYNC|FDSYNC); 2365 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 2366 cnt -= auio.uio_resid; 2367 2368 VOP_RWUNLOCK(vp, rwflag, NULL); 2369 2370 if (error == EINTR && cnt != 0) 2371 error = 0; 2372 out: 2373 if (error != 0) { 2374 *err = error; 2375 return (0); 2376 } else { 2377 *err = 0; 2378 return (cnt); 2379 } 2380 } 2381