1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/open.h> 46 #include <sys/user.h> 47 #include <sys/termios.h> 48 #include <sys/stream.h> 49 #include <sys/strsubr.h> 50 #include <sys/strsun.h> 51 #include <sys/esunddi.h> 52 #include <sys/flock.h> 53 #include <sys/modctl.h> 54 #include <sys/cmn_err.h> 55 #include <sys/mkdev.h> 56 #include <sys/pathname.h> 57 #include <sys/ddi.h> 58 #include <sys/stat.h> 59 #include <sys/fs/snode.h> 60 #include <sys/fs/dv_node.h> 61 #include <sys/zone.h> 62 63 #include <sys/socket.h> 64 #include <sys/socketvar.h> 65 #include <netinet/in.h> 66 #include <sys/un.h> 67 68 #include <sys/ucred.h> 69 70 #include <sys/tiuser.h> 71 #define _SUN_TPI_VERSION 2 72 #include <sys/tihdr.h> 73 74 #include <c2/audit.h> 75 76 #include <fs/sockfs/nl7c.h> 77 78 /* 79 * Macros that operate on struct cmsghdr. 80 * The CMSG_VALID macro does not assume that the last option buffer is padded. 81 */ 82 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 83 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 84 #define CMSG_VALID(cmsg, start, end) \ 85 (ISALIGNED_cmsghdr(cmsg) && \ 86 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 87 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 88 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 89 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 90 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 91 92 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 93 94 dev_t sockdev; /* For fsid in getattr */ 95 96 struct sockparams *sphead; 97 krwlock_t splist_lock; 98 99 struct socklist socklist; 100 101 static int sockfs_update(kstat_t *, int); 102 static int sockfs_snapshot(kstat_t *, void *, int); 103 104 extern void sendfile_init(); 105 106 extern void nl7c_init(void); 107 108 #define ADRSTRLEN (2 * sizeof (void *) + 1) 109 /* 110 * kernel structure for passing the sockinfo data back up to the user. 111 * the strings array allows us to convert AF_UNIX addresses into strings 112 * with a common method regardless of which n-bit kernel we're running. 113 */ 114 struct k_sockinfo { 115 struct sockinfo ks_si; 116 char ks_straddr[3][ADRSTRLEN]; 117 }; 118 119 /* 120 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 121 * Returns with the vnode held. 122 */ 123 static int 124 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 125 { 126 struct snode *csp; 127 vnode_t *vp, *dvp; 128 major_t maj; 129 int error; 130 131 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 132 /* 133 * Lookup the underlying filesystem vnode. 134 */ 135 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 136 if (error) 137 return (error); 138 139 /* Check that it is the correct vnode */ 140 if (vp->v_type != VCHR) { 141 VN_RELE(vp); 142 return (ENOTSOCK); 143 } 144 145 /* 146 * If devpath went through devfs, the device should already 147 * be configured. If devpath is a mknod file, however, we 148 * need to make sure the device is properly configured. 149 * To do this, we do something similar to spec_open() 150 * except that we resolve to the minor/leaf level since 151 * we need to return a vnode. 152 */ 153 csp = VTOS(VTOS(vp)->s_commonvp); 154 if (!(csp->s_flag & SDIPSET)) { 155 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 156 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 157 if (error == 0) 158 error = devfs_lookupname(pathname, NULLVPP, &dvp); 159 VN_RELE(vp); 160 kmem_free(pathname, MAXPATHLEN); 161 if (error != 0) 162 return (ENXIO); 163 vp = dvp; /* use the devfs vp */ 164 } 165 166 /* device is configured at this point */ 167 maj = getmajor(vp->v_rdev); 168 if (!STREAMSTAB(maj)) { 169 VN_RELE(vp); 170 return (ENOSTR); 171 } 172 173 *vpp = vp; 174 return (0); 175 } 176 177 /* 178 * Add or delete (latter if devpath is NULL) an enter to the sockparams 179 * table. If devpathlen is zero the devpath with not be kmem_freed. Otherwise 180 * this routine assumes that the caller has kmem_alloced devpath/devpathlen 181 * for this routine to consume. 182 * The zero devpathlen could be used if the kernel wants to create entries 183 * itself by calling sockconfig(1,2,3, "/dev/tcp", 0); 184 */ 185 int 186 soconfig(int domain, int type, int protocol, 187 char *devpath, int devpathlen) 188 { 189 struct sockparams **spp; 190 struct sockparams *sp; 191 int error = 0; 192 193 dprint(0, ("soconfig(%d,%d,%d,%s,%d)\n", 194 domain, type, protocol, devpath, devpathlen)); 195 196 /* 197 * Look for an existing match. 198 */ 199 rw_enter(&splist_lock, RW_WRITER); 200 for (spp = &sphead; (sp = *spp) != NULL; spp = &sp->sp_next) { 201 if (sp->sp_domain == domain && 202 sp->sp_type == type && 203 sp->sp_protocol == protocol) { 204 break; 205 } 206 } 207 if (devpath == NULL) { 208 ASSERT(devpathlen == 0); 209 210 /* Delete existing entry */ 211 if (sp == NULL) { 212 error = ENXIO; 213 goto done; 214 } 215 /* Unlink and free existing entry */ 216 *spp = sp->sp_next; 217 ASSERT(sp->sp_vnode); 218 VN_RELE(sp->sp_vnode); 219 if (sp->sp_devpathlen != 0) 220 kmem_free(sp->sp_devpath, sp->sp_devpathlen); 221 kmem_free(sp, sizeof (*sp)); 222 } else { 223 vnode_t *vp; 224 225 /* Add new entry */ 226 if (sp != NULL) { 227 error = EEXIST; 228 goto done; 229 } 230 231 error = sogetvp(devpath, &vp, UIO_SYSSPACE); 232 if (error) { 233 dprint(0, ("soconfig: vp %s failed with %d\n", 234 devpath, error)); 235 goto done; 236 } 237 238 dprint(0, ("soconfig: %s => vp %p, dev 0x%lx\n", 239 devpath, vp, vp->v_rdev)); 240 241 sp = kmem_alloc(sizeof (*sp), KM_SLEEP); 242 sp->sp_domain = domain; 243 sp->sp_type = type; 244 sp->sp_protocol = protocol; 245 sp->sp_devpath = devpath; 246 sp->sp_devpathlen = devpathlen; 247 sp->sp_vnode = vp; 248 sp->sp_next = NULL; 249 *spp = sp; 250 } 251 done: 252 rw_exit(&splist_lock); 253 if (error) { 254 if (devpath != NULL) 255 kmem_free(devpath, devpathlen); 256 #ifdef SOCK_DEBUG 257 eprintline(error); 258 #endif /* SOCK_DEBUG */ 259 } 260 return (error); 261 } 262 263 /* 264 * Lookup an entry in the sockparams list based on the triple. 265 * If no entry is found and devpath is not NULL translate devpath to a 266 * vnode. Note that devpath is a pointer to a user address! 267 * Returns with the vnode held. 268 * 269 * When this routine uses devpath it does not create an entry in the sockparams 270 * list since this routine can run on behalf of any user and one user 271 * should not be able to effect the transport used by another user. 272 * 273 * In order to return the correct error this routine has to do wildcard scans 274 * of the list. The errors are (in decreasing precedence): 275 * EAFNOSUPPORT - address family not in list 276 * EPROTONOSUPPORT - address family supported but not protocol. 277 * EPROTOTYPE - address family and protocol supported but not socket type. 278 */ 279 vnode_t * 280 solookup(int domain, int type, int protocol, char *devpath, int *errorp) 281 { 282 struct sockparams *sp; 283 int error; 284 vnode_t *vp; 285 286 rw_enter(&splist_lock, RW_READER); 287 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 288 if (sp->sp_domain == domain && 289 sp->sp_type == type && 290 sp->sp_protocol == protocol) { 291 break; 292 } 293 } 294 if (sp == NULL) { 295 dprint(0, ("solookup(%d,%d,%d) not found\n", 296 domain, type, protocol)); 297 if (devpath == NULL) { 298 /* Determine correct error code */ 299 int found = 0; 300 301 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 302 if (sp->sp_domain == domain && found < 1) 303 found = 1; 304 if (sp->sp_domain == domain && 305 sp->sp_protocol == protocol && found < 2) 306 found = 2; 307 } 308 rw_exit(&splist_lock); 309 switch (found) { 310 case 0: 311 *errorp = EAFNOSUPPORT; 312 break; 313 case 1: 314 *errorp = EPROTONOSUPPORT; 315 break; 316 case 2: 317 *errorp = EPROTOTYPE; 318 break; 319 } 320 return (NULL); 321 } 322 rw_exit(&splist_lock); 323 324 /* 325 * Return vp based on devpath. 326 * Do not enter into table to avoid random users 327 * modifying the sockparams list. 328 */ 329 error = sogetvp(devpath, &vp, UIO_USERSPACE); 330 if (error) { 331 dprint(0, ("solookup: vp %p failed with %d\n", 332 devpath, error)); 333 *errorp = EPROTONOSUPPORT; 334 return (NULL); 335 } 336 dprint(0, ("solookup: %p => vp %p, dev 0x%lx\n", 337 devpath, vp, vp->v_rdev)); 338 339 return (vp); 340 } 341 dprint(0, ("solookup(%d,%d,%d) vp %p devpath %s\n", 342 domain, type, protocol, sp->sp_vnode, sp->sp_devpath)); 343 344 vp = sp->sp_vnode; 345 VN_HOLD(vp); 346 rw_exit(&splist_lock); 347 return (vp); 348 } 349 350 /* 351 * Return a socket vnode. 352 * 353 * Assumes that the caller is "passing" an VN_HOLD for accessvp i.e. 354 * when the socket is freed a VN_RELE will take place. 355 * 356 * Note that sockets assume that the driver will clone (either itself 357 * or by using the clone driver) i.e. a socket() call will always 358 * result in a new vnode being created. 359 */ 360 struct vnode * 361 makesockvp(struct vnode *accessvp, int domain, int type, int protocol) 362 { 363 kmem_cache_t *cp; 364 struct sonode *so; 365 struct vnode *vp; 366 time_t now; 367 dev_t dev; 368 369 cp = (domain == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 370 so = kmem_cache_alloc(cp, KM_SLEEP); 371 so->so_cache = cp; 372 so->so_obj = so; 373 vp = SOTOV(so); 374 now = gethrestime_sec(); 375 376 so->so_flag = 0; 377 ASSERT(so->so_accessvp == NULL); 378 so->so_accessvp = accessvp; 379 dev = accessvp->v_rdev; 380 381 /* 382 * Record in so_flag that it is a clone. 383 */ 384 if (getmajor(dev) == clone_major) { 385 so->so_flag |= SOCLONE; 386 } 387 so->so_dev = dev; 388 389 so->so_state = 0; 390 so->so_mode = 0; 391 392 so->so_fsid = sockdev; 393 so->so_atime = now; 394 so->so_mtime = now; 395 so->so_ctime = now; /* Never modified */ 396 so->so_count = 0; 397 398 so->so_family = (short)domain; 399 so->so_type = (short)type; 400 so->so_protocol = (short)protocol; 401 so->so_pushcnt = 0; 402 403 so->so_options = 0; 404 so->so_linger.l_onoff = 0; 405 so->so_linger.l_linger = 0; 406 so->so_sndbuf = 0; 407 so->so_rcvbuf = 0; 408 so->so_sndlowat = 0; 409 so->so_rcvlowat = 0; 410 #ifdef notyet 411 so->so_sndtimeo = 0; 412 so->so_rcvtimeo = 0; 413 #endif /* notyet */ 414 so->so_error = 0; 415 so->so_delayed_error = 0; 416 417 ASSERT(so->so_oobmsg == NULL); 418 so->so_oobcnt = 0; 419 so->so_oobsigcnt = 0; 420 so->so_pgrp = 0; 421 so->so_provinfo = NULL; 422 423 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 424 so->so_laddr_len = so->so_faddr_len = 0; 425 so->so_laddr_maxlen = so->so_faddr_maxlen = 0; 426 so->so_eaddr_mp = NULL; 427 so->so_priv = NULL; 428 429 so->so_peercred = NULL; 430 431 ASSERT(so->so_ack_mp == NULL); 432 ASSERT(so->so_conn_ind_head == NULL); 433 ASSERT(so->so_conn_ind_tail == NULL); 434 ASSERT(so->so_ux_bound_vp == NULL); 435 ASSERT(so->so_unbind_mp == NULL); 436 437 vn_reinit(vp); 438 vp->v_vfsp = rootvfs; 439 vp->v_type = VSOCK; 440 vp->v_rdev = so->so_dev; 441 vn_exists(vp); 442 443 return (vp); 444 } 445 446 void 447 sockfree(struct sonode *so) 448 { 449 mblk_t *mp; 450 vnode_t *vp; 451 452 ASSERT(so->so_count == 0); 453 ASSERT(so->so_accessvp); 454 ASSERT(so->so_discon_ind_mp == NULL); 455 456 vp = so->so_accessvp; 457 VN_RELE(vp); 458 459 /* 460 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 461 * indirect them. It also uses so_accessvp as a validity test. 462 */ 463 mutex_enter(&so->so_lock); 464 465 so->so_accessvp = NULL; 466 467 if (so->so_laddr_sa) { 468 ASSERT((caddr_t)so->so_faddr_sa == 469 (caddr_t)so->so_laddr_sa + so->so_laddr_maxlen); 470 ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen); 471 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 472 kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); 473 so->so_laddr_sa = NULL; 474 so->so_laddr_len = so->so_laddr_maxlen = 0; 475 so->so_faddr_sa = NULL; 476 so->so_faddr_len = so->so_faddr_maxlen = 0; 477 } 478 479 mutex_exit(&so->so_lock); 480 481 if ((mp = so->so_eaddr_mp) != NULL) { 482 freemsg(mp); 483 so->so_eaddr_mp = NULL; 484 so->so_delayed_error = 0; 485 } 486 if ((mp = so->so_ack_mp) != NULL) { 487 freemsg(mp); 488 so->so_ack_mp = NULL; 489 } 490 if ((mp = so->so_conn_ind_head) != NULL) { 491 mblk_t *mp1; 492 493 while (mp) { 494 mp1 = mp->b_next; 495 mp->b_next = NULL; 496 freemsg(mp); 497 mp = mp1; 498 } 499 so->so_conn_ind_head = so->so_conn_ind_tail = NULL; 500 so->so_state &= ~SS_HASCONNIND; 501 } 502 #ifdef DEBUG 503 mutex_enter(&so->so_lock); 504 ASSERT(so_verify_oobstate(so)); 505 mutex_exit(&so->so_lock); 506 #endif /* DEBUG */ 507 if ((mp = so->so_oobmsg) != NULL) { 508 freemsg(mp); 509 so->so_oobmsg = NULL; 510 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA); 511 } 512 513 if ((mp = so->so_nl7c_rcv_mp) != NULL) { 514 so->so_nl7c_rcv_mp = NULL; 515 freemsg(mp); 516 } 517 so->so_nl7c_rcv_rval = 0; 518 if (so->so_nl7c_uri != NULL) { 519 nl7c_urifree(so); 520 /* urifree() cleared nl7c_uri */ 521 } 522 if (so->so_nl7c_flags) { 523 so->so_nl7c_flags = 0; 524 } 525 526 ASSERT(so->so_ux_bound_vp == NULL); 527 if ((mp = so->so_unbind_mp) != NULL) { 528 freemsg(mp); 529 so->so_unbind_mp = NULL; 530 } 531 vn_invalid(SOTOV(so)); 532 533 if (so->so_peercred != NULL) 534 crfree(so->so_peercred); 535 536 kmem_cache_free(so->so_cache, so->so_obj); 537 } 538 539 /* 540 * Update the accessed, updated, or changed times in an sonode 541 * with the current time. 542 * 543 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 544 * attributes in a fstat call. (They return the current time and 0 for 545 * all timestamps, respectively.) We maintain the current timestamps 546 * here primarily so that should sockmod be popped the resulting 547 * file descriptor will behave like a stream w.r.t. the timestamps. 548 */ 549 void 550 so_update_attrs(struct sonode *so, int flag) 551 { 552 time_t now = gethrestime_sec(); 553 554 mutex_enter(&so->so_lock); 555 so->so_flag |= flag; 556 if (flag & SOACC) 557 so->so_atime = now; 558 if (flag & SOMOD) 559 so->so_mtime = now; 560 mutex_exit(&so->so_lock); 561 } 562 563 /*ARGSUSED*/ 564 static int 565 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 566 { 567 struct sonode *so = buf; 568 struct vnode *vp; 569 570 so->so_nl7c_flags = 0; 571 so->so_nl7c_uri = NULL; 572 so->so_nl7c_rcv_mp = NULL; 573 574 so->so_oobmsg = NULL; 575 so->so_ack_mp = NULL; 576 so->so_conn_ind_head = NULL; 577 so->so_conn_ind_tail = NULL; 578 so->so_discon_ind_mp = NULL; 579 so->so_ux_bound_vp = NULL; 580 so->so_unbind_mp = NULL; 581 so->so_accessvp = NULL; 582 so->so_laddr_sa = NULL; 583 so->so_faddr_sa = NULL; 584 so->so_ops = &sotpi_sonodeops; 585 586 vp = vn_alloc(KM_SLEEP); 587 so->so_vnode = vp; 588 589 vn_setops(vp, socktpi_vnodeops); 590 vp->v_data = (caddr_t)so; 591 592 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 593 mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 594 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 595 cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL); 596 cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL); 597 cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 598 599 return (0); 600 } 601 602 /*ARGSUSED1*/ 603 static void 604 socktpi_destructor(void *buf, void *cdrarg) 605 { 606 struct sonode *so = buf; 607 struct vnode *vp = SOTOV(so); 608 609 ASSERT(so->so_nl7c_flags == 0); 610 ASSERT(so->so_nl7c_uri == NULL); 611 ASSERT(so->so_nl7c_rcv_mp == NULL); 612 613 ASSERT(so->so_oobmsg == NULL); 614 ASSERT(so->so_ack_mp == NULL); 615 ASSERT(so->so_conn_ind_head == NULL); 616 ASSERT(so->so_conn_ind_tail == NULL); 617 ASSERT(so->so_discon_ind_mp == NULL); 618 ASSERT(so->so_ux_bound_vp == NULL); 619 ASSERT(so->so_unbind_mp == NULL); 620 ASSERT(so->so_ops == &sotpi_sonodeops); 621 622 ASSERT(vn_matchops(vp, socktpi_vnodeops)); 623 ASSERT(vp->v_data == (caddr_t)so); 624 625 vn_free(vp); 626 627 mutex_destroy(&so->so_lock); 628 mutex_destroy(&so->so_plumb_lock); 629 cv_destroy(&so->so_state_cv); 630 cv_destroy(&so->so_ack_cv); 631 cv_destroy(&so->so_connind_cv); 632 cv_destroy(&so->so_want_cv); 633 } 634 635 static int 636 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 637 { 638 int retval; 639 640 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 641 struct sonode *so = (struct sonode *)buf; 642 643 mutex_enter(&socklist.sl_lock); 644 645 so->so_next = socklist.sl_list; 646 so->so_prev = NULL; 647 if (so->so_next != NULL) 648 so->so_next->so_prev = so; 649 socklist.sl_list = so; 650 651 mutex_exit(&socklist.sl_lock); 652 653 } 654 return (retval); 655 } 656 657 static void 658 socktpi_unix_destructor(void *buf, void *cdrarg) 659 { 660 struct sonode *so = (struct sonode *)buf; 661 662 mutex_enter(&socklist.sl_lock); 663 664 if (so->so_next != NULL) 665 so->so_next->so_prev = so->so_prev; 666 if (so->so_prev != NULL) 667 so->so_prev->so_next = so->so_next; 668 else 669 socklist.sl_list = so->so_next; 670 671 mutex_exit(&socklist.sl_lock); 672 673 socktpi_destructor(buf, cdrarg); 674 } 675 676 /* 677 * Init function called when sockfs is loaded. 678 */ 679 int 680 sockinit(int fstype, char *name) 681 { 682 static const fs_operation_def_t sock_vfsops_template[] = { 683 NULL, NULL 684 }; 685 int error; 686 major_t dev; 687 char *err_str; 688 689 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 690 if (error != 0) { 691 zcmn_err(GLOBAL_ZONEID, CE_WARN, 692 "sockinit: bad vfs ops template"); 693 return (error); 694 } 695 696 error = vn_make_ops(name, socktpi_vnodeops_template, &socktpi_vnodeops); 697 if (error != 0) { 698 err_str = "sockinit: bad sock vnode ops template"; 699 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 700 socktpi_vnodeops = NULL; 701 goto failure; 702 } 703 704 error = sosctp_init(); 705 if (error != 0) { 706 err_str = NULL; 707 goto failure; 708 } 709 710 error = sosdp_init(); 711 if (error != 0) { 712 err_str = NULL; 713 goto failure; 714 } 715 716 /* 717 * Create sonode caches. We create a special one for AF_UNIX so 718 * that we can track them for netstat(1m). 719 */ 720 socktpi_cache = kmem_cache_create("socktpi_cache", 721 sizeof (struct sonode), 0, socktpi_constructor, 722 socktpi_destructor, NULL, NULL, NULL, 0); 723 724 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 725 sizeof (struct sonode), 0, socktpi_unix_constructor, 726 socktpi_unix_destructor, NULL, NULL, NULL, 0); 727 728 /* 729 * Build initial list mapping socket parameters to vnode. 730 */ 731 rw_init(&splist_lock, NULL, RW_DEFAULT, NULL); 732 733 /* 734 * If sockets are needed before init runs /sbin/soconfig 735 * it is possible to preload the sockparams list here using 736 * calls like: 737 * sockconfig(1,2,3, "/dev/tcp", 0); 738 */ 739 740 /* 741 * Create a unique dev_t for use in so_fsid. 742 */ 743 744 if ((dev = getudev()) == (major_t)-1) 745 dev = 0; 746 sockdev = makedevice(dev, 0); 747 748 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 749 sendfile_init(); 750 nl7c_init(); 751 752 return (0); 753 754 failure: 755 (void) vfs_freevfsops_by_type(fstype); 756 if (socktpi_vnodeops != NULL) 757 vn_freevnodeops(socktpi_vnodeops); 758 if (err_str != NULL) 759 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 760 return (error); 761 } 762 763 /* 764 * Caller must hold the mutex. Used to set SOLOCKED. 765 */ 766 void 767 so_lock_single(struct sonode *so) 768 { 769 ASSERT(MUTEX_HELD(&so->so_lock)); 770 771 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 772 so->so_flag |= SOWANT; 773 cv_wait_stop(&so->so_want_cv, &so->so_lock, 774 SO_LOCK_WAKEUP_TIME); 775 } 776 so->so_flag |= SOLOCKED; 777 } 778 779 /* 780 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 781 * Used to clear SOLOCKED or SOASYNC_UNBIND. 782 */ 783 void 784 so_unlock_single(struct sonode *so, int flag) 785 { 786 ASSERT(MUTEX_HELD(&so->so_lock)); 787 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 788 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 789 ASSERT(so->so_flag & flag); 790 791 /* 792 * Process the T_DISCON_IND on so_discon_ind_mp. 793 * 794 * Call to so_drain_discon_ind will result in so_lock 795 * being dropped and re-acquired later. 796 */ 797 if (so->so_discon_ind_mp != NULL) 798 so_drain_discon_ind(so); 799 800 if (so->so_flag & SOWANT) 801 cv_broadcast(&so->so_want_cv); 802 so->so_flag &= ~(SOWANT|flag); 803 } 804 805 /* 806 * Caller must hold the mutex. Used to set SOREADLOCKED. 807 * If the caller wants nonblocking behavior it should set fmode. 808 */ 809 int 810 so_lock_read(struct sonode *so, int fmode) 811 { 812 ASSERT(MUTEX_HELD(&so->so_lock)); 813 814 while (so->so_flag & SOREADLOCKED) { 815 if (fmode & (FNDELAY|FNONBLOCK)) 816 return (EWOULDBLOCK); 817 so->so_flag |= SOWANT; 818 cv_wait_stop(&so->so_want_cv, &so->so_lock, 819 SO_LOCK_WAKEUP_TIME); 820 } 821 so->so_flag |= SOREADLOCKED; 822 return (0); 823 } 824 825 /* 826 * Like so_lock_read above but allows signals. 827 */ 828 int 829 so_lock_read_intr(struct sonode *so, int fmode) 830 { 831 ASSERT(MUTEX_HELD(&so->so_lock)); 832 833 while (so->so_flag & SOREADLOCKED) { 834 if (fmode & (FNDELAY|FNONBLOCK)) 835 return (EWOULDBLOCK); 836 so->so_flag |= SOWANT; 837 if (!cv_wait_sig(&so->so_want_cv, &so->so_lock)) 838 return (EINTR); 839 } 840 so->so_flag |= SOREADLOCKED; 841 return (0); 842 } 843 844 /* 845 * Caller must hold the mutex. Used to clear SOREADLOCKED, 846 * set in so_lock_read() or so_lock_read_intr(). 847 */ 848 void 849 so_unlock_read(struct sonode *so) 850 { 851 ASSERT(MUTEX_HELD(&so->so_lock)); 852 ASSERT(so->so_flag & SOREADLOCKED); 853 854 if (so->so_flag & SOWANT) 855 cv_broadcast(&so->so_want_cv); 856 so->so_flag &= ~(SOWANT|SOREADLOCKED); 857 } 858 859 /* 860 * Verify that the specified offset falls within the mblk and 861 * that the resulting pointer is aligned. 862 * Returns NULL if not. 863 */ 864 void * 865 sogetoff(mblk_t *mp, t_uscalar_t offset, 866 t_uscalar_t length, uint_t align_size) 867 { 868 uintptr_t ptr1, ptr2; 869 870 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 871 ptr1 = (uintptr_t)mp->b_rptr + offset; 872 ptr2 = (uintptr_t)ptr1 + length; 873 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 874 eprintline(0); 875 return (NULL); 876 } 877 if ((ptr1 & (align_size - 1)) != 0) { 878 eprintline(0); 879 return (NULL); 880 } 881 return ((void *)ptr1); 882 } 883 884 /* 885 * Return the AF_UNIX underlying filesystem vnode matching a given name. 886 * Makes sure the sending and the destination sonodes are compatible. 887 * The vnode is returned held. 888 * 889 * The underlying filesystem VSOCK vnode has a v_stream pointer that 890 * references the actual stream head (hence indirectly the actual sonode). 891 */ 892 static int 893 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 894 vnode_t **vpp) 895 { 896 vnode_t *vp; /* Underlying filesystem vnode */ 897 vnode_t *svp; /* sockfs vnode */ 898 struct sonode *so2; 899 int error; 900 901 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", 902 so, soun->sun_path)); 903 904 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 905 if (error) { 906 eprintsoline(so, error); 907 return (error); 908 } 909 if (vp->v_type != VSOCK) { 910 error = ENOTSOCK; 911 eprintsoline(so, error); 912 goto done2; 913 } 914 915 if (checkaccess) { 916 /* 917 * Check that we have permissions to access the destination 918 * vnode. This check is not done in BSD but it is required 919 * by X/Open. 920 */ 921 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED())) { 922 eprintsoline(so, error); 923 goto done2; 924 } 925 } 926 927 /* 928 * Check if the remote socket has been closed. 929 * 930 * Synchronize with vn_rele_stream by holding v_lock while traversing 931 * v_stream->sd_vnode. 932 */ 933 mutex_enter(&vp->v_lock); 934 if (vp->v_stream == NULL) { 935 mutex_exit(&vp->v_lock); 936 if (so->so_type == SOCK_DGRAM) 937 error = EDESTADDRREQ; 938 else 939 error = ECONNREFUSED; 940 941 eprintsoline(so, error); 942 goto done2; 943 } 944 ASSERT(vp->v_stream->sd_vnode); 945 svp = vp->v_stream->sd_vnode; 946 /* 947 * holding v_lock on underlying filesystem vnode and acquiring 948 * it on sockfs vnode. Assumes that no code ever attempts to 949 * acquire these locks in the reverse order. 950 */ 951 VN_HOLD(svp); 952 mutex_exit(&vp->v_lock); 953 954 if (svp->v_type != VSOCK) { 955 error = ENOTSOCK; 956 eprintsoline(so, error); 957 goto done; 958 } 959 960 so2 = VTOSO(svp); 961 962 if (so->so_type != so2->so_type) { 963 error = EPROTOTYPE; 964 eprintsoline(so, error); 965 goto done; 966 } 967 968 VN_RELE(svp); 969 *vpp = vp; 970 return (0); 971 972 done: 973 VN_RELE(svp); 974 done2: 975 VN_RELE(vp); 976 return (error); 977 } 978 979 /* 980 * Verify peer address for connect and sendto/sendmsg. 981 * Since sendto/sendmsg would not get synchronous errors from the transport 982 * provider we have to do these ugly checks in the socket layer to 983 * preserve compatibility with SunOS 4.X. 984 */ 985 int 986 so_addr_verify(struct sonode *so, const struct sockaddr *name, 987 socklen_t namelen) 988 { 989 int family; 990 991 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", so, name, namelen)); 992 993 ASSERT(name != NULL); 994 995 family = so->so_family; 996 switch (family) { 997 case AF_INET: 998 if (name->sa_family != family) { 999 eprintsoline(so, EAFNOSUPPORT); 1000 return (EAFNOSUPPORT); 1001 } 1002 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 1003 eprintsoline(so, EINVAL); 1004 return (EINVAL); 1005 } 1006 break; 1007 case AF_INET6: { 1008 #ifdef DEBUG 1009 struct sockaddr_in6 *sin6; 1010 #endif /* DEBUG */ 1011 1012 if (name->sa_family != family) { 1013 eprintsoline(so, EAFNOSUPPORT); 1014 return (EAFNOSUPPORT); 1015 } 1016 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 1017 eprintsoline(so, EINVAL); 1018 return (EINVAL); 1019 } 1020 #ifdef DEBUG 1021 /* Verify that apps don't forget to clear sin6_scope_id etc */ 1022 sin6 = (struct sockaddr_in6 *)name; 1023 if (sin6->sin6_scope_id != 0 && 1024 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 1025 zcmn_err(getzoneid(), CE_WARN, 1026 "connect/send* with uninitialized sin6_scope_id " 1027 "(%d) on socket. Pid = %d\n", 1028 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 1029 } 1030 #endif /* DEBUG */ 1031 break; 1032 } 1033 case AF_UNIX: 1034 if (so->so_state & SS_FADDR_NOXLATE) { 1035 return (0); 1036 } 1037 if (namelen < (socklen_t)sizeof (short)) { 1038 eprintsoline(so, ENOENT); 1039 return (ENOENT); 1040 } 1041 if (name->sa_family != family) { 1042 eprintsoline(so, EAFNOSUPPORT); 1043 return (EAFNOSUPPORT); 1044 } 1045 /* MAXPATHLEN + soun_family + nul termination */ 1046 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 1047 eprintsoline(so, ENAMETOOLONG); 1048 return (ENAMETOOLONG); 1049 } 1050 1051 break; 1052 1053 default: 1054 /* 1055 * Default is don't do any length or sa_family check 1056 * to allow non-sockaddr style addresses. 1057 */ 1058 break; 1059 } 1060 1061 return (0); 1062 } 1063 1064 1065 /* 1066 * Translate an AF_UNIX sockaddr_un to the transport internal name. 1067 * Assumes caller has called so_addr_verify first. 1068 */ 1069 /*ARGSUSED*/ 1070 int 1071 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 1072 socklen_t namelen, int checkaccess, 1073 void **addrp, socklen_t *addrlenp) 1074 { 1075 int error; 1076 struct sockaddr_un *soun; 1077 vnode_t *vp; 1078 void *addr; 1079 socklen_t addrlen; 1080 1081 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 1082 so, name, namelen, checkaccess)); 1083 1084 ASSERT(name != NULL); 1085 ASSERT(so->so_family == AF_UNIX); 1086 ASSERT(!(so->so_state & SS_FADDR_NOXLATE)); 1087 ASSERT(namelen >= (socklen_t)sizeof (short)); 1088 ASSERT(name->sa_family == AF_UNIX); 1089 soun = (struct sockaddr_un *)name; 1090 /* 1091 * Lookup vnode for the specified path name and verify that 1092 * it is a socket. 1093 */ 1094 error = so_ux_lookup(so, soun, checkaccess, &vp); 1095 if (error) { 1096 eprintsoline(so, error); 1097 return (error); 1098 } 1099 /* 1100 * Use the address of the peer vnode as the address to send 1101 * to. We release the peer vnode here. In case it has been 1102 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the 1103 * transport the message will get an error or be dropped. 1104 */ 1105 so->so_ux_faddr.soua_vp = vp; 1106 so->so_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT; 1107 addr = &so->so_ux_faddr; 1108 addrlen = (socklen_t)sizeof (so->so_ux_faddr); 1109 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 1110 addrlen, vp)); 1111 VN_RELE(vp); 1112 *addrp = addr; 1113 *addrlenp = (socklen_t)addrlen; 1114 return (0); 1115 } 1116 1117 /* 1118 * Esballoc free function for messages that contain SO_FILEP option. 1119 * Decrement the reference count on the file pointers using closef. 1120 */ 1121 void 1122 fdbuf_free(struct fdbuf *fdbuf) 1123 { 1124 int i; 1125 struct file *fp; 1126 1127 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 1128 for (i = 0; i < fdbuf->fd_numfd; i++) { 1129 /* 1130 * We need pointer size alignment for fd_fds. On a LP64 1131 * kernel, the required alignment is 8 bytes while 1132 * the option headers and values are only 4 bytes 1133 * aligned. So its safer to do a bcopy compared to 1134 * assigning fdbuf->fd_fds[i] to fp. 1135 */ 1136 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1137 dprint(1, ("fdbuf_free: [%d] = %p\n", i, fp)); 1138 (void) closef(fp); 1139 } 1140 if (fdbuf->fd_ebuf != NULL) 1141 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 1142 kmem_free(fdbuf, fdbuf->fd_size); 1143 } 1144 1145 /* 1146 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 1147 * Waits if memory is not available. 1148 */ 1149 mblk_t * 1150 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 1151 { 1152 uchar_t *buf; 1153 mblk_t *mp; 1154 1155 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 1156 buf = kmem_alloc(size, KM_SLEEP); 1157 fdbuf->fd_ebuf = (caddr_t)buf; 1158 fdbuf->fd_ebuflen = size; 1159 fdbuf->fd_frtn.free_func = fdbuf_free; 1160 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 1161 1162 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 1163 mp->b_datap->db_type = M_PROTO; 1164 return (mp); 1165 } 1166 1167 /* 1168 * Extract file descriptors from a fdbuf. 1169 * Return list in rights/rightslen. 1170 */ 1171 /*ARGSUSED*/ 1172 static int 1173 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 1174 { 1175 int i, fd; 1176 int *rp; 1177 struct file *fp; 1178 int numfd; 1179 1180 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 1181 fdbuf->fd_numfd, rightslen)); 1182 1183 numfd = fdbuf->fd_numfd; 1184 ASSERT(rightslen == numfd * (int)sizeof (int)); 1185 1186 /* 1187 * Allocate a file descriptor and increment the f_count. 1188 * The latter is needed since we always call fdbuf_free 1189 * which performs a closef. 1190 */ 1191 rp = (int *)rights; 1192 for (i = 0; i < numfd; i++) { 1193 if ((fd = ufalloc(0)) == -1) 1194 goto cleanup; 1195 /* 1196 * We need pointer size alignment for fd_fds. On a LP64 1197 * kernel, the required alignment is 8 bytes while 1198 * the option headers and values are only 4 bytes 1199 * aligned. So its safer to do a bcopy compared to 1200 * assigning fdbuf->fd_fds[i] to fp. 1201 */ 1202 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1203 mutex_enter(&fp->f_tlock); 1204 fp->f_count++; 1205 mutex_exit(&fp->f_tlock); 1206 setf(fd, fp); 1207 *rp++ = fd; 1208 #ifdef C2_AUDIT 1209 if (audit_active) 1210 audit_fdrecv(fd, fp); 1211 #endif 1212 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 1213 i, fd, fp, fp->f_count)); 1214 } 1215 return (0); 1216 1217 cleanup: 1218 /* 1219 * Undo whatever partial work the loop above has done. 1220 */ 1221 { 1222 int j; 1223 1224 rp = (int *)rights; 1225 for (j = 0; j < i; j++) { 1226 dprint(0, 1227 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 1228 (void) closeandsetf(*rp++, NULL); 1229 } 1230 } 1231 1232 return (EMFILE); 1233 } 1234 1235 /* 1236 * Insert file descriptors into an fdbuf. 1237 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 1238 * by calling fdbuf_free(). 1239 */ 1240 int 1241 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 1242 { 1243 int numfd, i; 1244 int *fds; 1245 struct file *fp; 1246 struct fdbuf *fdbuf; 1247 int fdbufsize; 1248 1249 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 1250 1251 numfd = rightslen / (int)sizeof (int); 1252 1253 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 1254 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 1255 fdbuf->fd_size = fdbufsize; 1256 fdbuf->fd_numfd = 0; 1257 fdbuf->fd_ebuf = NULL; 1258 fdbuf->fd_ebuflen = 0; 1259 fds = (int *)rights; 1260 for (i = 0; i < numfd; i++) { 1261 if ((fp = getf(fds[i])) == NULL) { 1262 fdbuf_free(fdbuf); 1263 return (EBADF); 1264 } 1265 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 1266 i, fds[i], fp, fp->f_count)); 1267 mutex_enter(&fp->f_tlock); 1268 fp->f_count++; 1269 mutex_exit(&fp->f_tlock); 1270 /* 1271 * The maximum alignment for fdbuf (or any option header 1272 * and its value) it 4 bytes. On a LP64 kernel, the alignment 1273 * is not sufficient for pointers (fd_fds in this case). Since 1274 * we just did a kmem_alloc (we get a double word alignment), 1275 * we don't need to do anything on the send side (we loose 1276 * the double word alignment because fdbuf goes after an 1277 * option header (eg T_unitdata_req) which is only 4 byte 1278 * aligned). We take care of this when we extract the file 1279 * descriptor in fdbuf_extract or fdbuf_free. 1280 */ 1281 fdbuf->fd_fds[i] = fp; 1282 fdbuf->fd_numfd++; 1283 releasef(fds[i]); 1284 #ifdef C2_AUDIT 1285 if (audit_active) 1286 audit_fdsend(fds[i], fp, 0); 1287 #endif 1288 } 1289 *fdbufp = fdbuf; 1290 return (0); 1291 } 1292 1293 static int 1294 fdbuf_optlen(int rightslen) 1295 { 1296 int numfd; 1297 1298 numfd = rightslen / (int)sizeof (int); 1299 1300 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 1301 } 1302 1303 static t_uscalar_t 1304 fdbuf_cmsglen(int fdbuflen) 1305 { 1306 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 1307 (int)sizeof (struct file *) * (int)sizeof (int)); 1308 } 1309 1310 1311 /* 1312 * Return non-zero if the mblk and fdbuf are consistent. 1313 */ 1314 static int 1315 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 1316 { 1317 if (fdbuflen >= FDBUF_HDRSIZE && 1318 fdbuflen == fdbuf->fd_size) { 1319 frtn_t *frp = mp->b_datap->db_frtnp; 1320 /* 1321 * Check that the SO_FILEP portion of the 1322 * message has not been modified by 1323 * the loopback transport. The sending sockfs generates 1324 * a message that is esballoc'ed with the free function 1325 * being fdbuf_free() and where free_arg contains the 1326 * identical information as the SO_FILEP content. 1327 * 1328 * If any of these constraints are not satisfied we 1329 * silently ignore the option. 1330 */ 1331 ASSERT(mp); 1332 if (frp != NULL && 1333 frp->free_func == fdbuf_free && 1334 frp->free_arg != NULL && 1335 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 1336 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 1337 fdbuf, fdbuflen)); 1338 return (1); 1339 } else { 1340 zcmn_err(getzoneid(), CE_WARN, 1341 "sockfs: mismatched fdbuf content (%p)", 1342 (void *)mp); 1343 return (0); 1344 } 1345 } else { 1346 zcmn_err(getzoneid(), CE_WARN, 1347 "sockfs: mismatched fdbuf len %d, %d\n", 1348 fdbuflen, fdbuf->fd_size); 1349 return (0); 1350 } 1351 } 1352 1353 /* 1354 * When the file descriptors returned by sorecvmsg can not be passed 1355 * to the application this routine will cleanup the references on 1356 * the files. Start at startoff bytes into the buffer. 1357 */ 1358 static void 1359 close_fds(void *fdbuf, int fdbuflen, int startoff) 1360 { 1361 int *fds = (int *)fdbuf; 1362 int numfd = fdbuflen / (int)sizeof (int); 1363 int i; 1364 1365 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 1366 1367 for (i = 0; i < numfd; i++) { 1368 if (startoff < 0) 1369 startoff = 0; 1370 if (startoff < (int)sizeof (int)) { 1371 /* 1372 * This file descriptor is partially or fully after 1373 * the offset 1374 */ 1375 dprint(0, 1376 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 1377 (void) closeandsetf(fds[i], NULL); 1378 } 1379 startoff -= (int)sizeof (int); 1380 } 1381 } 1382 1383 /* 1384 * Close all file descriptors contained in the control part starting at 1385 * the startoffset. 1386 */ 1387 void 1388 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 1389 int startoff) 1390 { 1391 struct cmsghdr *cmsg; 1392 1393 if (control == NULL) 1394 return; 1395 1396 if (oldflg) { 1397 close_fds(control, controllen, startoff); 1398 return; 1399 } 1400 /* Scan control part for file descriptors. */ 1401 for (cmsg = (struct cmsghdr *)control; 1402 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1403 cmsg = CMSG_NEXT(cmsg)) { 1404 if (cmsg->cmsg_level == SOL_SOCKET && 1405 cmsg->cmsg_type == SCM_RIGHTS) { 1406 close_fds(CMSG_CONTENT(cmsg), 1407 (int)CMSG_CONTENTLEN(cmsg), 1408 startoff - (int)sizeof (struct cmsghdr)); 1409 } 1410 startoff -= cmsg->cmsg_len; 1411 } 1412 } 1413 1414 /* 1415 * Returns a pointer/length for the file descriptors contained 1416 * in the control buffer. Returns with *fdlenp == -1 if there are no 1417 * file descriptor options present. This is different than there being 1418 * a zero-length file descriptor option. 1419 * Fail if there are multiple SCM_RIGHT cmsgs. 1420 */ 1421 int 1422 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1423 void **fdsp, int *fdlenp) 1424 { 1425 struct cmsghdr *cmsg; 1426 void *fds; 1427 int fdlen; 1428 1429 if (control == NULL) { 1430 *fdsp = NULL; 1431 *fdlenp = -1; 1432 return (0); 1433 } 1434 1435 if (oldflg) { 1436 *fdsp = control; 1437 if (controllen == 0) 1438 *fdlenp = -1; 1439 else 1440 *fdlenp = controllen; 1441 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1442 return (0); 1443 } 1444 1445 fds = NULL; 1446 fdlen = 0; 1447 1448 for (cmsg = (struct cmsghdr *)control; 1449 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1450 cmsg = CMSG_NEXT(cmsg)) { 1451 if (cmsg->cmsg_level == SOL_SOCKET && 1452 cmsg->cmsg_type == SCM_RIGHTS) { 1453 if (fds != NULL) 1454 return (EINVAL); 1455 fds = CMSG_CONTENT(cmsg); 1456 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1457 dprint(1, ("so_getfdopt: new %lu\n", 1458 (size_t)CMSG_CONTENTLEN(cmsg))); 1459 } 1460 } 1461 if (fds == NULL) { 1462 dprint(1, ("so_getfdopt: NONE\n")); 1463 *fdlenp = -1; 1464 } else 1465 *fdlenp = fdlen; 1466 *fdsp = fds; 1467 return (0); 1468 } 1469 1470 /* 1471 * Return the length of the options including any file descriptor options. 1472 */ 1473 t_uscalar_t 1474 so_optlen(void *control, t_uscalar_t controllen, int oldflg) 1475 { 1476 struct cmsghdr *cmsg; 1477 t_uscalar_t optlen = 0; 1478 t_uscalar_t len; 1479 1480 if (control == NULL) 1481 return (0); 1482 1483 if (oldflg) 1484 return ((t_uscalar_t)(sizeof (struct T_opthdr) + 1485 fdbuf_optlen(controllen))); 1486 1487 for (cmsg = (struct cmsghdr *)control; 1488 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1489 cmsg = CMSG_NEXT(cmsg)) { 1490 if (cmsg->cmsg_level == SOL_SOCKET && 1491 cmsg->cmsg_type == SCM_RIGHTS) { 1492 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg)); 1493 } else { 1494 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1495 } 1496 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) + 1497 sizeof (struct T_opthdr)); 1498 } 1499 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n", 1500 controllen, oldflg, optlen)); 1501 return (optlen); 1502 } 1503 1504 /* 1505 * Copy options from control to the mblk. Skip any file descriptor options. 1506 */ 1507 void 1508 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp) 1509 { 1510 struct T_opthdr toh; 1511 struct cmsghdr *cmsg; 1512 1513 if (control == NULL) 1514 return; 1515 1516 if (oldflg) { 1517 /* No real options - caller has handled file descriptors */ 1518 return; 1519 } 1520 for (cmsg = (struct cmsghdr *)control; 1521 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1522 cmsg = CMSG_NEXT(cmsg)) { 1523 /* 1524 * Note: The caller handles file descriptors prior 1525 * to calling this function. 1526 */ 1527 t_uscalar_t len; 1528 1529 if (cmsg->cmsg_level == SOL_SOCKET && 1530 cmsg->cmsg_type == SCM_RIGHTS) 1531 continue; 1532 1533 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1534 toh.level = cmsg->cmsg_level; 1535 toh.name = cmsg->cmsg_type; 1536 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr); 1537 toh.status = 0; 1538 1539 soappendmsg(mp, &toh, sizeof (toh)); 1540 soappendmsg(mp, CMSG_CONTENT(cmsg), len); 1541 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len; 1542 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 1543 } 1544 } 1545 1546 /* 1547 * Return the length of the control message derived from the options. 1548 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP. 1549 * When oldflg is set only include SO_FILEP. 1550 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1551 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1552 * also be checked for any possible impacts. 1553 */ 1554 t_uscalar_t 1555 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) 1556 { 1557 t_uscalar_t cmsglen = 0; 1558 struct T_opthdr *tohp; 1559 t_uscalar_t len; 1560 t_uscalar_t last_roundup = 0; 1561 1562 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1563 1564 for (tohp = (struct T_opthdr *)opt; 1565 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1566 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1567 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n", 1568 tohp->level, tohp->name, tohp->len)); 1569 if (tohp->level == SOL_SOCKET && 1570 (tohp->name == SO_SRCADDR || 1571 tohp->name == SO_UNIX_CLOSE)) { 1572 continue; 1573 } 1574 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1575 struct fdbuf *fdbuf; 1576 int fdbuflen; 1577 1578 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1579 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1580 1581 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1582 continue; 1583 if (oldflg) { 1584 cmsglen += fdbuf_cmsglen(fdbuflen); 1585 continue; 1586 } 1587 len = fdbuf_cmsglen(fdbuflen); 1588 } else if (tohp->level == SOL_SOCKET && 1589 tohp->name == SCM_TIMESTAMP) { 1590 if (oldflg) 1591 continue; 1592 1593 if (get_udatamodel() == DATAMODEL_NATIVE) { 1594 len = sizeof (struct timeval); 1595 } else { 1596 len = sizeof (struct timeval32); 1597 } 1598 } else { 1599 if (oldflg) 1600 continue; 1601 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1602 } 1603 /* 1604 * Exclude roundup for last option to not set 1605 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit. 1606 */ 1607 last_roundup = (t_uscalar_t) 1608 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) - 1609 (len + (int)sizeof (struct cmsghdr))); 1610 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) + 1611 last_roundup; 1612 } 1613 cmsglen -= last_roundup; 1614 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n", 1615 optlen, oldflg, cmsglen)); 1616 return (cmsglen); 1617 } 1618 1619 /* 1620 * Copy options from options to the control. Convert SO_FILEP to 1621 * file descriptors. 1622 * Returns errno or zero. 1623 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1624 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1625 * also be checked for any possible impacts. 1626 */ 1627 int 1628 so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, 1629 void *control, t_uscalar_t controllen) 1630 { 1631 struct T_opthdr *tohp; 1632 struct cmsghdr *cmsg; 1633 struct fdbuf *fdbuf; 1634 int fdbuflen; 1635 int error; 1636 #if defined(DEBUG) || defined(__lint) 1637 struct cmsghdr *cend = (struct cmsghdr *) 1638 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen)); 1639 #endif 1640 cmsg = (struct cmsghdr *)control; 1641 1642 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1643 1644 for (tohp = (struct T_opthdr *)opt; 1645 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1646 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1647 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n", 1648 tohp->level, tohp->name, tohp->len)); 1649 1650 if (tohp->level == SOL_SOCKET && 1651 (tohp->name == SO_SRCADDR || 1652 tohp->name == SO_UNIX_CLOSE)) { 1653 continue; 1654 } 1655 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen); 1656 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1657 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1658 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1659 1660 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1661 return (EPROTO); 1662 if (oldflg) { 1663 error = fdbuf_extract(fdbuf, control, 1664 (int)controllen); 1665 if (error != 0) 1666 return (error); 1667 continue; 1668 } else { 1669 int fdlen; 1670 1671 fdlen = (int)fdbuf_cmsglen( 1672 (int)_TPI_TOPT_DATALEN(tohp)); 1673 1674 cmsg->cmsg_level = tohp->level; 1675 cmsg->cmsg_type = SCM_RIGHTS; 1676 cmsg->cmsg_len = (socklen_t)(fdlen + 1677 sizeof (struct cmsghdr)); 1678 1679 error = fdbuf_extract(fdbuf, 1680 CMSG_CONTENT(cmsg), fdlen); 1681 if (error != 0) 1682 return (error); 1683 } 1684 } else if (tohp->level == SOL_SOCKET && 1685 tohp->name == SCM_TIMESTAMP) { 1686 timestruc_t *timestamp; 1687 1688 if (oldflg) 1689 continue; 1690 1691 cmsg->cmsg_level = tohp->level; 1692 cmsg->cmsg_type = tohp->name; 1693 1694 timestamp = 1695 (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1], 1696 sizeof (intptr_t)); 1697 1698 if (get_udatamodel() == DATAMODEL_NATIVE) { 1699 struct timeval tv; 1700 1701 cmsg->cmsg_len = sizeof (struct timeval) + 1702 sizeof (struct cmsghdr); 1703 tv.tv_sec = timestamp->tv_sec; 1704 tv.tv_usec = timestamp->tv_nsec / 1705 (NANOSEC / MICROSEC); 1706 /* 1707 * on LP64 systems, the struct timeval in 1708 * the destination will not be 8-byte aligned, 1709 * so use bcopy to avoid alignment trouble 1710 */ 1711 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv)); 1712 } else { 1713 struct timeval32 *time32; 1714 1715 cmsg->cmsg_len = sizeof (struct timeval32) + 1716 sizeof (struct cmsghdr); 1717 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg); 1718 time32->tv_sec = (time32_t)timestamp->tv_sec; 1719 time32->tv_usec = 1720 (int32_t)(timestamp->tv_nsec / 1721 (NANOSEC / MICROSEC)); 1722 } 1723 1724 } else { 1725 if (oldflg) 1726 continue; 1727 1728 cmsg->cmsg_level = tohp->level; 1729 cmsg->cmsg_type = tohp->name; 1730 cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) + 1731 sizeof (struct cmsghdr)); 1732 1733 /* copy content to control data part */ 1734 bcopy(&tohp[1], CMSG_CONTENT(cmsg), 1735 CMSG_CONTENTLEN(cmsg)); 1736 } 1737 /* move to next CMSG structure! */ 1738 cmsg = CMSG_NEXT(cmsg); 1739 } 1740 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n", 1741 control, controllen, cend, cmsg)); 1742 ASSERT(cmsg <= cend); 1743 return (0); 1744 } 1745 1746 /* 1747 * Extract the SO_SRCADDR option value if present. 1748 */ 1749 void 1750 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1751 t_uscalar_t *srclenp) 1752 { 1753 struct T_opthdr *tohp; 1754 1755 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1756 1757 ASSERT(srcp != NULL && srclenp != NULL); 1758 *srcp = NULL; 1759 *srclenp = 0; 1760 1761 for (tohp = (struct T_opthdr *)opt; 1762 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1763 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1764 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1765 tohp->level, tohp->name, tohp->len)); 1766 if (tohp->level == SOL_SOCKET && 1767 tohp->name == SO_SRCADDR) { 1768 *srcp = _TPI_TOPT_DATA(tohp); 1769 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1770 } 1771 } 1772 } 1773 1774 /* 1775 * Verify if the SO_UNIX_CLOSE option is present. 1776 */ 1777 int 1778 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1779 { 1780 struct T_opthdr *tohp; 1781 1782 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1783 1784 for (tohp = (struct T_opthdr *)opt; 1785 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1786 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1787 dprint(1, 1788 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1789 tohp->level, tohp->name, tohp->len)); 1790 if (tohp->level == SOL_SOCKET && 1791 tohp->name == SO_UNIX_CLOSE) 1792 return (1); 1793 } 1794 return (0); 1795 } 1796 1797 /* 1798 * Allocate an M_PROTO message. 1799 * 1800 * If allocation fails the behavior depends on sleepflg: 1801 * _ALLOC_NOSLEEP fail immediately 1802 * _ALLOC_INTR sleep for memory until a signal is caught 1803 * _ALLOC_SLEEP sleep forever. Don't return NULL. 1804 */ 1805 mblk_t * 1806 soallocproto(size_t size, int sleepflg) 1807 { 1808 mblk_t *mp; 1809 1810 /* Round up size for reuse */ 1811 size = MAX(size, 64); 1812 mp = allocb(size, BPRI_MED); 1813 if (mp == NULL) { 1814 int error; /* Dummy - error not returned to caller */ 1815 1816 switch (sleepflg) { 1817 case _ALLOC_SLEEP: 1818 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, &error); 1819 ASSERT(mp); 1820 break; 1821 case _ALLOC_INTR: 1822 mp = allocb_wait(size, BPRI_MED, 0, &error); 1823 if (mp == NULL) { 1824 /* Caught signal while sleeping for memory */ 1825 eprintline(ENOBUFS); 1826 return (NULL); 1827 } 1828 break; 1829 case _ALLOC_NOSLEEP: 1830 default: 1831 eprintline(ENOBUFS); 1832 return (NULL); 1833 } 1834 } 1835 DB_TYPE(mp) = M_PROTO; 1836 return (mp); 1837 } 1838 1839 /* 1840 * Allocate an M_PROTO message with a single component. 1841 * len is the length of buf. size is the amount to allocate. 1842 * 1843 * buf can be NULL with a non-zero len. 1844 * This results in a bzero'ed chunk being placed the message. 1845 */ 1846 mblk_t * 1847 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg) 1848 { 1849 mblk_t *mp; 1850 1851 if (size == 0) 1852 size = len; 1853 1854 ASSERT(size >= len); 1855 /* Round up size for reuse */ 1856 size = MAX(size, 64); 1857 mp = soallocproto(size, sleepflg); 1858 if (mp == NULL) 1859 return (NULL); 1860 mp->b_datap->db_type = M_PROTO; 1861 if (len != 0) { 1862 if (buf != NULL) 1863 bcopy(buf, mp->b_wptr, len); 1864 else 1865 bzero(mp->b_wptr, len); 1866 mp->b_wptr += len; 1867 } 1868 return (mp); 1869 } 1870 1871 /* 1872 * Append buf/len to mp. 1873 * The caller has to ensure that there is enough room in the mblk. 1874 * 1875 * buf can be NULL with a non-zero len. 1876 * This results in a bzero'ed chunk being placed the message. 1877 */ 1878 void 1879 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1880 { 1881 ASSERT(mp); 1882 1883 if (len != 0) { 1884 /* Assert for room left */ 1885 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1886 if (buf != NULL) 1887 bcopy(buf, mp->b_wptr, len); 1888 else 1889 bzero(mp->b_wptr, len); 1890 } 1891 mp->b_wptr += len; 1892 } 1893 1894 /* 1895 * Create a message using two kernel buffers. 1896 * If size is set that will determine the allocation size (e.g. for future 1897 * soappendmsg calls). If size is zero it is derived from the buffer 1898 * lengths. 1899 */ 1900 mblk_t * 1901 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1902 ssize_t size, int sleepflg) 1903 { 1904 mblk_t *mp; 1905 1906 if (size == 0) 1907 size = len1 + len2; 1908 ASSERT(size >= len1 + len2); 1909 1910 mp = soallocproto1(buf1, len1, size, sleepflg); 1911 if (mp) 1912 soappendmsg(mp, buf2, len2); 1913 return (mp); 1914 } 1915 1916 /* 1917 * Create a message using three kernel buffers. 1918 * If size is set that will determine the allocation size (for future 1919 * soappendmsg calls). If size is zero it is derived from the buffer 1920 * lengths. 1921 */ 1922 mblk_t * 1923 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1924 const void *buf3, ssize_t len3, ssize_t size, int sleepflg) 1925 { 1926 mblk_t *mp; 1927 1928 if (size == 0) 1929 size = len1 + len2 +len3; 1930 ASSERT(size >= len1 + len2 + len3); 1931 1932 mp = soallocproto1(buf1, len1, size, sleepflg); 1933 if (mp != NULL) { 1934 soappendmsg(mp, buf2, len2); 1935 soappendmsg(mp, buf3, len3); 1936 } 1937 return (mp); 1938 } 1939 1940 #ifdef DEBUG 1941 char * 1942 pr_state(uint_t state, uint_t mode) 1943 { 1944 static char buf[1024]; 1945 1946 buf[0] = 0; 1947 if (state & SS_ISCONNECTED) 1948 strcat(buf, "ISCONNECTED "); 1949 if (state & SS_ISCONNECTING) 1950 strcat(buf, "ISCONNECTING "); 1951 if (state & SS_ISDISCONNECTING) 1952 strcat(buf, "ISDISCONNECTING "); 1953 if (state & SS_CANTSENDMORE) 1954 strcat(buf, "CANTSENDMORE "); 1955 1956 if (state & SS_CANTRCVMORE) 1957 strcat(buf, "CANTRCVMORE "); 1958 if (state & SS_ISBOUND) 1959 strcat(buf, "ISBOUND "); 1960 if (state & SS_NDELAY) 1961 strcat(buf, "NDELAY "); 1962 if (state & SS_NONBLOCK) 1963 strcat(buf, "NONBLOCK "); 1964 1965 if (state & SS_ASYNC) 1966 strcat(buf, "ASYNC "); 1967 if (state & SS_ACCEPTCONN) 1968 strcat(buf, "ACCEPTCONN "); 1969 if (state & SS_HASCONNIND) 1970 strcat(buf, "HASCONNIND "); 1971 if (state & SS_SAVEDEOR) 1972 strcat(buf, "SAVEDEOR "); 1973 1974 if (state & SS_RCVATMARK) 1975 strcat(buf, "RCVATMARK "); 1976 if (state & SS_OOBPEND) 1977 strcat(buf, "OOBPEND "); 1978 if (state & SS_HAVEOOBDATA) 1979 strcat(buf, "HAVEOOBDATA "); 1980 if (state & SS_HADOOBDATA) 1981 strcat(buf, "HADOOBDATA "); 1982 1983 if (state & SS_FADDR_NOXLATE) 1984 strcat(buf, "FADDR_NOXLATE "); 1985 1986 if (mode & SM_PRIV) 1987 strcat(buf, "PRIV "); 1988 if (mode & SM_ATOMIC) 1989 strcat(buf, "ATOMIC "); 1990 if (mode & SM_ADDR) 1991 strcat(buf, "ADDR "); 1992 if (mode & SM_CONNREQUIRED) 1993 strcat(buf, "CONNREQUIRED "); 1994 1995 if (mode & SM_FDPASSING) 1996 strcat(buf, "FDPASSING "); 1997 if (mode & SM_EXDATA) 1998 strcat(buf, "EXDATA "); 1999 if (mode & SM_OPTDATA) 2000 strcat(buf, "OPTDATA "); 2001 if (mode & SM_BYTESTREAM) 2002 strcat(buf, "BYTESTREAM "); 2003 return (buf); 2004 } 2005 2006 char * 2007 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 2008 { 2009 static char buf[1024]; 2010 2011 if (addr == NULL || addrlen == 0) { 2012 sprintf(buf, "(len %d) %p", addrlen, addr); 2013 return (buf); 2014 } 2015 switch (family) { 2016 case AF_INET: { 2017 struct sockaddr_in sin; 2018 2019 bcopy(addr, &sin, sizeof (sin)); 2020 2021 (void) sprintf(buf, "(len %d) %x/%d", 2022 addrlen, ntohl(sin.sin_addr.s_addr), 2023 ntohs(sin.sin_port)); 2024 break; 2025 } 2026 case AF_INET6: { 2027 struct sockaddr_in6 sin6; 2028 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 2029 2030 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 2031 sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 2032 addrlen, 2033 ntohs(piece[0]), ntohs(piece[1]), 2034 ntohs(piece[2]), ntohs(piece[3]), 2035 ntohs(piece[4]), ntohs(piece[5]), 2036 ntohs(piece[6]), ntohs(piece[7]), 2037 ntohs(sin6.sin6_port)); 2038 break; 2039 } 2040 case AF_UNIX: { 2041 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 2042 2043 (void) sprintf(buf, "(len %d) %s", 2044 addrlen, 2045 (soun == NULL) ? "(none)" : soun->sun_path); 2046 break; 2047 } 2048 default: 2049 (void) sprintf(buf, "(unknown af %d)", family); 2050 break; 2051 } 2052 return (buf); 2053 } 2054 2055 /* The logical equivalence operator (a if-and-only-if b) */ 2056 #define EQUIV(a, b) (((a) && (b)) || (!(a) && (!(b)))) 2057 2058 /* 2059 * Verify limitations and invariants on oob state. 2060 * Return 1 if OK, otherwise 0 so that it can be used as 2061 * ASSERT(verify_oobstate(so)); 2062 */ 2063 int 2064 so_verify_oobstate(struct sonode *so) 2065 { 2066 ASSERT(MUTEX_HELD(&so->so_lock)); 2067 2068 /* 2069 * The possible state combinations are: 2070 * 0 2071 * SS_OOBPEND 2072 * SS_OOBPEND|SS_HAVEOOBDATA 2073 * SS_OOBPEND|SS_HADOOBDATA 2074 * SS_HADOOBDATA 2075 */ 2076 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 2077 case 0: 2078 case SS_OOBPEND: 2079 case SS_OOBPEND|SS_HAVEOOBDATA: 2080 case SS_OOBPEND|SS_HADOOBDATA: 2081 case SS_HADOOBDATA: 2082 break; 2083 default: 2084 printf("Bad oob state 1 (%p): counts %d/%d state %s\n", 2085 so, so->so_oobsigcnt, 2086 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2087 return (0); 2088 } 2089 2090 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 2091 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 2092 printf("Bad oob state 2 (%p): counts %d/%d state %s\n", 2093 so, so->so_oobsigcnt, 2094 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2095 return (0); 2096 } 2097 2098 /* 2099 * (so_oobsigcnt != 0 or SS_RCVATMARK) iff SS_OOBPEND 2100 */ 2101 if (!EQUIV((so->so_oobsigcnt != 0) || (so->so_state & SS_RCVATMARK), 2102 so->so_state & SS_OOBPEND)) { 2103 printf("Bad oob state 3 (%p): counts %d/%d state %s\n", 2104 so, so->so_oobsigcnt, 2105 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2106 return (0); 2107 } 2108 2109 /* 2110 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 2111 */ 2112 if (!(so->so_options & SO_OOBINLINE) && 2113 !EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 2114 printf("Bad oob state 4 (%p): counts %d/%d state %s\n", 2115 so, so->so_oobsigcnt, 2116 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2117 return (0); 2118 } 2119 if (so->so_oobsigcnt < so->so_oobcnt) { 2120 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 2121 so, so->so_oobsigcnt, 2122 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2123 return (0); 2124 } 2125 return (1); 2126 } 2127 #undef EQUIV 2128 2129 #endif /* DEBUG */ 2130 2131 /* initialize sockfs zone specific kstat related items */ 2132 void * 2133 sock_kstat_init(zoneid_t zoneid) 2134 { 2135 kstat_t *ksp; 2136 2137 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 2138 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 2139 2140 if (ksp != NULL) { 2141 ksp->ks_update = sockfs_update; 2142 ksp->ks_snapshot = sockfs_snapshot; 2143 ksp->ks_lock = &socklist.sl_lock; 2144 ksp->ks_private = (void *)(uintptr_t)zoneid; 2145 kstat_install(ksp); 2146 } 2147 2148 return (ksp); 2149 } 2150 2151 /* tear down sockfs zone specific kstat related items */ 2152 /*ARGSUSED*/ 2153 void 2154 sock_kstat_fini(zoneid_t zoneid, void *arg) 2155 { 2156 kstat_t *ksp = (kstat_t *)arg; 2157 2158 if (ksp != NULL) { 2159 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 2160 kstat_delete(ksp); 2161 } 2162 } 2163 2164 /* 2165 * Zones: 2166 * Note that nactive is going to be different for each zone. 2167 * This means we require kstat to call sockfs_update and then sockfs_snapshot 2168 * for the same zone, or sockfs_snapshot will be taken into the wrong size 2169 * buffer. This is safe, but if the buffer is too small, user will not be 2170 * given details of all sockets. However, as this kstat has a ks_lock, kstat 2171 * driver will keep it locked between the update and the snapshot, so no 2172 * other process (zone) can currently get inbetween resulting in a wrong size 2173 * buffer allocation. 2174 */ 2175 static int 2176 sockfs_update(kstat_t *ksp, int rw) 2177 { 2178 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 2179 struct sonode *so; /* current sonode on socklist */ 2180 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2181 2182 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2183 2184 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2185 return (EACCES); 2186 } 2187 2188 for (so = socklist.sl_list; so != NULL; so = so->so_next) { 2189 if (so->so_accessvp != NULL && so->so_zoneid == myzoneid) { 2190 nactive++; 2191 } 2192 } 2193 ksp->ks_ndata = nactive; 2194 ksp->ks_data_size = nactive * sizeof (struct k_sockinfo); 2195 2196 return (0); 2197 } 2198 2199 static int 2200 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 2201 { 2202 int ns; /* # of sonodes we've copied */ 2203 struct sonode *so; /* current sonode on socklist */ 2204 struct k_sockinfo *pksi; /* where we put sockinfo data */ 2205 t_uscalar_t sn_len; /* soa_len */ 2206 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2207 2208 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2209 2210 ksp->ks_snaptime = gethrtime(); 2211 2212 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2213 return (EACCES); 2214 } 2215 2216 /* 2217 * for each sonode on the socklist, we massage the important 2218 * info into buf, in k_sockinfo format. 2219 */ 2220 pksi = (struct k_sockinfo *)buf; 2221 for (ns = 0, so = socklist.sl_list; so != NULL; so = so->so_next) { 2222 /* only stuff active sonodes and the same zone: */ 2223 if (so->so_accessvp == NULL || so->so_zoneid != myzoneid) { 2224 continue; 2225 } 2226 2227 /* 2228 * If the sonode was activated between the update and the 2229 * snapshot, we're done - as this is only a snapshot. 2230 */ 2231 if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) { 2232 break; 2233 } 2234 2235 /* copy important info into buf: */ 2236 pksi->ks_si.si_size = sizeof (struct k_sockinfo); 2237 pksi->ks_si.si_family = so->so_family; 2238 pksi->ks_si.si_type = so->so_type; 2239 pksi->ks_si.si_flag = so->so_flag; 2240 pksi->ks_si.si_state = so->so_state; 2241 pksi->ks_si.si_serv_type = so->so_serv_type; 2242 pksi->ks_si.si_ux_laddr_sou_magic = so->so_ux_laddr.soua_magic; 2243 pksi->ks_si.si_ux_faddr_sou_magic = so->so_ux_faddr.soua_magic; 2244 pksi->ks_si.si_laddr_soa_len = so->so_laddr.soa_len; 2245 pksi->ks_si.si_faddr_soa_len = so->so_faddr.soa_len; 2246 pksi->ks_si.si_szoneid = so->so_zoneid; 2247 2248 mutex_enter(&so->so_lock); 2249 2250 if (so->so_laddr_sa != NULL) { 2251 ASSERT(so->so_laddr_sa->sa_data != NULL); 2252 sn_len = so->so_laddr_len; 2253 ASSERT(sn_len <= sizeof (short) + 2254 sizeof (pksi->ks_si.si_laddr_sun_path)); 2255 2256 pksi->ks_si.si_laddr_family = 2257 so->so_laddr_sa->sa_family; 2258 if (sn_len != 0) { 2259 /* AF_UNIX socket names are NULL terminated */ 2260 (void) strncpy(pksi->ks_si.si_laddr_sun_path, 2261 so->so_laddr_sa->sa_data, 2262 sizeof (pksi->ks_si.si_laddr_sun_path)); 2263 sn_len = strlen(pksi->ks_si.si_laddr_sun_path); 2264 } 2265 pksi->ks_si.si_laddr_sun_path[sn_len] = 0; 2266 } 2267 2268 if (so->so_faddr_sa != NULL) { 2269 ASSERT(so->so_faddr_sa->sa_data != NULL); 2270 sn_len = so->so_faddr_len; 2271 ASSERT(sn_len <= sizeof (short) + 2272 sizeof (pksi->ks_si.si_faddr_sun_path)); 2273 2274 pksi->ks_si.si_faddr_family = 2275 so->so_faddr_sa->sa_family; 2276 if (sn_len != 0) { 2277 (void) strncpy(pksi->ks_si.si_faddr_sun_path, 2278 so->so_faddr_sa->sa_data, 2279 sizeof (pksi->ks_si.si_faddr_sun_path)); 2280 sn_len = strlen(pksi->ks_si.si_faddr_sun_path); 2281 } 2282 pksi->ks_si.si_faddr_sun_path[sn_len] = 0; 2283 } 2284 2285 mutex_exit(&so->so_lock); 2286 2287 (void) sprintf(pksi->ks_straddr[0], "%p", (void *)so); 2288 (void) sprintf(pksi->ks_straddr[1], "%p", 2289 (void *)so->so_ux_laddr.soua_vp); 2290 (void) sprintf(pksi->ks_straddr[2], "%p", 2291 (void *)so->so_ux_faddr.soua_vp); 2292 2293 ns++; 2294 pksi++; 2295 } 2296 2297 ksp->ks_ndata = ns; 2298 return (0); 2299 } 2300 2301 ssize_t 2302 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 2303 { 2304 struct uio auio; 2305 struct iovec aiov[MSG_MAXIOVLEN]; 2306 register vnode_t *vp; 2307 int ioflag, rwflag; 2308 ssize_t cnt; 2309 int error = 0; 2310 int iovcnt = 0; 2311 short fflag; 2312 2313 vp = fp->f_vnode; 2314 fflag = fp->f_flag; 2315 2316 rwflag = 0; 2317 aiov[0].iov_base = (caddr_t)buf; 2318 aiov[0].iov_len = size; 2319 iovcnt = 1; 2320 cnt = (ssize_t)size; 2321 (void) VOP_RWLOCK(vp, rwflag, NULL); 2322 2323 auio.uio_loffset = fileoff; 2324 auio.uio_iov = aiov; 2325 auio.uio_iovcnt = iovcnt; 2326 auio.uio_resid = cnt; 2327 auio.uio_segflg = UIO_SYSSPACE; 2328 auio.uio_llimit = MAXOFFSET_T; 2329 auio.uio_fmode = fflag; 2330 auio.uio_extflg = UIO_COPY_CACHED; 2331 2332 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 2333 2334 /* If read sync is not asked for, filter sync flags */ 2335 if ((ioflag & FRSYNC) == 0) 2336 ioflag &= ~(FSYNC|FDSYNC); 2337 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 2338 cnt -= auio.uio_resid; 2339 2340 VOP_RWUNLOCK(vp, rwflag, NULL); 2341 2342 if (error == EINTR && cnt != 0) 2343 error = 0; 2344 out: 2345 if (error != 0) { 2346 *err = error; 2347 return (0); 2348 } else { 2349 *err = 0; 2350 return (cnt); 2351 } 2352 } 2353