1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/debug.h> 41 #include <sys/errno.h> 42 #include <sys/time.h> 43 #include <sys/file.h> 44 #include <sys/open.h> 45 #include <sys/user.h> 46 #include <sys/termios.h> 47 #include <sys/stream.h> 48 #include <sys/strsubr.h> 49 #include <sys/strsun.h> 50 #include <sys/esunddi.h> 51 #include <sys/flock.h> 52 #include <sys/modctl.h> 53 #include <sys/cmn_err.h> 54 #include <sys/mkdev.h> 55 #include <sys/pathname.h> 56 #include <sys/ddi.h> 57 #include <sys/stat.h> 58 #include <sys/fs/snode.h> 59 #include <sys/fs/dv_node.h> 60 #include <sys/zone.h> 61 62 #include <sys/socket.h> 63 #include <sys/socketvar.h> 64 #include <netinet/in.h> 65 #include <sys/un.h> 66 67 #include <sys/ucred.h> 68 
69 #include <sys/tiuser.h> 70 #define _SUN_TPI_VERSION 2 71 #include <sys/tihdr.h> 72 73 #include <c2/audit.h> 74 75 #include <fs/sockfs/nl7c.h> 76 77 /* 78 * Macros that operate on struct cmsghdr. 79 * The CMSG_VALID macro does not assume that the last option buffer is padded. 80 */ 81 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 82 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 83 #define CMSG_VALID(cmsg, start, end) \ 84 (ISALIGNED_cmsghdr(cmsg) && \ 85 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 86 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 87 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 88 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 89 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 90 91 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 92 93 dev_t sockdev; /* For fsid in getattr */ 94 95 struct sockparams *sphead; 96 krwlock_t splist_lock; 97 98 struct socklist socklist; 99 100 static int sockfs_update(kstat_t *, int); 101 static int sockfs_snapshot(kstat_t *, void *, int); 102 103 extern void sendfile_init(); 104 105 extern void nl7c_init(void); 106 107 #define ADRSTRLEN (2 * sizeof (void *) + 1) 108 /* 109 * kernel structure for passing the sockinfo data back up to the user. 110 * the strings array allows us to convert AF_UNIX addresses into strings 111 * with a common method regardless of which n-bit kernel we're running. 112 */ 113 struct k_sockinfo { 114 struct sockinfo ks_si; 115 char ks_straddr[3][ADRSTRLEN]; 116 }; 117 118 /* 119 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 120 * Returns with the vnode held. 121 */ 122 static int 123 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 124 { 125 struct snode *csp; 126 vnode_t *vp, *dvp; 127 major_t maj; 128 int error; 129 130 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 131 /* 132 * Lookup the underlying filesystem vnode. 
133 */ 134 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 135 if (error) 136 return (error); 137 138 /* Check that it is the correct vnode */ 139 if (vp->v_type != VCHR) { 140 VN_RELE(vp); 141 return (ENOTSOCK); 142 } 143 144 /* 145 * If devpath went through devfs, the device should already 146 * be configured. If devpath is a mknod file, however, we 147 * need to make sure the device is properly configured. 148 * To do this, we do something similar to spec_open() 149 * except that we resolve to the minor/leaf level since 150 * we need to return a vnode. 151 */ 152 csp = VTOS(VTOS(vp)->s_commonvp); 153 if (!(csp->s_flag & SDIPSET)) { 154 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 155 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 156 if (error == 0) 157 error = devfs_lookupname(pathname, NULLVPP, &dvp); 158 VN_RELE(vp); 159 kmem_free(pathname, MAXPATHLEN); 160 if (error != 0) 161 return (ENXIO); 162 vp = dvp; /* use the devfs vp */ 163 } 164 165 /* device is configured at this point */ 166 maj = getmajor(vp->v_rdev); 167 if (!STREAMSTAB(maj)) { 168 VN_RELE(vp); 169 return (ENOSTR); 170 } 171 172 *vpp = vp; 173 return (0); 174 } 175 176 /* 177 * Add or delete (latter if devpath is NULL) an enter to the sockparams 178 * table. If devpathlen is zero the devpath with not be kmem_freed. Otherwise 179 * this routine assumes that the caller has kmem_alloced devpath/devpathlen 180 * for this routine to consume. 181 * The zero devpathlen could be used if the kernel wants to create entries 182 * itself by calling sockconfig(1,2,3, "/dev/tcp", 0); 183 */ 184 int 185 soconfig(int domain, int type, int protocol, 186 char *devpath, int devpathlen) 187 { 188 struct sockparams **spp; 189 struct sockparams *sp; 190 int error = 0; 191 192 dprint(0, ("soconfig(%d,%d,%d,%s,%d)\n", 193 domain, type, protocol, devpath, devpathlen)); 194 195 /* 196 * Look for an existing match. 
197 */ 198 rw_enter(&splist_lock, RW_WRITER); 199 for (spp = &sphead; (sp = *spp) != NULL; spp = &sp->sp_next) { 200 if (sp->sp_domain == domain && 201 sp->sp_type == type && 202 sp->sp_protocol == protocol) { 203 break; 204 } 205 } 206 if (devpath == NULL) { 207 ASSERT(devpathlen == 0); 208 209 /* Delete existing entry */ 210 if (sp == NULL) { 211 error = ENXIO; 212 goto done; 213 } 214 /* Unlink and free existing entry */ 215 *spp = sp->sp_next; 216 ASSERT(sp->sp_vnode); 217 VN_RELE(sp->sp_vnode); 218 if (sp->sp_devpathlen != 0) 219 kmem_free(sp->sp_devpath, sp->sp_devpathlen); 220 kmem_free(sp, sizeof (*sp)); 221 } else { 222 vnode_t *vp; 223 224 /* Add new entry */ 225 if (sp != NULL) { 226 error = EEXIST; 227 goto done; 228 } 229 230 error = sogetvp(devpath, &vp, UIO_SYSSPACE); 231 if (error) { 232 dprint(0, ("soconfig: vp %s failed with %d\n", 233 devpath, error)); 234 goto done; 235 } 236 237 dprint(0, ("soconfig: %s => vp %p, dev 0x%lx\n", 238 devpath, vp, vp->v_rdev)); 239 240 sp = kmem_alloc(sizeof (*sp), KM_SLEEP); 241 sp->sp_domain = domain; 242 sp->sp_type = type; 243 sp->sp_protocol = protocol; 244 sp->sp_devpath = devpath; 245 sp->sp_devpathlen = devpathlen; 246 sp->sp_vnode = vp; 247 sp->sp_next = NULL; 248 *spp = sp; 249 } 250 done: 251 rw_exit(&splist_lock); 252 if (error) { 253 if (devpath != NULL) 254 kmem_free(devpath, devpathlen); 255 #ifdef SOCK_DEBUG 256 eprintline(error); 257 #endif /* SOCK_DEBUG */ 258 } 259 return (error); 260 } 261 262 /* 263 * Lookup an entry in the sockparams list based on the triple. 264 * If no entry is found and devpath is not NULL translate devpath to a 265 * vnode. Note that devpath is a pointer to a user address! 266 * Returns with the vnode held. 267 * 268 * When this routine uses devpath it does not create an entry in the sockparams 269 * list since this routine can run on behalf of any user and one user 270 * should not be able to effect the transport used by another user. 
271 * 272 * In order to return the correct error this routine has to do wildcard scans 273 * of the list. The errors are (in decreasing precedence): 274 * EAFNOSUPPORT - address family not in list 275 * EPROTONOSUPPORT - address family supported but not protocol. 276 * EPROTOTYPE - address family and protocol supported but not socket type. 277 */ 278 vnode_t * 279 solookup(int domain, int type, int protocol, char *devpath, int *errorp) 280 { 281 struct sockparams *sp; 282 int error; 283 vnode_t *vp; 284 285 rw_enter(&splist_lock, RW_READER); 286 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 287 if (sp->sp_domain == domain && 288 sp->sp_type == type && 289 sp->sp_protocol == protocol) { 290 break; 291 } 292 } 293 if (sp == NULL) { 294 dprint(0, ("solookup(%d,%d,%d) not found\n", 295 domain, type, protocol)); 296 if (devpath == NULL) { 297 /* Determine correct error code */ 298 int found = 0; 299 300 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 301 if (sp->sp_domain == domain && found < 1) 302 found = 1; 303 if (sp->sp_domain == domain && 304 sp->sp_protocol == protocol && found < 2) 305 found = 2; 306 } 307 rw_exit(&splist_lock); 308 switch (found) { 309 case 0: 310 *errorp = EAFNOSUPPORT; 311 break; 312 case 1: 313 *errorp = EPROTONOSUPPORT; 314 break; 315 case 2: 316 *errorp = EPROTOTYPE; 317 break; 318 } 319 return (NULL); 320 } 321 rw_exit(&splist_lock); 322 323 /* 324 * Return vp based on devpath. 325 * Do not enter into table to avoid random users 326 * modifying the sockparams list. 
327 */ 328 error = sogetvp(devpath, &vp, UIO_USERSPACE); 329 if (error) { 330 dprint(0, ("solookup: vp %p failed with %d\n", 331 devpath, error)); 332 *errorp = EPROTONOSUPPORT; 333 return (NULL); 334 } 335 dprint(0, ("solookup: %p => vp %p, dev 0x%lx\n", 336 devpath, vp, vp->v_rdev)); 337 338 return (vp); 339 } 340 dprint(0, ("solookup(%d,%d,%d) vp %p devpath %s\n", 341 domain, type, protocol, sp->sp_vnode, sp->sp_devpath)); 342 343 vp = sp->sp_vnode; 344 VN_HOLD(vp); 345 rw_exit(&splist_lock); 346 return (vp); 347 } 348 349 /* 350 * Return a socket vnode. 351 * 352 * Assumes that the caller is "passing" an VN_HOLD for accessvp i.e. 353 * when the socket is freed a VN_RELE will take place. 354 * 355 * Note that sockets assume that the driver will clone (either itself 356 * or by using the clone driver) i.e. a socket() call will always 357 * result in a new vnode being created. 358 */ 359 struct vnode * 360 makesockvp(struct vnode *accessvp, int domain, int type, int protocol) 361 { 362 kmem_cache_t *cp; 363 struct sonode *so; 364 struct vnode *vp; 365 time_t now; 366 dev_t dev; 367 368 cp = (domain == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 369 so = kmem_cache_alloc(cp, KM_SLEEP); 370 so->so_cache = cp; 371 so->so_obj = so; 372 vp = SOTOV(so); 373 now = gethrestime_sec(); 374 375 so->so_flag = 0; 376 ASSERT(so->so_accessvp == NULL); 377 so->so_accessvp = accessvp; 378 dev = accessvp->v_rdev; 379 380 /* 381 * Record in so_flag that it is a clone. 
382 */ 383 if (getmajor(dev) == clone_major) { 384 so->so_flag |= SOCLONE; 385 } 386 so->so_dev = dev; 387 388 so->so_state = 0; 389 so->so_mode = 0; 390 391 so->so_fsid = sockdev; 392 so->so_atime = now; 393 so->so_mtime = now; 394 so->so_ctime = now; /* Never modified */ 395 so->so_count = 0; 396 397 so->so_family = (short)domain; 398 so->so_type = (short)type; 399 so->so_protocol = (short)protocol; 400 so->so_pushcnt = 0; 401 402 so->so_options = 0; 403 so->so_linger.l_onoff = 0; 404 so->so_linger.l_linger = 0; 405 so->so_sndbuf = 0; 406 so->so_rcvbuf = 0; 407 so->so_sndlowat = 0; 408 so->so_rcvlowat = 0; 409 #ifdef notyet 410 so->so_sndtimeo = 0; 411 so->so_rcvtimeo = 0; 412 #endif /* notyet */ 413 so->so_error = 0; 414 so->so_delayed_error = 0; 415 416 ASSERT(so->so_oobmsg == NULL); 417 so->so_oobcnt = 0; 418 so->so_oobsigcnt = 0; 419 so->so_pgrp = 0; 420 so->so_provinfo = NULL; 421 422 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 423 so->so_laddr_len = so->so_faddr_len = 0; 424 so->so_laddr_maxlen = so->so_faddr_maxlen = 0; 425 so->so_eaddr_mp = NULL; 426 so->so_priv = NULL; 427 428 so->so_peercred = NULL; 429 430 ASSERT(so->so_ack_mp == NULL); 431 ASSERT(so->so_conn_ind_head == NULL); 432 ASSERT(so->so_conn_ind_tail == NULL); 433 ASSERT(so->so_ux_bound_vp == NULL); 434 ASSERT(so->so_unbind_mp == NULL); 435 436 vn_reinit(vp); 437 vp->v_vfsp = rootvfs; 438 vp->v_type = VSOCK; 439 vp->v_rdev = so->so_dev; 440 vn_exists(vp); 441 442 return (vp); 443 } 444 445 void 446 sockfree(struct sonode *so) 447 { 448 mblk_t *mp; 449 vnode_t *vp; 450 451 ASSERT(so->so_count == 0); 452 ASSERT(so->so_accessvp); 453 ASSERT(so->so_discon_ind_mp == NULL); 454 455 vp = so->so_accessvp; 456 VN_RELE(vp); 457 458 /* 459 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 460 * indirect them. It also uses so_accessvp as a validity test. 
461 */ 462 mutex_enter(&so->so_lock); 463 464 so->so_accessvp = NULL; 465 466 if (so->so_laddr_sa) { 467 ASSERT((caddr_t)so->so_faddr_sa == 468 (caddr_t)so->so_laddr_sa + so->so_laddr_maxlen); 469 ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen); 470 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 471 kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); 472 so->so_laddr_sa = NULL; 473 so->so_laddr_len = so->so_laddr_maxlen = 0; 474 so->so_faddr_sa = NULL; 475 so->so_faddr_len = so->so_faddr_maxlen = 0; 476 } 477 478 mutex_exit(&so->so_lock); 479 480 if ((mp = so->so_eaddr_mp) != NULL) { 481 freemsg(mp); 482 so->so_eaddr_mp = NULL; 483 so->so_delayed_error = 0; 484 } 485 if ((mp = so->so_ack_mp) != NULL) { 486 freemsg(mp); 487 so->so_ack_mp = NULL; 488 } 489 if ((mp = so->so_conn_ind_head) != NULL) { 490 mblk_t *mp1; 491 492 while (mp) { 493 mp1 = mp->b_next; 494 mp->b_next = NULL; 495 freemsg(mp); 496 mp = mp1; 497 } 498 so->so_conn_ind_head = so->so_conn_ind_tail = NULL; 499 so->so_state &= ~SS_HASCONNIND; 500 } 501 #ifdef DEBUG 502 mutex_enter(&so->so_lock); 503 ASSERT(so_verify_oobstate(so)); 504 mutex_exit(&so->so_lock); 505 #endif /* DEBUG */ 506 if ((mp = so->so_oobmsg) != NULL) { 507 freemsg(mp); 508 so->so_oobmsg = NULL; 509 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA); 510 } 511 512 if ((mp = so->so_nl7c_rcv_mp) != NULL) { 513 so->so_nl7c_rcv_mp = NULL; 514 freemsg(mp); 515 } 516 so->so_nl7c_rcv_rval = 0; 517 if (so->so_nl7c_uri != NULL) { 518 nl7c_urifree(so); 519 /* urifree() cleared nl7c_uri */ 520 } 521 if (so->so_nl7c_flags) { 522 so->so_nl7c_flags = 0; 523 } 524 525 ASSERT(so->so_ux_bound_vp == NULL); 526 if ((mp = so->so_unbind_mp) != NULL) { 527 freemsg(mp); 528 so->so_unbind_mp = NULL; 529 } 530 vn_invalid(SOTOV(so)); 531 532 if (so->so_peercred != NULL) 533 crfree(so->so_peercred); 534 535 kmem_cache_free(so->so_cache, so->so_obj); 536 } 537 538 /* 539 * Update the accessed, updated, or changed times in an sonode 540 * with 
the current time. 541 * 542 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 543 * attributes in a fstat call. (They return the current time and 0 for 544 * all timestamps, respectively.) We maintain the current timestamps 545 * here primarily so that should sockmod be popped the resulting 546 * file descriptor will behave like a stream w.r.t. the timestamps. 547 */ 548 void 549 so_update_attrs(struct sonode *so, int flag) 550 { 551 time_t now = gethrestime_sec(); 552 553 mutex_enter(&so->so_lock); 554 so->so_flag |= flag; 555 if (flag & SOACC) 556 so->so_atime = now; 557 if (flag & SOMOD) 558 so->so_mtime = now; 559 mutex_exit(&so->so_lock); 560 } 561 562 /*ARGSUSED*/ 563 static int 564 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 565 { 566 struct sonode *so = buf; 567 struct vnode *vp; 568 569 so->so_nl7c_flags = 0; 570 so->so_nl7c_uri = NULL; 571 so->so_nl7c_rcv_mp = NULL; 572 573 so->so_oobmsg = NULL; 574 so->so_ack_mp = NULL; 575 so->so_conn_ind_head = NULL; 576 so->so_conn_ind_tail = NULL; 577 so->so_discon_ind_mp = NULL; 578 so->so_ux_bound_vp = NULL; 579 so->so_unbind_mp = NULL; 580 so->so_accessvp = NULL; 581 so->so_laddr_sa = NULL; 582 so->so_faddr_sa = NULL; 583 so->so_ops = &sotpi_sonodeops; 584 585 vp = vn_alloc(KM_SLEEP); 586 so->so_vnode = vp; 587 588 vn_setops(vp, socktpi_vnodeops); 589 vp->v_data = (caddr_t)so; 590 591 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 592 mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 593 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 594 cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL); 595 cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL); 596 cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 597 598 return (0); 599 } 600 601 /*ARGSUSED1*/ 602 static void 603 socktpi_destructor(void *buf, void *cdrarg) 604 { 605 struct sonode *so = buf; 606 struct vnode *vp = SOTOV(so); 607 608 ASSERT(so->so_nl7c_flags == 0); 609 ASSERT(so->so_nl7c_uri == NULL); 610 
ASSERT(so->so_nl7c_rcv_mp == NULL); 611 612 ASSERT(so->so_oobmsg == NULL); 613 ASSERT(so->so_ack_mp == NULL); 614 ASSERT(so->so_conn_ind_head == NULL); 615 ASSERT(so->so_conn_ind_tail == NULL); 616 ASSERT(so->so_discon_ind_mp == NULL); 617 ASSERT(so->so_ux_bound_vp == NULL); 618 ASSERT(so->so_unbind_mp == NULL); 619 ASSERT(so->so_ops == &sotpi_sonodeops); 620 621 ASSERT(vn_matchops(vp, socktpi_vnodeops)); 622 ASSERT(vp->v_data == (caddr_t)so); 623 624 vn_free(vp); 625 626 mutex_destroy(&so->so_lock); 627 mutex_destroy(&so->so_plumb_lock); 628 cv_destroy(&so->so_state_cv); 629 cv_destroy(&so->so_ack_cv); 630 cv_destroy(&so->so_connind_cv); 631 cv_destroy(&so->so_want_cv); 632 } 633 634 static int 635 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 636 { 637 int retval; 638 639 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 640 struct sonode *so = (struct sonode *)buf; 641 642 mutex_enter(&socklist.sl_lock); 643 644 so->so_next = socklist.sl_list; 645 so->so_prev = NULL; 646 if (so->so_next != NULL) 647 so->so_next->so_prev = so; 648 socklist.sl_list = so; 649 650 mutex_exit(&socklist.sl_lock); 651 652 } 653 return (retval); 654 } 655 656 static void 657 socktpi_unix_destructor(void *buf, void *cdrarg) 658 { 659 struct sonode *so = (struct sonode *)buf; 660 661 mutex_enter(&socklist.sl_lock); 662 663 if (so->so_next != NULL) 664 so->so_next->so_prev = so->so_prev; 665 if (so->so_prev != NULL) 666 so->so_prev->so_next = so->so_next; 667 else 668 socklist.sl_list = so->so_next; 669 670 mutex_exit(&socklist.sl_lock); 671 672 socktpi_destructor(buf, cdrarg); 673 } 674 675 /* 676 * Init function called when sockfs is loaded. 
677 */ 678 int 679 sockinit(int fstype, char *name) 680 { 681 static const fs_operation_def_t sock_vfsops_template[] = { 682 NULL, NULL 683 }; 684 int error; 685 major_t dev; 686 char *err_str; 687 688 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 689 if (error != 0) { 690 zcmn_err(GLOBAL_ZONEID, CE_WARN, 691 "sockinit: bad vfs ops template"); 692 return (error); 693 } 694 695 error = vn_make_ops(name, socktpi_vnodeops_template, &socktpi_vnodeops); 696 if (error != 0) { 697 err_str = "sockinit: bad sock vnode ops template"; 698 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 699 socktpi_vnodeops = NULL; 700 goto failure; 701 } 702 703 error = sosctp_init(); 704 if (error != 0) { 705 err_str = NULL; 706 goto failure; 707 } 708 709 error = sosdp_init(); 710 if (error != 0) { 711 err_str = NULL; 712 goto failure; 713 } 714 715 /* 716 * Create sonode caches. We create a special one for AF_UNIX so 717 * that we can track them for netstat(1m). 718 */ 719 socktpi_cache = kmem_cache_create("socktpi_cache", 720 sizeof (struct sonode), 0, socktpi_constructor, 721 socktpi_destructor, NULL, NULL, NULL, 0); 722 723 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 724 sizeof (struct sonode), 0, socktpi_unix_constructor, 725 socktpi_unix_destructor, NULL, NULL, NULL, 0); 726 727 /* 728 * Build initial list mapping socket parameters to vnode. 729 */ 730 rw_init(&splist_lock, NULL, RW_DEFAULT, NULL); 731 732 /* 733 * If sockets are needed before init runs /sbin/soconfig 734 * it is possible to preload the sockparams list here using 735 * calls like: 736 * sockconfig(1,2,3, "/dev/tcp", 0); 737 */ 738 739 /* 740 * Create a unique dev_t for use in so_fsid. 
741 */ 742 743 if ((dev = getudev()) == (major_t)-1) 744 dev = 0; 745 sockdev = makedevice(dev, 0); 746 747 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 748 sendfile_init(); 749 nl7c_init(); 750 751 return (0); 752 753 failure: 754 (void) vfs_freevfsops_by_type(fstype); 755 if (socktpi_vnodeops != NULL) 756 vn_freevnodeops(socktpi_vnodeops); 757 if (err_str != NULL) 758 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 759 return (error); 760 } 761 762 /* 763 * Caller must hold the mutex. Used to set SOLOCKED. 764 */ 765 void 766 so_lock_single(struct sonode *so) 767 { 768 ASSERT(MUTEX_HELD(&so->so_lock)); 769 770 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 771 so->so_flag |= SOWANT; 772 cv_wait_stop(&so->so_want_cv, &so->so_lock, 773 SO_LOCK_WAKEUP_TIME); 774 } 775 so->so_flag |= SOLOCKED; 776 } 777 778 /* 779 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 780 * Used to clear SOLOCKED or SOASYNC_UNBIND. 781 */ 782 void 783 so_unlock_single(struct sonode *so, int flag) 784 { 785 ASSERT(MUTEX_HELD(&so->so_lock)); 786 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 787 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 788 ASSERT(so->so_flag & flag); 789 790 /* 791 * Process the T_DISCON_IND on so_discon_ind_mp. 792 * 793 * Call to so_drain_discon_ind will result in so_lock 794 * being dropped and re-acquired later. 795 */ 796 if (so->so_discon_ind_mp != NULL) 797 so_drain_discon_ind(so); 798 799 if (so->so_flag & SOWANT) 800 cv_broadcast(&so->so_want_cv); 801 so->so_flag &= ~(SOWANT|flag); 802 } 803 804 /* 805 * Caller must hold the mutex. Used to set SOREADLOCKED. 806 * If the caller wants nonblocking behavior it should set fmode. 
807 */ 808 int 809 so_lock_read(struct sonode *so, int fmode) 810 { 811 ASSERT(MUTEX_HELD(&so->so_lock)); 812 813 while (so->so_flag & SOREADLOCKED) { 814 if (fmode & (FNDELAY|FNONBLOCK)) 815 return (EWOULDBLOCK); 816 so->so_flag |= SOWANT; 817 cv_wait_stop(&so->so_want_cv, &so->so_lock, 818 SO_LOCK_WAKEUP_TIME); 819 } 820 so->so_flag |= SOREADLOCKED; 821 return (0); 822 } 823 824 /* 825 * Like so_lock_read above but allows signals. 826 */ 827 int 828 so_lock_read_intr(struct sonode *so, int fmode) 829 { 830 ASSERT(MUTEX_HELD(&so->so_lock)); 831 832 while (so->so_flag & SOREADLOCKED) { 833 if (fmode & (FNDELAY|FNONBLOCK)) 834 return (EWOULDBLOCK); 835 so->so_flag |= SOWANT; 836 if (!cv_wait_sig(&so->so_want_cv, &so->so_lock)) 837 return (EINTR); 838 } 839 so->so_flag |= SOREADLOCKED; 840 return (0); 841 } 842 843 /* 844 * Caller must hold the mutex. Used to clear SOREADLOCKED, 845 * set in so_lock_read() or so_lock_read_intr(). 846 */ 847 void 848 so_unlock_read(struct sonode *so) 849 { 850 ASSERT(MUTEX_HELD(&so->so_lock)); 851 ASSERT(so->so_flag & SOREADLOCKED); 852 853 if (so->so_flag & SOWANT) 854 cv_broadcast(&so->so_want_cv); 855 so->so_flag &= ~(SOWANT|SOREADLOCKED); 856 } 857 858 /* 859 * Verify that the specified offset falls within the mblk and 860 * that the resulting pointer is aligned. 861 * Returns NULL if not. 862 */ 863 void * 864 sogetoff(mblk_t *mp, t_uscalar_t offset, 865 t_uscalar_t length, uint_t align_size) 866 { 867 uintptr_t ptr1, ptr2; 868 869 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 870 ptr1 = (uintptr_t)mp->b_rptr + offset; 871 ptr2 = (uintptr_t)ptr1 + length; 872 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 873 eprintline(0); 874 return (NULL); 875 } 876 if ((ptr1 & (align_size - 1)) != 0) { 877 eprintline(0); 878 return (NULL); 879 } 880 return ((void *)ptr1); 881 } 882 883 /* 884 * Return the AF_UNIX underlying filesystem vnode matching a given name. 
885 * Makes sure the sending and the destination sonodes are compatible. 886 * The vnode is returned held. 887 * 888 * The underlying filesystem VSOCK vnode has a v_stream pointer that 889 * references the actual stream head (hence indirectly the actual sonode). 890 */ 891 static int 892 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 893 vnode_t **vpp) 894 { 895 vnode_t *vp; /* Underlying filesystem vnode */ 896 vnode_t *svp; /* sockfs vnode */ 897 struct sonode *so2; 898 int error; 899 900 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", 901 so, soun->sun_path)); 902 903 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 904 if (error) { 905 eprintsoline(so, error); 906 return (error); 907 } 908 if (vp->v_type != VSOCK) { 909 error = ENOTSOCK; 910 eprintsoline(so, error); 911 goto done2; 912 } 913 914 if (checkaccess) { 915 /* 916 * Check that we have permissions to access the destination 917 * vnode. This check is not done in BSD but it is required 918 * by X/Open. 919 */ 920 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED())) { 921 eprintsoline(so, error); 922 goto done2; 923 } 924 } 925 926 /* 927 * Check if the remote socket has been closed. 928 * 929 * Synchronize with vn_rele_stream by holding v_lock while traversing 930 * v_stream->sd_vnode. 931 */ 932 mutex_enter(&vp->v_lock); 933 if (vp->v_stream == NULL) { 934 mutex_exit(&vp->v_lock); 935 if (so->so_type == SOCK_DGRAM) 936 error = EDESTADDRREQ; 937 else 938 error = ECONNREFUSED; 939 940 eprintsoline(so, error); 941 goto done2; 942 } 943 ASSERT(vp->v_stream->sd_vnode); 944 svp = vp->v_stream->sd_vnode; 945 /* 946 * holding v_lock on underlying filesystem vnode and acquiring 947 * it on sockfs vnode. Assumes that no code ever attempts to 948 * acquire these locks in the reverse order. 
949 */ 950 VN_HOLD(svp); 951 mutex_exit(&vp->v_lock); 952 953 if (svp->v_type != VSOCK) { 954 error = ENOTSOCK; 955 eprintsoline(so, error); 956 goto done; 957 } 958 959 so2 = VTOSO(svp); 960 961 if (so->so_type != so2->so_type) { 962 error = EPROTOTYPE; 963 eprintsoline(so, error); 964 goto done; 965 } 966 967 VN_RELE(svp); 968 *vpp = vp; 969 return (0); 970 971 done: 972 VN_RELE(svp); 973 done2: 974 VN_RELE(vp); 975 return (error); 976 } 977 978 /* 979 * Verify peer address for connect and sendto/sendmsg. 980 * Since sendto/sendmsg would not get synchronous errors from the transport 981 * provider we have to do these ugly checks in the socket layer to 982 * preserve compatibility with SunOS 4.X. 983 */ 984 int 985 so_addr_verify(struct sonode *so, const struct sockaddr *name, 986 socklen_t namelen) 987 { 988 int family; 989 990 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", so, name, namelen)); 991 992 ASSERT(name != NULL); 993 994 family = so->so_family; 995 switch (family) { 996 case AF_INET: 997 if (name->sa_family != family) { 998 eprintsoline(so, EAFNOSUPPORT); 999 return (EAFNOSUPPORT); 1000 } 1001 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 1002 eprintsoline(so, EINVAL); 1003 return (EINVAL); 1004 } 1005 break; 1006 case AF_INET6: { 1007 #ifdef DEBUG 1008 struct sockaddr_in6 *sin6; 1009 #endif /* DEBUG */ 1010 1011 if (name->sa_family != family) { 1012 eprintsoline(so, EAFNOSUPPORT); 1013 return (EAFNOSUPPORT); 1014 } 1015 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 1016 eprintsoline(so, EINVAL); 1017 return (EINVAL); 1018 } 1019 #ifdef DEBUG 1020 /* Verify that apps don't forget to clear sin6_scope_id etc */ 1021 sin6 = (struct sockaddr_in6 *)name; 1022 if (sin6->sin6_scope_id != 0 && 1023 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 1024 zcmn_err(getzoneid(), CE_WARN, 1025 "connect/send* with uninitialized sin6_scope_id " 1026 "(%d) on socket. 
Pid = %d\n", 1027 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 1028 } 1029 #endif /* DEBUG */ 1030 break; 1031 } 1032 case AF_UNIX: 1033 if (so->so_state & SS_FADDR_NOXLATE) { 1034 return (0); 1035 } 1036 if (namelen < (socklen_t)sizeof (short)) { 1037 eprintsoline(so, ENOENT); 1038 return (ENOENT); 1039 } 1040 if (name->sa_family != family) { 1041 eprintsoline(so, EAFNOSUPPORT); 1042 return (EAFNOSUPPORT); 1043 } 1044 /* MAXPATHLEN + soun_family + nul termination */ 1045 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 1046 eprintsoline(so, ENAMETOOLONG); 1047 return (ENAMETOOLONG); 1048 } 1049 1050 break; 1051 1052 default: 1053 /* 1054 * Default is don't do any length or sa_family check 1055 * to allow non-sockaddr style addresses. 1056 */ 1057 break; 1058 } 1059 1060 return (0); 1061 } 1062 1063 1064 /* 1065 * Translate an AF_UNIX sockaddr_un to the transport internal name. 1066 * Assumes caller has called so_addr_verify first. 1067 */ 1068 /*ARGSUSED*/ 1069 int 1070 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 1071 socklen_t namelen, int checkaccess, 1072 void **addrp, socklen_t *addrlenp) 1073 { 1074 int error; 1075 struct sockaddr_un *soun; 1076 vnode_t *vp; 1077 void *addr; 1078 socklen_t addrlen; 1079 1080 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 1081 so, name, namelen, checkaccess)); 1082 1083 ASSERT(name != NULL); 1084 ASSERT(so->so_family == AF_UNIX); 1085 ASSERT(!(so->so_state & SS_FADDR_NOXLATE)); 1086 ASSERT(namelen >= (socklen_t)sizeof (short)); 1087 ASSERT(name->sa_family == AF_UNIX); 1088 soun = (struct sockaddr_un *)name; 1089 /* 1090 * Lookup vnode for the specified path name and verify that 1091 * it is a socket. 1092 */ 1093 error = so_ux_lookup(so, soun, checkaccess, &vp); 1094 if (error) { 1095 eprintsoline(so, error); 1096 return (error); 1097 } 1098 /* 1099 * Use the address of the peer vnode as the address to send 1100 * to. We release the peer vnode here. 
In case it has been 1101 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the 1102 * transport the message will get an error or be dropped. 1103 */ 1104 so->so_ux_faddr.soua_vp = vp; 1105 so->so_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT; 1106 addr = &so->so_ux_faddr; 1107 addrlen = (socklen_t)sizeof (so->so_ux_faddr); 1108 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 1109 addrlen, vp)); 1110 VN_RELE(vp); 1111 *addrp = addr; 1112 *addrlenp = (socklen_t)addrlen; 1113 return (0); 1114 } 1115 1116 /* 1117 * Esballoc free function for messages that contain SO_FILEP option. 1118 * Decrement the reference count on the file pointers using closef. 1119 */ 1120 void 1121 fdbuf_free(struct fdbuf *fdbuf) 1122 { 1123 int i; 1124 struct file *fp; 1125 1126 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 1127 for (i = 0; i < fdbuf->fd_numfd; i++) { 1128 /* 1129 * We need pointer size alignment for fd_fds. On a LP64 1130 * kernel, the required alignment is 8 bytes while 1131 * the option headers and values are only 4 bytes 1132 * aligned. So its safer to do a bcopy compared to 1133 * assigning fdbuf->fd_fds[i] to fp. 1134 */ 1135 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1136 dprint(1, ("fdbuf_free: [%d] = %p\n", i, fp)); 1137 (void) closef(fp); 1138 } 1139 if (fdbuf->fd_ebuf != NULL) 1140 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 1141 kmem_free(fdbuf, fdbuf->fd_size); 1142 } 1143 1144 /* 1145 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 1146 * Waits if memory is not available. 
1147 */ 1148 mblk_t * 1149 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 1150 { 1151 uchar_t *buf; 1152 mblk_t *mp; 1153 1154 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 1155 buf = kmem_alloc(size, KM_SLEEP); 1156 fdbuf->fd_ebuf = (caddr_t)buf; 1157 fdbuf->fd_ebuflen = size; 1158 fdbuf->fd_frtn.free_func = fdbuf_free; 1159 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 1160 1161 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 1162 mp->b_datap->db_type = M_PROTO; 1163 return (mp); 1164 } 1165 1166 /* 1167 * Extract file descriptors from a fdbuf. 1168 * Return list in rights/rightslen. 1169 */ 1170 /*ARGSUSED*/ 1171 static int 1172 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 1173 { 1174 int i, fd; 1175 int *rp; 1176 struct file *fp; 1177 int numfd; 1178 1179 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 1180 fdbuf->fd_numfd, rightslen)); 1181 1182 numfd = fdbuf->fd_numfd; 1183 ASSERT(rightslen == numfd * (int)sizeof (int)); 1184 1185 /* 1186 * Allocate a file descriptor and increment the f_count. 1187 * The latter is needed since we always call fdbuf_free 1188 * which performs a closef. 1189 */ 1190 rp = (int *)rights; 1191 for (i = 0; i < numfd; i++) { 1192 if ((fd = ufalloc(0)) == -1) 1193 goto cleanup; 1194 /* 1195 * We need pointer size alignment for fd_fds. On a LP64 1196 * kernel, the required alignment is 8 bytes while 1197 * the option headers and values are only 4 bytes 1198 * aligned. So its safer to do a bcopy compared to 1199 * assigning fdbuf->fd_fds[i] to fp. 
1200 */ 1201 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1202 mutex_enter(&fp->f_tlock); 1203 fp->f_count++; 1204 mutex_exit(&fp->f_tlock); 1205 setf(fd, fp); 1206 *rp++ = fd; 1207 #ifdef C2_AUDIT 1208 if (audit_active) 1209 audit_fdrecv(fd, fp); 1210 #endif 1211 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 1212 i, fd, fp, fp->f_count)); 1213 } 1214 return (0); 1215 1216 cleanup: 1217 /* 1218 * Undo whatever partial work the loop above has done. 1219 */ 1220 { 1221 int j; 1222 1223 rp = (int *)rights; 1224 for (j = 0; j < i; j++) { 1225 dprint(0, 1226 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 1227 (void) closeandsetf(*rp++, NULL); 1228 } 1229 } 1230 1231 return (EMFILE); 1232 } 1233 1234 /* 1235 * Insert file descriptors into an fdbuf. 1236 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 1237 * by calling fdbuf_free(). 1238 */ 1239 int 1240 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 1241 { 1242 int numfd, i; 1243 int *fds; 1244 struct file *fp; 1245 struct fdbuf *fdbuf; 1246 int fdbufsize; 1247 1248 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 1249 1250 numfd = rightslen / (int)sizeof (int); 1251 1252 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 1253 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 1254 fdbuf->fd_size = fdbufsize; 1255 fdbuf->fd_numfd = 0; 1256 fdbuf->fd_ebuf = NULL; 1257 fdbuf->fd_ebuflen = 0; 1258 fds = (int *)rights; 1259 for (i = 0; i < numfd; i++) { 1260 if ((fp = getf(fds[i])) == NULL) { 1261 fdbuf_free(fdbuf); 1262 return (EBADF); 1263 } 1264 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 1265 i, fds[i], fp, fp->f_count)); 1266 mutex_enter(&fp->f_tlock); 1267 fp->f_count++; 1268 mutex_exit(&fp->f_tlock); 1269 /* 1270 * The maximum alignment for fdbuf (or any option header 1271 * and its value) it 4 bytes. On a LP64 kernel, the alignment 1272 * is not sufficient for pointers (fd_fds in this case). 
Since 1273 * we just did a kmem_alloc (we get a double word alignment), 1274 * we don't need to do anything on the send side (we loose 1275 * the double word alignment because fdbuf goes after an 1276 * option header (eg T_unitdata_req) which is only 4 byte 1277 * aligned). We take care of this when we extract the file 1278 * descriptor in fdbuf_extract or fdbuf_free. 1279 */ 1280 fdbuf->fd_fds[i] = fp; 1281 fdbuf->fd_numfd++; 1282 releasef(fds[i]); 1283 #ifdef C2_AUDIT 1284 if (audit_active) 1285 audit_fdsend(fds[i], fp, 0); 1286 #endif 1287 } 1288 *fdbufp = fdbuf; 1289 return (0); 1290 } 1291 1292 static int 1293 fdbuf_optlen(int rightslen) 1294 { 1295 int numfd; 1296 1297 numfd = rightslen / (int)sizeof (int); 1298 1299 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 1300 } 1301 1302 static t_uscalar_t 1303 fdbuf_cmsglen(int fdbuflen) 1304 { 1305 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 1306 (int)sizeof (struct file *) * (int)sizeof (int)); 1307 } 1308 1309 1310 /* 1311 * Return non-zero if the mblk and fdbuf are consistent. 1312 */ 1313 static int 1314 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 1315 { 1316 if (fdbuflen >= FDBUF_HDRSIZE && 1317 fdbuflen == fdbuf->fd_size) { 1318 frtn_t *frp = mp->b_datap->db_frtnp; 1319 /* 1320 * Check that the SO_FILEP portion of the 1321 * message has not been modified by 1322 * the loopback transport. The sending sockfs generates 1323 * a message that is esballoc'ed with the free function 1324 * being fdbuf_free() and where free_arg contains the 1325 * identical information as the SO_FILEP content. 1326 * 1327 * If any of these constraints are not satisfied we 1328 * silently ignore the option. 
1329 */ 1330 ASSERT(mp); 1331 if (frp != NULL && 1332 frp->free_func == fdbuf_free && 1333 frp->free_arg != NULL && 1334 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 1335 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 1336 fdbuf, fdbuflen)); 1337 return (1); 1338 } else { 1339 zcmn_err(getzoneid(), CE_WARN, 1340 "sockfs: mismatched fdbuf content (%p)", 1341 (void *)mp); 1342 return (0); 1343 } 1344 } else { 1345 zcmn_err(getzoneid(), CE_WARN, 1346 "sockfs: mismatched fdbuf len %d, %d\n", 1347 fdbuflen, fdbuf->fd_size); 1348 return (0); 1349 } 1350 } 1351 1352 /* 1353 * When the file descriptors returned by sorecvmsg can not be passed 1354 * to the application this routine will cleanup the references on 1355 * the files. Start at startoff bytes into the buffer. 1356 */ 1357 static void 1358 close_fds(void *fdbuf, int fdbuflen, int startoff) 1359 { 1360 int *fds = (int *)fdbuf; 1361 int numfd = fdbuflen / (int)sizeof (int); 1362 int i; 1363 1364 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 1365 1366 for (i = 0; i < numfd; i++) { 1367 if (startoff < 0) 1368 startoff = 0; 1369 if (startoff < (int)sizeof (int)) { 1370 /* 1371 * This file descriptor is partially or fully after 1372 * the offset 1373 */ 1374 dprint(0, 1375 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 1376 (void) closeandsetf(fds[i], NULL); 1377 } 1378 startoff -= (int)sizeof (int); 1379 } 1380 } 1381 1382 /* 1383 * Close all file descriptors contained in the control part starting at 1384 * the startoffset. 1385 */ 1386 void 1387 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 1388 int startoff) 1389 { 1390 struct cmsghdr *cmsg; 1391 1392 if (control == NULL) 1393 return; 1394 1395 if (oldflg) { 1396 close_fds(control, controllen, startoff); 1397 return; 1398 } 1399 /* Scan control part for file descriptors. 
*/ 1400 for (cmsg = (struct cmsghdr *)control; 1401 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1402 cmsg = CMSG_NEXT(cmsg)) { 1403 if (cmsg->cmsg_level == SOL_SOCKET && 1404 cmsg->cmsg_type == SCM_RIGHTS) { 1405 close_fds(CMSG_CONTENT(cmsg), 1406 (int)CMSG_CONTENTLEN(cmsg), 1407 startoff - (int)sizeof (struct cmsghdr)); 1408 } 1409 startoff -= cmsg->cmsg_len; 1410 } 1411 } 1412 1413 /* 1414 * Returns a pointer/length for the file descriptors contained 1415 * in the control buffer. Returns with *fdlenp == -1 if there are no 1416 * file descriptor options present. This is different than there being 1417 * a zero-length file descriptor option. 1418 * Fail if there are multiple SCM_RIGHT cmsgs. 1419 */ 1420 int 1421 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1422 void **fdsp, int *fdlenp) 1423 { 1424 struct cmsghdr *cmsg; 1425 void *fds; 1426 int fdlen; 1427 1428 if (control == NULL) { 1429 *fdsp = NULL; 1430 *fdlenp = -1; 1431 return (0); 1432 } 1433 1434 if (oldflg) { 1435 *fdsp = control; 1436 if (controllen == 0) 1437 *fdlenp = -1; 1438 else 1439 *fdlenp = controllen; 1440 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1441 return (0); 1442 } 1443 1444 fds = NULL; 1445 fdlen = 0; 1446 1447 for (cmsg = (struct cmsghdr *)control; 1448 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1449 cmsg = CMSG_NEXT(cmsg)) { 1450 if (cmsg->cmsg_level == SOL_SOCKET && 1451 cmsg->cmsg_type == SCM_RIGHTS) { 1452 if (fds != NULL) 1453 return (EINVAL); 1454 fds = CMSG_CONTENT(cmsg); 1455 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1456 dprint(1, ("so_getfdopt: new %lu\n", 1457 (size_t)CMSG_CONTENTLEN(cmsg))); 1458 } 1459 } 1460 if (fds == NULL) { 1461 dprint(1, ("so_getfdopt: NONE\n")); 1462 *fdlenp = -1; 1463 } else 1464 *fdlenp = fdlen; 1465 *fdsp = fds; 1466 return (0); 1467 } 1468 1469 /* 1470 * Return the length of the options including any file descriptor options. 
1471 */ 1472 t_uscalar_t 1473 so_optlen(void *control, t_uscalar_t controllen, int oldflg) 1474 { 1475 struct cmsghdr *cmsg; 1476 t_uscalar_t optlen = 0; 1477 t_uscalar_t len; 1478 1479 if (control == NULL) 1480 return (0); 1481 1482 if (oldflg) 1483 return ((t_uscalar_t)(sizeof (struct T_opthdr) + 1484 fdbuf_optlen(controllen))); 1485 1486 for (cmsg = (struct cmsghdr *)control; 1487 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1488 cmsg = CMSG_NEXT(cmsg)) { 1489 if (cmsg->cmsg_level == SOL_SOCKET && 1490 cmsg->cmsg_type == SCM_RIGHTS) { 1491 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg)); 1492 } else { 1493 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1494 } 1495 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) + 1496 sizeof (struct T_opthdr)); 1497 } 1498 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n", 1499 controllen, oldflg, optlen)); 1500 return (optlen); 1501 } 1502 1503 /* 1504 * Copy options from control to the mblk. Skip any file descriptor options. 1505 */ 1506 void 1507 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp) 1508 { 1509 struct T_opthdr toh; 1510 struct cmsghdr *cmsg; 1511 1512 if (control == NULL) 1513 return; 1514 1515 if (oldflg) { 1516 /* No real options - caller has handled file descriptors */ 1517 return; 1518 } 1519 for (cmsg = (struct cmsghdr *)control; 1520 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1521 cmsg = CMSG_NEXT(cmsg)) { 1522 /* 1523 * Note: The caller handles file descriptors prior 1524 * to calling this function. 
1525 */ 1526 t_uscalar_t len; 1527 1528 if (cmsg->cmsg_level == SOL_SOCKET && 1529 cmsg->cmsg_type == SCM_RIGHTS) 1530 continue; 1531 1532 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg); 1533 toh.level = cmsg->cmsg_level; 1534 toh.name = cmsg->cmsg_type; 1535 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr); 1536 toh.status = 0; 1537 1538 soappendmsg(mp, &toh, sizeof (toh)); 1539 soappendmsg(mp, CMSG_CONTENT(cmsg), len); 1540 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len; 1541 ASSERT(mp->b_wptr <= mp->b_datap->db_lim); 1542 } 1543 } 1544 1545 /* 1546 * Return the length of the control message derived from the options. 1547 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP. 1548 * When oldflg is set only include SO_FILEP. 1549 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1550 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1551 * also be checked for any possible impacts. 1552 */ 1553 t_uscalar_t 1554 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) 1555 { 1556 t_uscalar_t cmsglen = 0; 1557 struct T_opthdr *tohp; 1558 t_uscalar_t len; 1559 t_uscalar_t last_roundup = 0; 1560 1561 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1562 1563 for (tohp = (struct T_opthdr *)opt; 1564 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1565 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1566 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n", 1567 tohp->level, tohp->name, tohp->len)); 1568 if (tohp->level == SOL_SOCKET && 1569 (tohp->name == SO_SRCADDR || 1570 tohp->name == SO_UNIX_CLOSE)) { 1571 continue; 1572 } 1573 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) { 1574 struct fdbuf *fdbuf; 1575 int fdbuflen; 1576 1577 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp); 1578 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp); 1579 1580 if (!fdbuf_verify(mp, fdbuf, fdbuflen)) 1581 continue; 1582 if (oldflg) { 1583 cmsglen += fdbuf_cmsglen(fdbuflen); 1584 continue; 1585 } 1586 len = fdbuf_cmsglen(fdbuflen); 
1587 } else if (tohp->level == SOL_SOCKET && 1588 tohp->name == SCM_TIMESTAMP) { 1589 if (oldflg) 1590 continue; 1591 1592 if (get_udatamodel() == DATAMODEL_NATIVE) { 1593 len = sizeof (struct timeval); 1594 } else { 1595 len = sizeof (struct timeval32); 1596 } 1597 } else { 1598 if (oldflg) 1599 continue; 1600 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1601 } 1602 /* 1603 * Exclude roundup for last option to not set 1604 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit. 1605 */ 1606 last_roundup = (t_uscalar_t) 1607 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) - 1608 (len + (int)sizeof (struct cmsghdr))); 1609 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) + 1610 last_roundup; 1611 } 1612 cmsglen -= last_roundup; 1613 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n", 1614 optlen, oldflg, cmsglen)); 1615 return (cmsglen); 1616 } 1617 1618 /* 1619 * Copy options from options to the control. Convert SO_FILEP to 1620 * file descriptors. 1621 * Returns errno or zero. 1622 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen 1623 * allocates the space that so_opt2cmsg fills. If one changes, the other should 1624 * also be checked for any possible impacts. 
 */
/*
 * mp is the message the options were received in; it is only handed to
 * fdbuf_verify(). opt/optlen is walked as a sequence of T_opthdr options
 * and control/controllen is the destination buffer.
 *
 * SO_SRCADDR and SO_UNIX_CLOSE are skipped. When oldflg is set only
 * SO_FILEP is converted and the descriptors are stored at the start of
 * control without any cmsghdr. Returns EPROTO when fdbuf_verify() rejects
 * an SO_FILEP option and otherwise passes on any fdbuf_extract() error.
 */
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
	void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
#if defined(DEBUG) || defined(__lint)
	/* End of the (rounded up) control buffer; used by the final checks */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These options never appear in the control message. */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old style: the descriptors go straight
				 * into the control buffer, no cmsghdr.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				/* SO_FILEP is presented as SCM_RIGHTS. */
				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestruc_t is read from the first
			 * intptr_t aligned address after the option header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			/* Convert to the timeval layout of the caller. */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			if (oldflg)
				continue;

			/* Any other option is copied through unchanged. */
			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
			    sizeof (struct cmsghdr));

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, cend, cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}

/*
 * Extract the SO_SRCADDR option value if present.
1747 */ 1748 void 1749 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1750 t_uscalar_t *srclenp) 1751 { 1752 struct T_opthdr *tohp; 1753 1754 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1755 1756 ASSERT(srcp != NULL && srclenp != NULL); 1757 *srcp = NULL; 1758 *srclenp = 0; 1759 1760 for (tohp = (struct T_opthdr *)opt; 1761 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1762 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1763 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1764 tohp->level, tohp->name, tohp->len)); 1765 if (tohp->level == SOL_SOCKET && 1766 tohp->name == SO_SRCADDR) { 1767 *srcp = _TPI_TOPT_DATA(tohp); 1768 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1769 } 1770 } 1771 } 1772 1773 /* 1774 * Verify if the SO_UNIX_CLOSE option is present. 1775 */ 1776 int 1777 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1778 { 1779 struct T_opthdr *tohp; 1780 1781 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1782 1783 for (tohp = (struct T_opthdr *)opt; 1784 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1785 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1786 dprint(1, 1787 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1788 tohp->level, tohp->name, tohp->len)); 1789 if (tohp->level == SOL_SOCKET && 1790 tohp->name == SO_UNIX_CLOSE) 1791 return (1); 1792 } 1793 return (0); 1794 } 1795 1796 /* 1797 * Allocate an M_PROTO message. 1798 * 1799 * If allocation fails the behavior depends on sleepflg: 1800 * _ALLOC_NOSLEEP fail immediately 1801 * _ALLOC_INTR sleep for memory until a signal is caught 1802 * _ALLOC_SLEEP sleep forever. Don't return NULL. 
1803 */ 1804 mblk_t * 1805 soallocproto(size_t size, int sleepflg) 1806 { 1807 mblk_t *mp; 1808 1809 /* Round up size for reuse */ 1810 size = MAX(size, 64); 1811 mp = allocb(size, BPRI_MED); 1812 if (mp == NULL) { 1813 int error; /* Dummy - error not returned to caller */ 1814 1815 switch (sleepflg) { 1816 case _ALLOC_SLEEP: 1817 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, &error); 1818 ASSERT(mp); 1819 break; 1820 case _ALLOC_INTR: 1821 mp = allocb_wait(size, BPRI_MED, 0, &error); 1822 if (mp == NULL) { 1823 /* Caught signal while sleeping for memory */ 1824 eprintline(ENOBUFS); 1825 return (NULL); 1826 } 1827 break; 1828 case _ALLOC_NOSLEEP: 1829 default: 1830 eprintline(ENOBUFS); 1831 return (NULL); 1832 } 1833 } 1834 DB_TYPE(mp) = M_PROTO; 1835 return (mp); 1836 } 1837 1838 /* 1839 * Allocate an M_PROTO message with a single component. 1840 * len is the length of buf. size is the amount to allocate. 1841 * 1842 * buf can be NULL with a non-zero len. 1843 * This results in a bzero'ed chunk being placed the message. 1844 */ 1845 mblk_t * 1846 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg) 1847 { 1848 mblk_t *mp; 1849 1850 if (size == 0) 1851 size = len; 1852 1853 ASSERT(size >= len); 1854 /* Round up size for reuse */ 1855 size = MAX(size, 64); 1856 mp = soallocproto(size, sleepflg); 1857 if (mp == NULL) 1858 return (NULL); 1859 mp->b_datap->db_type = M_PROTO; 1860 if (len != 0) { 1861 if (buf != NULL) 1862 bcopy(buf, mp->b_wptr, len); 1863 else 1864 bzero(mp->b_wptr, len); 1865 mp->b_wptr += len; 1866 } 1867 return (mp); 1868 } 1869 1870 /* 1871 * Append buf/len to mp. 1872 * The caller has to ensure that there is enough room in the mblk. 1873 * 1874 * buf can be NULL with a non-zero len. 1875 * This results in a bzero'ed chunk being placed the message. 
1876 */ 1877 void 1878 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1879 { 1880 ASSERT(mp); 1881 1882 if (len != 0) { 1883 /* Assert for room left */ 1884 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1885 if (buf != NULL) 1886 bcopy(buf, mp->b_wptr, len); 1887 else 1888 bzero(mp->b_wptr, len); 1889 } 1890 mp->b_wptr += len; 1891 } 1892 1893 /* 1894 * Create a message using two kernel buffers. 1895 * If size is set that will determine the allocation size (e.g. for future 1896 * soappendmsg calls). If size is zero it is derived from the buffer 1897 * lengths. 1898 */ 1899 mblk_t * 1900 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1901 ssize_t size, int sleepflg) 1902 { 1903 mblk_t *mp; 1904 1905 if (size == 0) 1906 size = len1 + len2; 1907 ASSERT(size >= len1 + len2); 1908 1909 mp = soallocproto1(buf1, len1, size, sleepflg); 1910 if (mp) 1911 soappendmsg(mp, buf2, len2); 1912 return (mp); 1913 } 1914 1915 /* 1916 * Create a message using three kernel buffers. 1917 * If size is set that will determine the allocation size (for future 1918 * soappendmsg calls). If size is zero it is derived from the buffer 1919 * lengths. 
1920 */ 1921 mblk_t * 1922 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1923 const void *buf3, ssize_t len3, ssize_t size, int sleepflg) 1924 { 1925 mblk_t *mp; 1926 1927 if (size == 0) 1928 size = len1 + len2 +len3; 1929 ASSERT(size >= len1 + len2 + len3); 1930 1931 mp = soallocproto1(buf1, len1, size, sleepflg); 1932 if (mp != NULL) { 1933 soappendmsg(mp, buf2, len2); 1934 soappendmsg(mp, buf3, len3); 1935 } 1936 return (mp); 1937 } 1938 1939 #ifdef DEBUG 1940 char * 1941 pr_state(uint_t state, uint_t mode) 1942 { 1943 static char buf[1024]; 1944 1945 buf[0] = 0; 1946 if (state & SS_ISCONNECTED) 1947 strcat(buf, "ISCONNECTED "); 1948 if (state & SS_ISCONNECTING) 1949 strcat(buf, "ISCONNECTING "); 1950 if (state & SS_ISDISCONNECTING) 1951 strcat(buf, "ISDISCONNECTING "); 1952 if (state & SS_CANTSENDMORE) 1953 strcat(buf, "CANTSENDMORE "); 1954 1955 if (state & SS_CANTRCVMORE) 1956 strcat(buf, "CANTRCVMORE "); 1957 if (state & SS_ISBOUND) 1958 strcat(buf, "ISBOUND "); 1959 if (state & SS_NDELAY) 1960 strcat(buf, "NDELAY "); 1961 if (state & SS_NONBLOCK) 1962 strcat(buf, "NONBLOCK "); 1963 1964 if (state & SS_ASYNC) 1965 strcat(buf, "ASYNC "); 1966 if (state & SS_ACCEPTCONN) 1967 strcat(buf, "ACCEPTCONN "); 1968 if (state & SS_HASCONNIND) 1969 strcat(buf, "HASCONNIND "); 1970 if (state & SS_SAVEDEOR) 1971 strcat(buf, "SAVEDEOR "); 1972 1973 if (state & SS_RCVATMARK) 1974 strcat(buf, "RCVATMARK "); 1975 if (state & SS_OOBPEND) 1976 strcat(buf, "OOBPEND "); 1977 if (state & SS_HAVEOOBDATA) 1978 strcat(buf, "HAVEOOBDATA "); 1979 if (state & SS_HADOOBDATA) 1980 strcat(buf, "HADOOBDATA "); 1981 1982 if (state & SS_FADDR_NOXLATE) 1983 strcat(buf, "FADDR_NOXLATE "); 1984 1985 if (mode & SM_PRIV) 1986 strcat(buf, "PRIV "); 1987 if (mode & SM_ATOMIC) 1988 strcat(buf, "ATOMIC "); 1989 if (mode & SM_ADDR) 1990 strcat(buf, "ADDR "); 1991 if (mode & SM_CONNREQUIRED) 1992 strcat(buf, "CONNREQUIRED "); 1993 1994 if (mode & SM_FDPASSING) 1995 
strcat(buf, "FDPASSING "); 1996 if (mode & SM_EXDATA) 1997 strcat(buf, "EXDATA "); 1998 if (mode & SM_OPTDATA) 1999 strcat(buf, "OPTDATA "); 2000 if (mode & SM_BYTESTREAM) 2001 strcat(buf, "BYTESTREAM "); 2002 return (buf); 2003 } 2004 2005 char * 2006 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 2007 { 2008 static char buf[1024]; 2009 2010 if (addr == NULL || addrlen == 0) { 2011 sprintf(buf, "(len %d) %p", addrlen, addr); 2012 return (buf); 2013 } 2014 switch (family) { 2015 case AF_INET: { 2016 struct sockaddr_in sin; 2017 2018 bcopy(addr, &sin, sizeof (sin)); 2019 2020 (void) sprintf(buf, "(len %d) %x/%d", 2021 addrlen, ntohl(sin.sin_addr.s_addr), 2022 ntohs(sin.sin_port)); 2023 break; 2024 } 2025 case AF_INET6: { 2026 struct sockaddr_in6 sin6; 2027 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 2028 2029 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 2030 sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 2031 addrlen, 2032 ntohs(piece[0]), ntohs(piece[1]), 2033 ntohs(piece[2]), ntohs(piece[3]), 2034 ntohs(piece[4]), ntohs(piece[5]), 2035 ntohs(piece[6]), ntohs(piece[7]), 2036 ntohs(sin6.sin6_port)); 2037 break; 2038 } 2039 case AF_UNIX: { 2040 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 2041 2042 (void) sprintf(buf, "(len %d) %s", 2043 addrlen, 2044 (soun == NULL) ? "(none)" : soun->sun_path); 2045 break; 2046 } 2047 default: 2048 (void) sprintf(buf, "(unknown af %d)", family); 2049 break; 2050 } 2051 return (buf); 2052 } 2053 2054 /* The logical equivalence operator (a if-and-only-if b) */ 2055 #define EQUIV(a, b) (((a) && (b)) || (!(a) && (!(b)))) 2056 2057 /* 2058 * Verify limitations and invariants on oob state. 
2059 * Return 1 if OK, otherwise 0 so that it can be used as 2060 * ASSERT(verify_oobstate(so)); 2061 */ 2062 int 2063 so_verify_oobstate(struct sonode *so) 2064 { 2065 ASSERT(MUTEX_HELD(&so->so_lock)); 2066 2067 /* 2068 * The possible state combinations are: 2069 * 0 2070 * SS_OOBPEND 2071 * SS_OOBPEND|SS_HAVEOOBDATA 2072 * SS_OOBPEND|SS_HADOOBDATA 2073 * SS_HADOOBDATA 2074 */ 2075 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 2076 case 0: 2077 case SS_OOBPEND: 2078 case SS_OOBPEND|SS_HAVEOOBDATA: 2079 case SS_OOBPEND|SS_HADOOBDATA: 2080 case SS_HADOOBDATA: 2081 break; 2082 default: 2083 printf("Bad oob state 1 (%p): counts %d/%d state %s\n", 2084 so, so->so_oobsigcnt, 2085 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2086 return (0); 2087 } 2088 2089 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 2090 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 2091 printf("Bad oob state 2 (%p): counts %d/%d state %s\n", 2092 so, so->so_oobsigcnt, 2093 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2094 return (0); 2095 } 2096 2097 /* 2098 * (so_oobsigcnt != 0 or SS_RCVATMARK) iff SS_OOBPEND 2099 */ 2100 if (!EQUIV((so->so_oobsigcnt != 0) || (so->so_state & SS_RCVATMARK), 2101 so->so_state & SS_OOBPEND)) { 2102 printf("Bad oob state 3 (%p): counts %d/%d state %s\n", 2103 so, so->so_oobsigcnt, 2104 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2105 return (0); 2106 } 2107 2108 /* 2109 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 2110 */ 2111 if (!(so->so_options & SO_OOBINLINE) && 2112 !EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 2113 printf("Bad oob state 4 (%p): counts %d/%d state %s\n", 2114 so, so->so_oobsigcnt, 2115 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2116 return (0); 2117 } 2118 if (so->so_oobsigcnt < so->so_oobcnt) { 2119 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 2120 so, so->so_oobsigcnt, 2121 so->so_oobcnt, 
pr_state(so->so_state, so->so_mode)); 2122 return (0); 2123 } 2124 return (1); 2125 } 2126 #undef EQUIV 2127 2128 #endif /* DEBUG */ 2129 2130 /* initialize sockfs zone specific kstat related items */ 2131 void * 2132 sock_kstat_init(zoneid_t zoneid) 2133 { 2134 kstat_t *ksp; 2135 2136 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 2137 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 2138 2139 if (ksp != NULL) { 2140 ksp->ks_update = sockfs_update; 2141 ksp->ks_snapshot = sockfs_snapshot; 2142 ksp->ks_lock = &socklist.sl_lock; 2143 ksp->ks_private = (void *)(uintptr_t)zoneid; 2144 kstat_install(ksp); 2145 } 2146 2147 return (ksp); 2148 } 2149 2150 /* tear down sockfs zone specific kstat related items */ 2151 /*ARGSUSED*/ 2152 void 2153 sock_kstat_fini(zoneid_t zoneid, void *arg) 2154 { 2155 kstat_t *ksp = (kstat_t *)arg; 2156 2157 if (ksp != NULL) { 2158 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 2159 kstat_delete(ksp); 2160 } 2161 } 2162 2163 /* 2164 * Zones: 2165 * Note that nactive is going to be different for each zone. 2166 * This means we require kstat to call sockfs_update and then sockfs_snapshot 2167 * for the same zone, or sockfs_snapshot will be taken into the wrong size 2168 * buffer. This is safe, but if the buffer is too small, user will not be 2169 * given details of all sockets. However, as this kstat has a ks_lock, kstat 2170 * driver will keep it locked between the update and the snapshot, so no 2171 * other process (zone) can currently get inbetween resulting in a wrong size 2172 * buffer allocation. 
2173 */ 2174 static int 2175 sockfs_update(kstat_t *ksp, int rw) 2176 { 2177 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 2178 struct sonode *so; /* current sonode on socklist */ 2179 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2180 2181 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2182 2183 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2184 return (EACCES); 2185 } 2186 2187 for (so = socklist.sl_list; so != NULL; so = so->so_next) { 2188 if (so->so_accessvp != NULL && so->so_zoneid == myzoneid) { 2189 nactive++; 2190 } 2191 } 2192 ksp->ks_ndata = nactive; 2193 ksp->ks_data_size = nactive * sizeof (struct k_sockinfo); 2194 2195 return (0); 2196 } 2197 2198 static int 2199 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 2200 { 2201 int ns; /* # of sonodes we've copied */ 2202 struct sonode *so; /* current sonode on socklist */ 2203 struct k_sockinfo *pksi; /* where we put sockinfo data */ 2204 t_uscalar_t sn_len; /* soa_len */ 2205 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2206 2207 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2208 2209 ksp->ks_snaptime = gethrtime(); 2210 2211 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2212 return (EACCES); 2213 } 2214 2215 /* 2216 * for each sonode on the socklist, we massage the important 2217 * info into buf, in k_sockinfo format. 2218 */ 2219 pksi = (struct k_sockinfo *)buf; 2220 for (ns = 0, so = socklist.sl_list; so != NULL; so = so->so_next) { 2221 /* only stuff active sonodes and the same zone: */ 2222 if (so->so_accessvp == NULL || so->so_zoneid != myzoneid) { 2223 continue; 2224 } 2225 2226 /* 2227 * If the sonode was activated between the update and the 2228 * snapshot, we're done - as this is only a snapshot. 
2229 */ 2230 if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) { 2231 break; 2232 } 2233 2234 /* copy important info into buf: */ 2235 pksi->ks_si.si_size = sizeof (struct k_sockinfo); 2236 pksi->ks_si.si_family = so->so_family; 2237 pksi->ks_si.si_type = so->so_type; 2238 pksi->ks_si.si_flag = so->so_flag; 2239 pksi->ks_si.si_state = so->so_state; 2240 pksi->ks_si.si_serv_type = so->so_serv_type; 2241 pksi->ks_si.si_ux_laddr_sou_magic = so->so_ux_laddr.soua_magic; 2242 pksi->ks_si.si_ux_faddr_sou_magic = so->so_ux_faddr.soua_magic; 2243 pksi->ks_si.si_laddr_soa_len = so->so_laddr.soa_len; 2244 pksi->ks_si.si_faddr_soa_len = so->so_faddr.soa_len; 2245 pksi->ks_si.si_szoneid = so->so_zoneid; 2246 2247 mutex_enter(&so->so_lock); 2248 2249 if (so->so_laddr_sa != NULL) { 2250 ASSERT(so->so_laddr_sa->sa_data != NULL); 2251 sn_len = so->so_laddr_len; 2252 ASSERT(sn_len <= sizeof (short) + 2253 sizeof (pksi->ks_si.si_laddr_sun_path)); 2254 2255 pksi->ks_si.si_laddr_family = 2256 so->so_laddr_sa->sa_family; 2257 if (sn_len != 0) { 2258 /* AF_UNIX socket names are NULL terminated */ 2259 (void) strncpy(pksi->ks_si.si_laddr_sun_path, 2260 so->so_laddr_sa->sa_data, 2261 sizeof (pksi->ks_si.si_laddr_sun_path)); 2262 sn_len = strlen(pksi->ks_si.si_laddr_sun_path); 2263 } 2264 pksi->ks_si.si_laddr_sun_path[sn_len] = 0; 2265 } 2266 2267 if (so->so_faddr_sa != NULL) { 2268 ASSERT(so->so_faddr_sa->sa_data != NULL); 2269 sn_len = so->so_faddr_len; 2270 ASSERT(sn_len <= sizeof (short) + 2271 sizeof (pksi->ks_si.si_faddr_sun_path)); 2272 2273 pksi->ks_si.si_faddr_family = 2274 so->so_faddr_sa->sa_family; 2275 if (sn_len != 0) { 2276 (void) strncpy(pksi->ks_si.si_faddr_sun_path, 2277 so->so_faddr_sa->sa_data, 2278 sizeof (pksi->ks_si.si_faddr_sun_path)); 2279 sn_len = strlen(pksi->ks_si.si_faddr_sun_path); 2280 } 2281 pksi->ks_si.si_faddr_sun_path[sn_len] = 0; 2282 } 2283 2284 mutex_exit(&so->so_lock); 2285 2286 (void) sprintf(pksi->ks_straddr[0], "%p", (void *)so); 2287 (void) 
sprintf(pksi->ks_straddr[1], "%p", 2288 (void *)so->so_ux_laddr.soua_vp); 2289 (void) sprintf(pksi->ks_straddr[2], "%p", 2290 (void *)so->so_ux_faddr.soua_vp); 2291 2292 ns++; 2293 pksi++; 2294 } 2295 2296 ksp->ks_ndata = ns; 2297 return (0); 2298 } 2299 2300 ssize_t 2301 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 2302 { 2303 struct uio auio; 2304 struct iovec aiov[MSG_MAXIOVLEN]; 2305 register vnode_t *vp; 2306 int ioflag, rwflag; 2307 ssize_t cnt; 2308 int error = 0; 2309 int iovcnt = 0; 2310 short fflag; 2311 2312 vp = fp->f_vnode; 2313 fflag = fp->f_flag; 2314 2315 rwflag = 0; 2316 aiov[0].iov_base = (caddr_t)buf; 2317 aiov[0].iov_len = size; 2318 iovcnt = 1; 2319 cnt = (ssize_t)size; 2320 (void) VOP_RWLOCK(vp, rwflag, NULL); 2321 2322 auio.uio_loffset = fileoff; 2323 auio.uio_iov = aiov; 2324 auio.uio_iovcnt = iovcnt; 2325 auio.uio_resid = cnt; 2326 auio.uio_segflg = UIO_SYSSPACE; 2327 auio.uio_llimit = MAXOFFSET_T; 2328 auio.uio_fmode = fflag; 2329 auio.uio_extflg = UIO_COPY_CACHED; 2330 2331 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 2332 2333 /* If read sync is not asked for, filter sync flags */ 2334 if ((ioflag & FRSYNC) == 0) 2335 ioflag &= ~(FSYNC|FDSYNC); 2336 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 2337 cnt -= auio.uio_resid; 2338 2339 VOP_RWUNLOCK(vp, rwflag, NULL); 2340 2341 if (error == EINTR && cnt != 0) 2342 error = 0; 2343 out: 2344 if (error != 0) { 2345 *err = error; 2346 return (0); 2347 } else { 2348 *err = 0; 2349 return (cnt); 2350 } 2351 } 2352