1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/buf.h> 34 #include <sys/conf.h> 35 #include <sys/cred.h> 36 #include <sys/kmem.h> 37 #include <sys/sysmacros.h> 38 #include <sys/vfs.h> 39 #include <sys/vfs_opreg.h> 40 #include <sys/vnode.h> 41 #include <sys/debug.h> 42 #include <sys/errno.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/open.h> 46 #include <sys/user.h> 47 #include <sys/uio.h> 48 #include <sys/termios.h> 49 #include <sys/stream.h> 50 #include <sys/strsubr.h> 51 #include <sys/strsun.h> 52 #include <sys/esunddi.h> 53 #include <sys/flock.h> 54 #include <sys/modctl.h> 55 #include <sys/cmn_err.h> 56 #include <sys/mkdev.h> 57 #include <sys/pathname.h> 58 #include <sys/ddi.h> 59 #include <sys/stat.h> 60 #include <sys/fs/snode.h> 61 #include <sys/fs/dv_node.h> 62 #include <sys/zone.h> 63 64 #include <sys/socket.h> 65 #include <sys/socketvar.h> 66 #include <netinet/in.h> 
67 #include <sys/un.h> 68 69 #include <sys/ucred.h> 70 71 #include <sys/tiuser.h> 72 #define _SUN_TPI_VERSION 2 73 #include <sys/tihdr.h> 74 75 #include <c2/audit.h> 76 77 #include <fs/sockfs/nl7c.h> 78 79 /* 80 * Macros that operate on struct cmsghdr. 81 * The CMSG_VALID macro does not assume that the last option buffer is padded. 82 */ 83 #define CMSG_CONTENT(cmsg) (&((cmsg)[1])) 84 #define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr)) 85 #define CMSG_VALID(cmsg, start, end) \ 86 (ISALIGNED_cmsghdr(cmsg) && \ 87 ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \ 88 ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \ 89 ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \ 90 ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end))) 91 #define SO_LOCK_WAKEUP_TIME 3000 /* Wakeup time in milliseconds */ 92 93 static struct kmem_cache *socktpi_cache, *socktpi_unix_cache; 94 struct kmem_cache *socktpi_sod_cache; 95 96 dev_t sockdev; /* For fsid in getattr */ 97 98 struct sockparams *sphead; 99 krwlock_t splist_lock; 100 101 struct socklist socklist; 102 103 static int sockfs_update(kstat_t *, int); 104 static int sockfs_snapshot(kstat_t *, void *, int); 105 106 extern void sendfile_init(); 107 108 extern void nl7c_init(void); 109 110 extern int sostr_init(); 111 112 #define ADRSTRLEN (2 * sizeof (void *) + 1) 113 /* 114 * kernel structure for passing the sockinfo data back up to the user. 115 * the strings array allows us to convert AF_UNIX addresses into strings 116 * with a common method regardless of which n-bit kernel we're running. 117 */ 118 struct k_sockinfo { 119 struct sockinfo ks_si; 120 char ks_straddr[3][ADRSTRLEN]; 121 }; 122 123 /* 124 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode. 125 * Returns with the vnode held. 
126 */ 127 static int 128 sogetvp(char *devpath, vnode_t **vpp, int uioflag) 129 { 130 struct snode *csp; 131 vnode_t *vp, *dvp; 132 major_t maj; 133 int error; 134 135 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE); 136 /* 137 * Lookup the underlying filesystem vnode. 138 */ 139 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp); 140 if (error) 141 return (error); 142 143 /* Check that it is the correct vnode */ 144 if (vp->v_type != VCHR) { 145 VN_RELE(vp); 146 return (ENOTSOCK); 147 } 148 149 /* 150 * If devpath went through devfs, the device should already 151 * be configured. If devpath is a mknod file, however, we 152 * need to make sure the device is properly configured. 153 * To do this, we do something similar to spec_open() 154 * except that we resolve to the minor/leaf level since 155 * we need to return a vnode. 156 */ 157 csp = VTOS(VTOS(vp)->s_commonvp); 158 if (!(csp->s_flag & SDIPSET)) { 159 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 160 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname); 161 if (error == 0) 162 error = devfs_lookupname(pathname, NULLVPP, &dvp); 163 VN_RELE(vp); 164 kmem_free(pathname, MAXPATHLEN); 165 if (error != 0) 166 return (ENXIO); 167 vp = dvp; /* use the devfs vp */ 168 } 169 170 /* device is configured at this point */ 171 maj = getmajor(vp->v_rdev); 172 if (!STREAMSTAB(maj)) { 173 VN_RELE(vp); 174 return (ENOSTR); 175 } 176 177 *vpp = vp; 178 return (0); 179 } 180 181 /* 182 * Add or delete (latter if devpath is NULL) an enter to the sockparams 183 * table. If devpathlen is zero the devpath with not be kmem_freed. Otherwise 184 * this routine assumes that the caller has kmem_alloced devpath/devpathlen 185 * for this routine to consume. 
186 * The zero devpathlen could be used if the kernel wants to create entries 187 * itself by calling sockconfig(1,2,3, "/dev/tcp", 0); 188 */ 189 int 190 soconfig(int domain, int type, int protocol, 191 char *devpath, int devpathlen) 192 { 193 struct sockparams **spp; 194 struct sockparams *sp; 195 int error = 0; 196 197 dprint(0, ("soconfig(%d,%d,%d,%s,%d)\n", 198 domain, type, protocol, devpath, devpathlen)); 199 200 /* 201 * Look for an existing match. 202 */ 203 rw_enter(&splist_lock, RW_WRITER); 204 for (spp = &sphead; (sp = *spp) != NULL; spp = &sp->sp_next) { 205 if (sp->sp_domain == domain && 206 sp->sp_type == type && 207 sp->sp_protocol == protocol) { 208 break; 209 } 210 } 211 if (devpath == NULL) { 212 ASSERT(devpathlen == 0); 213 214 /* Delete existing entry */ 215 if (sp == NULL) { 216 error = ENXIO; 217 goto done; 218 } 219 /* Unlink and free existing entry */ 220 *spp = sp->sp_next; 221 ASSERT(sp->sp_vnode); 222 VN_RELE(sp->sp_vnode); 223 if (sp->sp_devpathlen != 0) 224 kmem_free(sp->sp_devpath, sp->sp_devpathlen); 225 kmem_free(sp, sizeof (*sp)); 226 } else { 227 vnode_t *vp; 228 229 /* Add new entry */ 230 if (sp != NULL) { 231 error = EEXIST; 232 goto done; 233 } 234 235 error = sogetvp(devpath, &vp, UIO_SYSSPACE); 236 if (error) { 237 dprint(0, ("soconfig: vp %s failed with %d\n", 238 devpath, error)); 239 goto done; 240 } 241 242 dprint(0, ("soconfig: %s => vp %p, dev 0x%lx\n", 243 devpath, (void *)vp, vp->v_rdev)); 244 245 sp = kmem_alloc(sizeof (*sp), KM_SLEEP); 246 sp->sp_domain = domain; 247 sp->sp_type = type; 248 sp->sp_protocol = protocol; 249 sp->sp_devpath = devpath; 250 sp->sp_devpathlen = devpathlen; 251 sp->sp_vnode = vp; 252 sp->sp_next = NULL; 253 *spp = sp; 254 } 255 done: 256 rw_exit(&splist_lock); 257 if (error) { 258 if (devpath != NULL) 259 kmem_free(devpath, devpathlen); 260 #ifdef SOCK_DEBUG 261 eprintline(error); 262 #endif /* SOCK_DEBUG */ 263 } 264 return (error); 265 } 266 267 /* 268 * Lookup an entry in the sockparams 
list based on the triple. 269 * If no entry is found and devpath is not NULL translate devpath to a 270 * vnode. Note that devpath is a pointer to a user address! 271 * Returns with the vnode held. 272 * 273 * When this routine uses devpath it does not create an entry in the sockparams 274 * list since this routine can run on behalf of any user and one user 275 * should not be able to effect the transport used by another user. 276 * 277 * In order to return the correct error this routine has to do wildcard scans 278 * of the list. The errors are (in decreasing precedence): 279 * EAFNOSUPPORT - address family not in list 280 * EPROTONOSUPPORT - address family supported but not protocol. 281 * EPROTOTYPE - address family and protocol supported but not socket type. 282 */ 283 vnode_t * 284 solookup(int domain, int type, int protocol, char *devpath, int *errorp) 285 { 286 struct sockparams *sp; 287 int error; 288 vnode_t *vp; 289 290 rw_enter(&splist_lock, RW_READER); 291 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 292 if (sp->sp_domain == domain && 293 sp->sp_type == type && 294 sp->sp_protocol == protocol) { 295 break; 296 } 297 } 298 if (sp == NULL) { 299 dprint(0, ("solookup(%d,%d,%d) not found\n", 300 domain, type, protocol)); 301 if (devpath == NULL) { 302 /* Determine correct error code */ 303 int found = 0; 304 305 for (sp = sphead; sp != NULL; sp = sp->sp_next) { 306 if (sp->sp_domain == domain && found < 1) 307 found = 1; 308 if (sp->sp_domain == domain && 309 sp->sp_protocol == protocol && found < 2) 310 found = 2; 311 } 312 rw_exit(&splist_lock); 313 switch (found) { 314 case 0: 315 *errorp = EAFNOSUPPORT; 316 break; 317 case 1: 318 *errorp = EPROTONOSUPPORT; 319 break; 320 case 2: 321 *errorp = EPROTOTYPE; 322 break; 323 } 324 return (NULL); 325 } 326 rw_exit(&splist_lock); 327 328 /* 329 * Return vp based on devpath. 330 * Do not enter into table to avoid random users 331 * modifying the sockparams list. 
332 */ 333 error = sogetvp(devpath, &vp, UIO_USERSPACE); 334 if (error) { 335 dprint(0, ("solookup: vp %p failed with %d\n", 336 (void *)devpath, error)); 337 *errorp = EPROTONOSUPPORT; 338 return (NULL); 339 } 340 dprint(0, ("solookup: %p => vp %p, dev 0x%lx\n", 341 (void *)devpath, (void *)vp, vp->v_rdev)); 342 343 return (vp); 344 } 345 dprint(0, ("solookup(%d,%d,%d) vp %p devpath %s\n", 346 domain, type, protocol, (void *)sp->sp_vnode, sp->sp_devpath)); 347 348 vp = sp->sp_vnode; 349 VN_HOLD(vp); 350 rw_exit(&splist_lock); 351 return (vp); 352 } 353 354 /* 355 * Return a socket vnode. 356 * 357 * Assumes that the caller is "passing" an VN_HOLD for accessvp i.e. 358 * when the socket is freed a VN_RELE will take place. 359 * 360 * Note that sockets assume that the driver will clone (either itself 361 * or by using the clone driver) i.e. a socket() call will always 362 * result in a new vnode being created. 363 */ 364 struct vnode * 365 makesockvp(struct vnode *accessvp, int domain, int type, int protocol) 366 { 367 kmem_cache_t *cp; 368 struct sonode *so; 369 struct vnode *vp; 370 time_t now; 371 dev_t dev; 372 373 cp = (domain == AF_UNIX) ? socktpi_unix_cache : socktpi_cache; 374 so = kmem_cache_alloc(cp, KM_SLEEP); 375 so->so_cache = cp; 376 so->so_obj = so; 377 vp = SOTOV(so); 378 now = gethrestime_sec(); 379 380 so->so_flag = 0; 381 ASSERT(so->so_accessvp == NULL); 382 so->so_accessvp = accessvp; 383 dev = accessvp->v_rdev; 384 385 /* 386 * Record in so_flag that it is a clone. 
387 */ 388 if (getmajor(dev) == clone_major) { 389 so->so_flag |= SOCLONE; 390 } 391 so->so_dev = dev; 392 393 so->so_state = 0; 394 so->so_mode = 0; 395 396 so->so_fsid = sockdev; 397 so->so_atime = now; 398 so->so_mtime = now; 399 so->so_ctime = now; /* Never modified */ 400 so->so_count = 0; 401 402 so->so_family = (short)domain; 403 so->so_type = (short)type; 404 so->so_protocol = (short)protocol; 405 so->so_pushcnt = 0; 406 407 so->so_options = 0; 408 so->so_linger.l_onoff = 0; 409 so->so_linger.l_linger = 0; 410 so->so_sndbuf = 0; 411 so->so_rcvbuf = 0; 412 so->so_sndlowat = 0; 413 so->so_rcvlowat = 0; 414 #ifdef notyet 415 so->so_sndtimeo = 0; 416 so->so_rcvtimeo = 0; 417 #endif /* notyet */ 418 so->so_error = 0; 419 so->so_delayed_error = 0; 420 421 ASSERT(so->so_oobmsg == NULL); 422 so->so_oobcnt = 0; 423 so->so_oobsigcnt = 0; 424 so->so_pgrp = 0; 425 so->so_provinfo = NULL; 426 427 ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL); 428 so->so_laddr_len = so->so_faddr_len = 0; 429 so->so_laddr_maxlen = so->so_faddr_maxlen = 0; 430 so->so_eaddr_mp = NULL; 431 so->so_priv = NULL; 432 433 so->so_peercred = NULL; 434 435 ASSERT(so->so_ack_mp == NULL); 436 ASSERT(so->so_conn_ind_head == NULL); 437 ASSERT(so->so_conn_ind_tail == NULL); 438 ASSERT(so->so_ux_bound_vp == NULL); 439 ASSERT(so->so_unbind_mp == NULL); 440 441 vn_reinit(vp); 442 vp->v_vfsp = rootvfs; 443 vp->v_type = VSOCK; 444 vp->v_rdev = so->so_dev; 445 vn_exists(vp); 446 447 return (vp); 448 } 449 450 void 451 sockfree(struct sonode *so) 452 { 453 mblk_t *mp; 454 vnode_t *vp; 455 456 ASSERT(so->so_count == 0); 457 ASSERT(so->so_accessvp); 458 ASSERT(so->so_discon_ind_mp == NULL); 459 460 vp = so->so_accessvp; 461 VN_RELE(vp); 462 463 /* 464 * Protect so->so_[lf]addr_sa so that sockfs_snapshot() can safely 465 * indirect them. It also uses so_accessvp as a validity test. 
466 */ 467 mutex_enter(&so->so_lock); 468 469 so->so_accessvp = NULL; 470 471 if (so->so_laddr_sa) { 472 ASSERT((caddr_t)so->so_faddr_sa == 473 (caddr_t)so->so_laddr_sa + so->so_laddr_maxlen); 474 ASSERT(so->so_faddr_maxlen == so->so_laddr_maxlen); 475 so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID); 476 kmem_free(so->so_laddr_sa, so->so_laddr_maxlen * 2); 477 so->so_laddr_sa = NULL; 478 so->so_laddr_len = so->so_laddr_maxlen = 0; 479 so->so_faddr_sa = NULL; 480 so->so_faddr_len = so->so_faddr_maxlen = 0; 481 } 482 483 mutex_exit(&so->so_lock); 484 485 if ((mp = so->so_eaddr_mp) != NULL) { 486 freemsg(mp); 487 so->so_eaddr_mp = NULL; 488 so->so_delayed_error = 0; 489 } 490 if ((mp = so->so_ack_mp) != NULL) { 491 freemsg(mp); 492 so->so_ack_mp = NULL; 493 } 494 if ((mp = so->so_conn_ind_head) != NULL) { 495 mblk_t *mp1; 496 497 while (mp) { 498 mp1 = mp->b_next; 499 mp->b_next = NULL; 500 freemsg(mp); 501 mp = mp1; 502 } 503 so->so_conn_ind_head = so->so_conn_ind_tail = NULL; 504 so->so_state &= ~SS_HASCONNIND; 505 } 506 #ifdef DEBUG 507 mutex_enter(&so->so_lock); 508 ASSERT(so_verify_oobstate(so)); 509 mutex_exit(&so->so_lock); 510 #endif /* DEBUG */ 511 if ((mp = so->so_oobmsg) != NULL) { 512 freemsg(mp); 513 so->so_oobmsg = NULL; 514 so->so_state &= ~(SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA); 515 } 516 517 if ((mp = so->so_nl7c_rcv_mp) != NULL) { 518 so->so_nl7c_rcv_mp = NULL; 519 freemsg(mp); 520 } 521 so->so_nl7c_rcv_rval = 0; 522 if (so->so_nl7c_uri != NULL) { 523 nl7c_urifree(so); 524 /* urifree() cleared nl7c_uri */ 525 } 526 if (so->so_nl7c_flags) { 527 so->so_nl7c_flags = 0; 528 } 529 530 if (so->so_direct != NULL) { 531 sodirect_t *sodp = so->so_direct; 532 533 ASSERT(sodp->sod_uioafh == NULL); 534 535 so->so_direct = NULL; 536 kmem_cache_free(socktpi_sod_cache, sodp); 537 } 538 539 ASSERT(so->so_ux_bound_vp == NULL); 540 if ((mp = so->so_unbind_mp) != NULL) { 541 freemsg(mp); 542 so->so_unbind_mp = NULL; 543 } 544 vn_invalid(SOTOV(so)); 545 546 if 
(so->so_peercred != NULL) 547 crfree(so->so_peercred); 548 549 kmem_cache_free(so->so_cache, so->so_obj); 550 } 551 552 /* 553 * Update the accessed, updated, or changed times in an sonode 554 * with the current time. 555 * 556 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable 557 * attributes in a fstat call. (They return the current time and 0 for 558 * all timestamps, respectively.) We maintain the current timestamps 559 * here primarily so that should sockmod be popped the resulting 560 * file descriptor will behave like a stream w.r.t. the timestamps. 561 */ 562 void 563 so_update_attrs(struct sonode *so, int flag) 564 { 565 time_t now = gethrestime_sec(); 566 567 mutex_enter(&so->so_lock); 568 so->so_flag |= flag; 569 if (flag & SOACC) 570 so->so_atime = now; 571 if (flag & SOMOD) 572 so->so_mtime = now; 573 mutex_exit(&so->so_lock); 574 } 575 576 /*ARGSUSED*/ 577 static int 578 socktpi_constructor(void *buf, void *cdrarg, int kmflags) 579 { 580 struct sonode *so = buf; 581 struct vnode *vp; 582 583 vp = so->so_vnode = vn_alloc(kmflags); 584 if (vp == NULL) { 585 return (-1); 586 } 587 vn_setops(vp, socktpi_vnodeops); 588 vp->v_data = so; 589 590 so->so_direct = NULL; 591 592 so->so_nl7c_flags = 0; 593 so->so_nl7c_uri = NULL; 594 so->so_nl7c_rcv_mp = NULL; 595 596 so->so_oobmsg = NULL; 597 so->so_ack_mp = NULL; 598 so->so_conn_ind_head = NULL; 599 so->so_conn_ind_tail = NULL; 600 so->so_discon_ind_mp = NULL; 601 so->so_ux_bound_vp = NULL; 602 so->so_unbind_mp = NULL; 603 so->so_accessvp = NULL; 604 so->so_laddr_sa = NULL; 605 so->so_faddr_sa = NULL; 606 so->so_ops = &sotpi_sonodeops; 607 608 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 609 mutex_init(&so->so_plumb_lock, NULL, MUTEX_DEFAULT, NULL); 610 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 611 cv_init(&so->so_ack_cv, NULL, CV_DEFAULT, NULL); 612 cv_init(&so->so_connind_cv, NULL, CV_DEFAULT, NULL); 613 cv_init(&so->so_want_cv, NULL, CV_DEFAULT, NULL); 614 615 return 
(0); 616 } 617 618 /*ARGSUSED1*/ 619 static void 620 socktpi_destructor(void *buf, void *cdrarg) 621 { 622 struct sonode *so = buf; 623 struct vnode *vp = SOTOV(so); 624 625 ASSERT(so->so_direct == NULL); 626 627 ASSERT(so->so_nl7c_flags == 0); 628 ASSERT(so->so_nl7c_uri == NULL); 629 ASSERT(so->so_nl7c_rcv_mp == NULL); 630 631 ASSERT(so->so_oobmsg == NULL); 632 ASSERT(so->so_ack_mp == NULL); 633 ASSERT(so->so_conn_ind_head == NULL); 634 ASSERT(so->so_conn_ind_tail == NULL); 635 ASSERT(so->so_discon_ind_mp == NULL); 636 ASSERT(so->so_ux_bound_vp == NULL); 637 ASSERT(so->so_unbind_mp == NULL); 638 ASSERT(so->so_ops == &sotpi_sonodeops); 639 640 ASSERT(vn_matchops(vp, socktpi_vnodeops)); 641 ASSERT(vp->v_data == so); 642 643 vn_free(vp); 644 645 mutex_destroy(&so->so_lock); 646 mutex_destroy(&so->so_plumb_lock); 647 cv_destroy(&so->so_state_cv); 648 cv_destroy(&so->so_ack_cv); 649 cv_destroy(&so->so_connind_cv); 650 cv_destroy(&so->so_want_cv); 651 } 652 653 static int 654 socktpi_unix_constructor(void *buf, void *cdrarg, int kmflags) 655 { 656 int retval; 657 658 if ((retval = socktpi_constructor(buf, cdrarg, kmflags)) == 0) { 659 struct sonode *so = (struct sonode *)buf; 660 661 mutex_enter(&socklist.sl_lock); 662 663 so->so_next = socklist.sl_list; 664 so->so_prev = NULL; 665 if (so->so_next != NULL) 666 so->so_next->so_prev = so; 667 socklist.sl_list = so; 668 669 mutex_exit(&socklist.sl_lock); 670 671 } 672 return (retval); 673 } 674 675 static void 676 socktpi_unix_destructor(void *buf, void *cdrarg) 677 { 678 struct sonode *so = (struct sonode *)buf; 679 680 mutex_enter(&socklist.sl_lock); 681 682 if (so->so_next != NULL) 683 so->so_next->so_prev = so->so_prev; 684 if (so->so_prev != NULL) 685 so->so_prev->so_next = so->so_next; 686 else 687 socklist.sl_list = so->so_next; 688 689 mutex_exit(&socklist.sl_lock); 690 691 socktpi_destructor(buf, cdrarg); 692 } 693 694 /* 695 * Init function called when sockfs is loaded. 
696 */ 697 int 698 sockinit(int fstype, char *name) 699 { 700 static const fs_operation_def_t sock_vfsops_template[] = { 701 NULL, NULL 702 }; 703 int error; 704 major_t dev; 705 char *err_str; 706 707 error = vfs_setfsops(fstype, sock_vfsops_template, NULL); 708 if (error != 0) { 709 zcmn_err(GLOBAL_ZONEID, CE_WARN, 710 "sockinit: bad vfs ops template"); 711 return (error); 712 } 713 714 error = vn_make_ops(name, socktpi_vnodeops_template, &socktpi_vnodeops); 715 if (error != 0) { 716 err_str = "sockinit: bad sock vnode ops template"; 717 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */ 718 socktpi_vnodeops = NULL; 719 goto failure; 720 } 721 722 error = sosctp_init(); 723 if (error != 0) { 724 err_str = NULL; 725 goto failure; 726 } 727 728 error = sosdp_init(); 729 if (error != 0) { 730 err_str = NULL; 731 goto failure; 732 } 733 734 error = sostr_init(); 735 if (error != 0) { 736 err_str = NULL; 737 goto failure; 738 } 739 740 /* 741 * Create sonode caches. We create a special one for AF_UNIX so 742 * that we can track them for netstat(1m). 743 */ 744 socktpi_cache = kmem_cache_create("socktpi_cache", 745 sizeof (struct sonode), 0, socktpi_constructor, 746 socktpi_destructor, NULL, NULL, NULL, 0); 747 748 socktpi_unix_cache = kmem_cache_create("socktpi_unix_cache", 749 sizeof (struct sonode), 0, socktpi_unix_constructor, 750 socktpi_unix_destructor, NULL, NULL, NULL, 0); 751 752 /* 753 * Build initial list mapping socket parameters to vnode. 754 */ 755 rw_init(&splist_lock, NULL, RW_DEFAULT, NULL); 756 757 /* 758 * If sockets are needed before init runs /sbin/soconfig 759 * it is possible to preload the sockparams list here using 760 * calls like: 761 * sockconfig(1,2,3, "/dev/tcp", 0); 762 */ 763 764 /* 765 * Create a unique dev_t for use in so_fsid. 
766 */ 767 768 if ((dev = getudev()) == (major_t)-1) 769 dev = 0; 770 sockdev = makedevice(dev, 0); 771 772 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL); 773 sendfile_init(); 774 nl7c_init(); 775 776 return (0); 777 778 failure: 779 (void) vfs_freevfsops_by_type(fstype); 780 if (socktpi_vnodeops != NULL) 781 vn_freevnodeops(socktpi_vnodeops); 782 if (err_str != NULL) 783 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str); 784 return (error); 785 } 786 787 /* 788 * Caller must hold the mutex. Used to set SOLOCKED. 789 */ 790 void 791 so_lock_single(struct sonode *so) 792 { 793 ASSERT(MUTEX_HELD(&so->so_lock)); 794 795 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) { 796 so->so_flag |= SOWANT; 797 cv_wait_stop(&so->so_want_cv, &so->so_lock, 798 SO_LOCK_WAKEUP_TIME); 799 } 800 so->so_flag |= SOLOCKED; 801 } 802 803 /* 804 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND. 805 * Used to clear SOLOCKED or SOASYNC_UNBIND. 806 */ 807 void 808 so_unlock_single(struct sonode *so, int flag) 809 { 810 ASSERT(MUTEX_HELD(&so->so_lock)); 811 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND)); 812 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0); 813 ASSERT(so->so_flag & flag); 814 815 /* 816 * Process the T_DISCON_IND on so_discon_ind_mp. 817 * 818 * Call to so_drain_discon_ind will result in so_lock 819 * being dropped and re-acquired later. 820 */ 821 if (so->so_discon_ind_mp != NULL) 822 so_drain_discon_ind(so); 823 824 if (so->so_flag & SOWANT) 825 cv_broadcast(&so->so_want_cv); 826 so->so_flag &= ~(SOWANT|flag); 827 } 828 829 /* 830 * Caller must hold the mutex. Used to set SOREADLOCKED. 831 * If the caller wants nonblocking behavior it should set fmode. 
832 */ 833 int 834 so_lock_read(struct sonode *so, int fmode) 835 { 836 ASSERT(MUTEX_HELD(&so->so_lock)); 837 838 while (so->so_flag & SOREADLOCKED) { 839 if (fmode & (FNDELAY|FNONBLOCK)) 840 return (EWOULDBLOCK); 841 so->so_flag |= SOWANT; 842 cv_wait_stop(&so->so_want_cv, &so->so_lock, 843 SO_LOCK_WAKEUP_TIME); 844 } 845 so->so_flag |= SOREADLOCKED; 846 return (0); 847 } 848 849 /* 850 * Like so_lock_read above but allows signals. 851 */ 852 int 853 so_lock_read_intr(struct sonode *so, int fmode) 854 { 855 ASSERT(MUTEX_HELD(&so->so_lock)); 856 857 while (so->so_flag & SOREADLOCKED) { 858 if (fmode & (FNDELAY|FNONBLOCK)) 859 return (EWOULDBLOCK); 860 so->so_flag |= SOWANT; 861 if (!cv_wait_sig(&so->so_want_cv, &so->so_lock)) 862 return (EINTR); 863 } 864 so->so_flag |= SOREADLOCKED; 865 return (0); 866 } 867 868 /* 869 * Caller must hold the mutex. Used to clear SOREADLOCKED, 870 * set in so_lock_read() or so_lock_read_intr(). 871 */ 872 void 873 so_unlock_read(struct sonode *so) 874 { 875 ASSERT(MUTEX_HELD(&so->so_lock)); 876 ASSERT(so->so_flag & SOREADLOCKED); 877 878 if (so->so_flag & SOWANT) 879 cv_broadcast(&so->so_want_cv); 880 so->so_flag &= ~(SOWANT|SOREADLOCKED); 881 } 882 883 /* 884 * Verify that the specified offset falls within the mblk and 885 * that the resulting pointer is aligned. 886 * Returns NULL if not. 887 */ 888 void * 889 sogetoff(mblk_t *mp, t_uscalar_t offset, 890 t_uscalar_t length, uint_t align_size) 891 { 892 uintptr_t ptr1, ptr2; 893 894 ASSERT(mp && mp->b_wptr >= mp->b_rptr); 895 ptr1 = (uintptr_t)mp->b_rptr + offset; 896 ptr2 = (uintptr_t)ptr1 + length; 897 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) { 898 eprintline(0); 899 return (NULL); 900 } 901 if ((ptr1 & (align_size - 1)) != 0) { 902 eprintline(0); 903 return (NULL); 904 } 905 return ((void *)ptr1); 906 } 907 908 /* 909 * Return the AF_UNIX underlying filesystem vnode matching a given name. 
910 * Makes sure the sending and the destination sonodes are compatible. 911 * The vnode is returned held. 912 * 913 * The underlying filesystem VSOCK vnode has a v_stream pointer that 914 * references the actual stream head (hence indirectly the actual sonode). 915 */ 916 static int 917 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess, 918 vnode_t **vpp) 919 { 920 vnode_t *vp; /* Underlying filesystem vnode */ 921 vnode_t *svp; /* sockfs vnode */ 922 struct sonode *so2; 923 int error; 924 925 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so, 926 soun->sun_path)); 927 928 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 929 if (error) { 930 eprintsoline(so, error); 931 return (error); 932 } 933 if (vp->v_type != VSOCK) { 934 error = ENOTSOCK; 935 eprintsoline(so, error); 936 goto done2; 937 } 938 939 if (checkaccess) { 940 /* 941 * Check that we have permissions to access the destination 942 * vnode. This check is not done in BSD but it is required 943 * by X/Open. 944 */ 945 if (error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL)) { 946 eprintsoline(so, error); 947 goto done2; 948 } 949 } 950 951 /* 952 * Check if the remote socket has been closed. 953 * 954 * Synchronize with vn_rele_stream by holding v_lock while traversing 955 * v_stream->sd_vnode. 956 */ 957 mutex_enter(&vp->v_lock); 958 if (vp->v_stream == NULL) { 959 mutex_exit(&vp->v_lock); 960 if (so->so_type == SOCK_DGRAM) 961 error = EDESTADDRREQ; 962 else 963 error = ECONNREFUSED; 964 965 eprintsoline(so, error); 966 goto done2; 967 } 968 ASSERT(vp->v_stream->sd_vnode); 969 svp = vp->v_stream->sd_vnode; 970 /* 971 * holding v_lock on underlying filesystem vnode and acquiring 972 * it on sockfs vnode. Assumes that no code ever attempts to 973 * acquire these locks in the reverse order. 
974 */ 975 VN_HOLD(svp); 976 mutex_exit(&vp->v_lock); 977 978 if (svp->v_type != VSOCK) { 979 error = ENOTSOCK; 980 eprintsoline(so, error); 981 goto done; 982 } 983 984 so2 = VTOSO(svp); 985 986 if (so->so_type != so2->so_type) { 987 error = EPROTOTYPE; 988 eprintsoline(so, error); 989 goto done; 990 } 991 992 VN_RELE(svp); 993 *vpp = vp; 994 return (0); 995 996 done: 997 VN_RELE(svp); 998 done2: 999 VN_RELE(vp); 1000 return (error); 1001 } 1002 1003 /* 1004 * Verify peer address for connect and sendto/sendmsg. 1005 * Since sendto/sendmsg would not get synchronous errors from the transport 1006 * provider we have to do these ugly checks in the socket layer to 1007 * preserve compatibility with SunOS 4.X. 1008 */ 1009 int 1010 so_addr_verify(struct sonode *so, const struct sockaddr *name, 1011 socklen_t namelen) 1012 { 1013 int family; 1014 1015 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n", 1016 (void *)so, (void *)name, namelen)); 1017 1018 ASSERT(name != NULL); 1019 1020 family = so->so_family; 1021 switch (family) { 1022 case AF_INET: 1023 if (name->sa_family != family) { 1024 eprintsoline(so, EAFNOSUPPORT); 1025 return (EAFNOSUPPORT); 1026 } 1027 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) { 1028 eprintsoline(so, EINVAL); 1029 return (EINVAL); 1030 } 1031 break; 1032 case AF_INET6: { 1033 #ifdef DEBUG 1034 struct sockaddr_in6 *sin6; 1035 #endif /* DEBUG */ 1036 1037 if (name->sa_family != family) { 1038 eprintsoline(so, EAFNOSUPPORT); 1039 return (EAFNOSUPPORT); 1040 } 1041 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) { 1042 eprintsoline(so, EINVAL); 1043 return (EINVAL); 1044 } 1045 #ifdef DEBUG 1046 /* Verify that apps don't forget to clear sin6_scope_id etc */ 1047 sin6 = (struct sockaddr_in6 *)name; 1048 if (sin6->sin6_scope_id != 0 && 1049 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 1050 zcmn_err(getzoneid(), CE_WARN, 1051 "connect/send* with uninitialized sin6_scope_id " 1052 "(%d) on socket. 
Pid = %d\n", 1053 (int)sin6->sin6_scope_id, (int)curproc->p_pid); 1054 } 1055 #endif /* DEBUG */ 1056 break; 1057 } 1058 case AF_UNIX: 1059 if (so->so_state & SS_FADDR_NOXLATE) { 1060 return (0); 1061 } 1062 if (namelen < (socklen_t)sizeof (short)) { 1063 eprintsoline(so, ENOENT); 1064 return (ENOENT); 1065 } 1066 if (name->sa_family != family) { 1067 eprintsoline(so, EAFNOSUPPORT); 1068 return (EAFNOSUPPORT); 1069 } 1070 /* MAXPATHLEN + soun_family + nul termination */ 1071 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) { 1072 eprintsoline(so, ENAMETOOLONG); 1073 return (ENAMETOOLONG); 1074 } 1075 1076 break; 1077 1078 default: 1079 /* 1080 * Default is don't do any length or sa_family check 1081 * to allow non-sockaddr style addresses. 1082 */ 1083 break; 1084 } 1085 1086 return (0); 1087 } 1088 1089 1090 /* 1091 * Translate an AF_UNIX sockaddr_un to the transport internal name. 1092 * Assumes caller has called so_addr_verify first. 1093 */ 1094 /*ARGSUSED*/ 1095 int 1096 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name, 1097 socklen_t namelen, int checkaccess, 1098 void **addrp, socklen_t *addrlenp) 1099 { 1100 int error; 1101 struct sockaddr_un *soun; 1102 vnode_t *vp; 1103 void *addr; 1104 socklen_t addrlen; 1105 1106 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n", 1107 (void *)so, (void *)name, namelen, checkaccess)); 1108 1109 ASSERT(name != NULL); 1110 ASSERT(so->so_family == AF_UNIX); 1111 ASSERT(!(so->so_state & SS_FADDR_NOXLATE)); 1112 ASSERT(namelen >= (socklen_t)sizeof (short)); 1113 ASSERT(name->sa_family == AF_UNIX); 1114 soun = (struct sockaddr_un *)name; 1115 /* 1116 * Lookup vnode for the specified path name and verify that 1117 * it is a socket. 1118 */ 1119 error = so_ux_lookup(so, soun, checkaccess, &vp); 1120 if (error) { 1121 eprintsoline(so, error); 1122 return (error); 1123 } 1124 /* 1125 * Use the address of the peer vnode as the address to send 1126 * to. We release the peer vnode here. 
In case it has been 1127 * closed by the time the T_CONN_REQ or T_UNIDATA_REQ reaches the 1128 * transport the message will get an error or be dropped. 1129 */ 1130 so->so_ux_faddr.soua_vp = vp; 1131 so->so_ux_faddr.soua_magic = SOU_MAGIC_EXPLICIT; 1132 addr = &so->so_ux_faddr; 1133 addrlen = (socklen_t)sizeof (so->so_ux_faddr); 1134 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n", 1135 addrlen, (void *)vp)); 1136 VN_RELE(vp); 1137 *addrp = addr; 1138 *addrlenp = (socklen_t)addrlen; 1139 return (0); 1140 } 1141 1142 /* 1143 * Esballoc free function for messages that contain SO_FILEP option. 1144 * Decrement the reference count on the file pointers using closef. 1145 */ 1146 void 1147 fdbuf_free(struct fdbuf *fdbuf) 1148 { 1149 int i; 1150 struct file *fp; 1151 1152 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd)); 1153 for (i = 0; i < fdbuf->fd_numfd; i++) { 1154 /* 1155 * We need pointer size alignment for fd_fds. On a LP64 1156 * kernel, the required alignment is 8 bytes while 1157 * the option headers and values are only 4 bytes 1158 * aligned. So its safer to do a bcopy compared to 1159 * assigning fdbuf->fd_fds[i] to fp. 1160 */ 1161 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1162 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp)); 1163 (void) closef(fp); 1164 } 1165 if (fdbuf->fd_ebuf != NULL) 1166 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen); 1167 kmem_free(fdbuf, fdbuf->fd_size); 1168 } 1169 1170 /* 1171 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing. 1172 * Waits if memory is not available. 
1173 */ 1174 mblk_t * 1175 fdbuf_allocmsg(int size, struct fdbuf *fdbuf) 1176 { 1177 uchar_t *buf; 1178 mblk_t *mp; 1179 1180 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd)); 1181 buf = kmem_alloc(size, KM_SLEEP); 1182 fdbuf->fd_ebuf = (caddr_t)buf; 1183 fdbuf->fd_ebuflen = size; 1184 fdbuf->fd_frtn.free_func = fdbuf_free; 1185 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf; 1186 1187 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn); 1188 mp->b_datap->db_type = M_PROTO; 1189 return (mp); 1190 } 1191 1192 /* 1193 * Extract file descriptors from a fdbuf. 1194 * Return list in rights/rightslen. 1195 */ 1196 /*ARGSUSED*/ 1197 static int 1198 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) 1199 { 1200 int i, fd; 1201 int *rp; 1202 struct file *fp; 1203 int numfd; 1204 1205 dprint(1, ("fdbuf_extract: %d fds, len %d\n", 1206 fdbuf->fd_numfd, rightslen)); 1207 1208 numfd = fdbuf->fd_numfd; 1209 ASSERT(rightslen == numfd * (int)sizeof (int)); 1210 1211 /* 1212 * Allocate a file descriptor and increment the f_count. 1213 * The latter is needed since we always call fdbuf_free 1214 * which performs a closef. 1215 */ 1216 rp = (int *)rights; 1217 for (i = 0; i < numfd; i++) { 1218 if ((fd = ufalloc(0)) == -1) 1219 goto cleanup; 1220 /* 1221 * We need pointer size alignment for fd_fds. On a LP64 1222 * kernel, the required alignment is 8 bytes while 1223 * the option headers and values are only 4 bytes 1224 * aligned. So its safer to do a bcopy compared to 1225 * assigning fdbuf->fd_fds[i] to fp. 
1226 */ 1227 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp)); 1228 mutex_enter(&fp->f_tlock); 1229 fp->f_count++; 1230 mutex_exit(&fp->f_tlock); 1231 setf(fd, fp); 1232 *rp++ = fd; 1233 if (audit_active) 1234 audit_fdrecv(fd, fp); 1235 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n", 1236 i, fd, (void *)fp, fp->f_count)); 1237 } 1238 return (0); 1239 1240 cleanup: 1241 /* 1242 * Undo whatever partial work the loop above has done. 1243 */ 1244 { 1245 int j; 1246 1247 rp = (int *)rights; 1248 for (j = 0; j < i; j++) { 1249 dprint(0, 1250 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp)); 1251 (void) closeandsetf(*rp++, NULL); 1252 } 1253 } 1254 1255 return (EMFILE); 1256 } 1257 1258 /* 1259 * Insert file descriptors into an fdbuf. 1260 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed 1261 * by calling fdbuf_free(). 1262 */ 1263 int 1264 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp) 1265 { 1266 int numfd, i; 1267 int *fds; 1268 struct file *fp; 1269 struct fdbuf *fdbuf; 1270 int fdbufsize; 1271 1272 dprint(1, ("fdbuf_create: len %d\n", rightslen)); 1273 1274 numfd = rightslen / (int)sizeof (int); 1275 1276 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)); 1277 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP); 1278 fdbuf->fd_size = fdbufsize; 1279 fdbuf->fd_numfd = 0; 1280 fdbuf->fd_ebuf = NULL; 1281 fdbuf->fd_ebuflen = 0; 1282 fds = (int *)rights; 1283 for (i = 0; i < numfd; i++) { 1284 if ((fp = getf(fds[i])) == NULL) { 1285 fdbuf_free(fdbuf); 1286 return (EBADF); 1287 } 1288 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n", 1289 i, fds[i], (void *)fp, fp->f_count)); 1290 mutex_enter(&fp->f_tlock); 1291 fp->f_count++; 1292 mutex_exit(&fp->f_tlock); 1293 /* 1294 * The maximum alignment for fdbuf (or any option header 1295 * and its value) it 4 bytes. On a LP64 kernel, the alignment 1296 * is not sufficient for pointers (fd_fds in this case). 
Since 1297 * we just did a kmem_alloc (we get a double word alignment), 1298 * we don't need to do anything on the send side (we loose 1299 * the double word alignment because fdbuf goes after an 1300 * option header (eg T_unitdata_req) which is only 4 byte 1301 * aligned). We take care of this when we extract the file 1302 * descriptor in fdbuf_extract or fdbuf_free. 1303 */ 1304 fdbuf->fd_fds[i] = fp; 1305 fdbuf->fd_numfd++; 1306 releasef(fds[i]); 1307 if (audit_active) 1308 audit_fdsend(fds[i], fp, 0); 1309 } 1310 *fdbufp = fdbuf; 1311 return (0); 1312 } 1313 1314 static int 1315 fdbuf_optlen(int rightslen) 1316 { 1317 int numfd; 1318 1319 numfd = rightslen / (int)sizeof (int); 1320 1321 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *))); 1322 } 1323 1324 static t_uscalar_t 1325 fdbuf_cmsglen(int fdbuflen) 1326 { 1327 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) / 1328 (int)sizeof (struct file *) * (int)sizeof (int)); 1329 } 1330 1331 1332 /* 1333 * Return non-zero if the mblk and fdbuf are consistent. 1334 */ 1335 static int 1336 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen) 1337 { 1338 if (fdbuflen >= FDBUF_HDRSIZE && 1339 fdbuflen == fdbuf->fd_size) { 1340 frtn_t *frp = mp->b_datap->db_frtnp; 1341 /* 1342 * Check that the SO_FILEP portion of the 1343 * message has not been modified by 1344 * the loopback transport. The sending sockfs generates 1345 * a message that is esballoc'ed with the free function 1346 * being fdbuf_free() and where free_arg contains the 1347 * identical information as the SO_FILEP content. 1348 * 1349 * If any of these constraints are not satisfied we 1350 * silently ignore the option. 
1351 */ 1352 ASSERT(mp); 1353 if (frp != NULL && 1354 frp->free_func == fdbuf_free && 1355 frp->free_arg != NULL && 1356 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) { 1357 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n", 1358 (void *)fdbuf, fdbuflen)); 1359 return (1); 1360 } else { 1361 zcmn_err(getzoneid(), CE_WARN, 1362 "sockfs: mismatched fdbuf content (%p)", 1363 (void *)mp); 1364 return (0); 1365 } 1366 } else { 1367 zcmn_err(getzoneid(), CE_WARN, 1368 "sockfs: mismatched fdbuf len %d, %d\n", 1369 fdbuflen, fdbuf->fd_size); 1370 return (0); 1371 } 1372 } 1373 1374 /* 1375 * When the file descriptors returned by sorecvmsg can not be passed 1376 * to the application this routine will cleanup the references on 1377 * the files. Start at startoff bytes into the buffer. 1378 */ 1379 static void 1380 close_fds(void *fdbuf, int fdbuflen, int startoff) 1381 { 1382 int *fds = (int *)fdbuf; 1383 int numfd = fdbuflen / (int)sizeof (int); 1384 int i; 1385 1386 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff)); 1387 1388 for (i = 0; i < numfd; i++) { 1389 if (startoff < 0) 1390 startoff = 0; 1391 if (startoff < (int)sizeof (int)) { 1392 /* 1393 * This file descriptor is partially or fully after 1394 * the offset 1395 */ 1396 dprint(0, 1397 ("close_fds: cleanup[%d] = %d\n", i, fds[i])); 1398 (void) closeandsetf(fds[i], NULL); 1399 } 1400 startoff -= (int)sizeof (int); 1401 } 1402 } 1403 1404 /* 1405 * Close all file descriptors contained in the control part starting at 1406 * the startoffset. 1407 */ 1408 void 1409 so_closefds(void *control, t_uscalar_t controllen, int oldflg, 1410 int startoff) 1411 { 1412 struct cmsghdr *cmsg; 1413 1414 if (control == NULL) 1415 return; 1416 1417 if (oldflg) { 1418 close_fds(control, controllen, startoff); 1419 return; 1420 } 1421 /* Scan control part for file descriptors. 
*/ 1422 for (cmsg = (struct cmsghdr *)control; 1423 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1424 cmsg = CMSG_NEXT(cmsg)) { 1425 if (cmsg->cmsg_level == SOL_SOCKET && 1426 cmsg->cmsg_type == SCM_RIGHTS) { 1427 close_fds(CMSG_CONTENT(cmsg), 1428 (int)CMSG_CONTENTLEN(cmsg), 1429 startoff - (int)sizeof (struct cmsghdr)); 1430 } 1431 startoff -= cmsg->cmsg_len; 1432 } 1433 } 1434 1435 /* 1436 * Returns a pointer/length for the file descriptors contained 1437 * in the control buffer. Returns with *fdlenp == -1 if there are no 1438 * file descriptor options present. This is different than there being 1439 * a zero-length file descriptor option. 1440 * Fail if there are multiple SCM_RIGHT cmsgs. 1441 */ 1442 int 1443 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg, 1444 void **fdsp, int *fdlenp) 1445 { 1446 struct cmsghdr *cmsg; 1447 void *fds; 1448 int fdlen; 1449 1450 if (control == NULL) { 1451 *fdsp = NULL; 1452 *fdlenp = -1; 1453 return (0); 1454 } 1455 1456 if (oldflg) { 1457 *fdsp = control; 1458 if (controllen == 0) 1459 *fdlenp = -1; 1460 else 1461 *fdlenp = controllen; 1462 dprint(1, ("so_getfdopt: old %d\n", *fdlenp)); 1463 return (0); 1464 } 1465 1466 fds = NULL; 1467 fdlen = 0; 1468 1469 for (cmsg = (struct cmsghdr *)control; 1470 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen); 1471 cmsg = CMSG_NEXT(cmsg)) { 1472 if (cmsg->cmsg_level == SOL_SOCKET && 1473 cmsg->cmsg_type == SCM_RIGHTS) { 1474 if (fds != NULL) 1475 return (EINVAL); 1476 fds = CMSG_CONTENT(cmsg); 1477 fdlen = (int)CMSG_CONTENTLEN(cmsg); 1478 dprint(1, ("so_getfdopt: new %lu\n", 1479 (size_t)CMSG_CONTENTLEN(cmsg))); 1480 } 1481 } 1482 if (fds == NULL) { 1483 dprint(1, ("so_getfdopt: NONE\n")); 1484 *fdlenp = -1; 1485 } else 1486 *fdlenp = fdlen; 1487 *fdsp = fds; 1488 return (0); 1489 } 1490 1491 /* 1492 * Return the length of the options including any file descriptor options. 
*/
t_uscalar_t
so_optlen(void *control, t_uscalar_t controllen, int oldflg)
{
	struct cmsghdr *cmsg;
	t_uscalar_t optlen = 0;
	t_uscalar_t len;

	if (control == NULL)
		return (0);

	/*
	 * Old style: the control buffer holds nothing but descriptors,
	 * which become a single option (header plus fdbuf).
	 */
	if (oldflg)
		return ((t_uscalar_t)(sizeof (struct T_opthdr) +
		    fdbuf_optlen(controllen)));

	for (cmsg = (struct cmsghdr *)control;
	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
	    cmsg = CMSG_NEXT(cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS) {
			/* Descriptors are carried as an fdbuf. */
			len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
		} else {
			len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
		}
		/* Each option is a T_opthdr followed by aligned data. */
		optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
		    sizeof (struct T_opthdr));
	}
	dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
	    controllen, oldflg, optlen));
	return (optlen);
}

/*
 * Copy options from control to the mblk. Skip any file descriptor options.
 *
 * Each remaining cmsg is appended to mp as a T_opthdr followed by the cmsg
 * content; b_wptr is then advanced past the alignment padding.  Nothing is
 * done when control is NULL or oldflg is set.
 */
void
so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
{
	struct T_opthdr toh;
	struct cmsghdr *cmsg;

	if (control == NULL)
		return;

	if (oldflg) {
		/* No real options - caller has handled file descriptors */
		return;
	}
	for (cmsg = (struct cmsghdr *)control;
	    CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
	    cmsg = CMSG_NEXT(cmsg)) {
		/*
		 * Note: The caller handles file descriptors prior
		 * to calling this function.
		 */
		t_uscalar_t len;

		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_RIGHTS)
			continue;

		len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
		toh.level = cmsg->cmsg_level;
		toh.name = cmsg->cmsg_type;
		/* toh.len covers the header plus the unpadded data. */
		toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
		toh.status = 0;

		soappendmsg(mp, &toh, sizeof (toh));
		soappendmsg(mp, CMSG_CONTENT(cmsg), len);
		/* Skip over the padding; the pad bytes are not written. */
		mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
	}
}

/*
 * Return the length of the control message derived from the options.
 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
 * When oldflg is set only include SO_FILEP.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
 *
 * mp is the message the options came from; it is only used to verify
 * SO_FILEP options (see fdbuf_verify).
 */
t_uscalar_t
so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
{
	t_uscalar_t cmsglen = 0;
	struct T_opthdr *tohp;
	t_uscalar_t len;
	t_uscalar_t last_roundup = 0;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			struct fdbuf *fdbuf;
			int fdbuflen;

			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			/* An fdbuf that fails verification adds nothing. */
			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				continue;
			if (oldflg) {
				/* Old style: bare descriptors, no cmsghdr. */
				cmsglen += fdbuf_cmsglen(fdbuflen);
				continue;
			}
			len = fdbuf_cmsglen(fdbuflen);
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			if (oldflg)
				continue;

			/*
			 * The timestamp is delivered as a timeval sized
			 * for the data model of the calling process.
			 */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				len = sizeof (struct timeval);
			} else {
				len = sizeof (struct timeval32);
			}
		} else {
			if (oldflg)
				continue;
			len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
		}
		/*
		 * Exclude roundup for last option to not set
		 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
		 */
		last_roundup = (t_uscalar_t)
		    (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
		    (len + (int)sizeof (struct cmsghdr)));
		cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
		    last_roundup;
	}
	/* Take back the padding that was added for the final cmsg. */
	cmsglen -= last_roundup;
	dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
	    optlen, oldflg, cmsglen));
	return (cmsglen);
}

/*
 * Copy options from options to the control. Convert SO_FILEP to
 * file descriptors.
 * Returns errno or zero.
 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
 * allocates the space that so_opt2cmsg fills. If one changes, the other should
 * also be checked for any possible impacts.
*/
int
so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg,
    void *control, t_uscalar_t controllen)
{
	struct T_opthdr *tohp;
	struct cmsghdr *cmsg;
	struct fdbuf *fdbuf;
	int fdbuflen;
	int error;
#if defined(DEBUG) || defined(__lint)
	/* One past the (rounded up) control buffer; used for checking only. */
	struct cmsghdr *cend = (struct cmsghdr *)
	    (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
#endif
	cmsg = (struct cmsghdr *)control;

	ASSERT(__TPI_TOPT_ISALIGNED(opt));

	for (tohp = (struct T_opthdr *)opt;
	    tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
	    tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
		dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
		    tohp->level, tohp->name, tohp->len));

		/* These options never appear in the control data. */
		if (tohp->level == SOL_SOCKET &&
		    (tohp->name == SO_SRCADDR ||
		    tohp->name == SO_UNIX_CLOSE)) {
			continue;
		}
		ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
		if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
			fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
			fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);

			if (!fdbuf_verify(mp, fdbuf, fdbuflen))
				return (EPROTO);
			if (oldflg) {
				/*
				 * Old style: the descriptors are written
				 * straight into the control buffer with no
				 * cmsghdr in front of them.
				 */
				error = fdbuf_extract(fdbuf, control,
				    (int)controllen);
				if (error != 0)
					return (error);
				continue;
			} else {
				int fdlen;

				fdlen = (int)fdbuf_cmsglen(
				    (int)_TPI_TOPT_DATALEN(tohp));

				cmsg->cmsg_level = tohp->level;
				cmsg->cmsg_type = SCM_RIGHTS;
				cmsg->cmsg_len = (socklen_t)(fdlen +
				    sizeof (struct cmsghdr));

				error = fdbuf_extract(fdbuf,
				    CMSG_CONTENT(cmsg), fdlen);
				if (error != 0)
					return (error);
			}
		} else if (tohp->level == SOL_SOCKET &&
		    tohp->name == SCM_TIMESTAMP) {
			timestruc_t *timestamp;

			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;

			/*
			 * The timestamp is read from the first
			 * pointer-aligned address after the option header.
			 */
			timestamp =
			    (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
			    sizeof (intptr_t));

			/*
			 * Convert to a timeval of the size expected by the
			 * data model of the calling process.
			 */
			if (get_udatamodel() == DATAMODEL_NATIVE) {
				struct timeval tv;

				cmsg->cmsg_len = sizeof (struct timeval) +
				    sizeof (struct cmsghdr);
				tv.tv_sec = timestamp->tv_sec;
				tv.tv_usec = timestamp->tv_nsec /
				    (NANOSEC / MICROSEC);
				/*
				 * on LP64 systems, the struct timeval in
				 * the destination will not be 8-byte aligned,
				 * so use bcopy to avoid alignment trouble
				 */
				bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
			} else {
				struct timeval32 *time32;

				cmsg->cmsg_len = sizeof (struct timeval32) +
				    sizeof (struct cmsghdr);
				time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
				time32->tv_sec = (time32_t)timestamp->tv_sec;
				time32->tv_usec =
				    (int32_t)(timestamp->tv_nsec /
				    (NANOSEC / MICROSEC));
			}

		} else {
			if (oldflg)
				continue;

			cmsg->cmsg_level = tohp->level;
			cmsg->cmsg_type = tohp->name;
			cmsg->cmsg_len = (socklen_t)(_TPI_TOPT_DATALEN(tohp) +
			    sizeof (struct cmsghdr));

			/* copy content to control data part */
			bcopy(&tohp[1], CMSG_CONTENT(cmsg),
			    CMSG_CONTENTLEN(cmsg));
		}
		/* move to next CMSG structure! */
		cmsg = CMSG_NEXT(cmsg);
	}
	dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
	    control, controllen, (void *)cend, (void *)cmsg));
	ASSERT(cmsg <= cend);
	return (0);
}

/*
 * Extract the SO_SRCADDR option value if present.
1769 */ 1770 void 1771 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp, 1772 t_uscalar_t *srclenp) 1773 { 1774 struct T_opthdr *tohp; 1775 1776 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1777 1778 ASSERT(srcp != NULL && srclenp != NULL); 1779 *srcp = NULL; 1780 *srclenp = 0; 1781 1782 for (tohp = (struct T_opthdr *)opt; 1783 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1784 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1785 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n", 1786 tohp->level, tohp->name, tohp->len)); 1787 if (tohp->level == SOL_SOCKET && 1788 tohp->name == SO_SRCADDR) { 1789 *srcp = _TPI_TOPT_DATA(tohp); 1790 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp); 1791 } 1792 } 1793 } 1794 1795 /* 1796 * Verify if the SO_UNIX_CLOSE option is present. 1797 */ 1798 int 1799 so_getopt_unix_close(void *opt, t_uscalar_t optlen) 1800 { 1801 struct T_opthdr *tohp; 1802 1803 ASSERT(__TPI_TOPT_ISALIGNED(opt)); 1804 1805 for (tohp = (struct T_opthdr *)opt; 1806 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen); 1807 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) { 1808 dprint(1, 1809 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n", 1810 tohp->level, tohp->name, tohp->len)); 1811 if (tohp->level == SOL_SOCKET && 1812 tohp->name == SO_UNIX_CLOSE) 1813 return (1); 1814 } 1815 return (0); 1816 } 1817 1818 /* 1819 * Allocate an M_PROTO message. 1820 * 1821 * If allocation fails the behavior depends on sleepflg: 1822 * _ALLOC_NOSLEEP fail immediately 1823 * _ALLOC_INTR sleep for memory until a signal is caught 1824 * _ALLOC_SLEEP sleep forever. Don't return NULL. 
1825 */ 1826 mblk_t * 1827 soallocproto(size_t size, int sleepflg) 1828 { 1829 mblk_t *mp; 1830 1831 /* Round up size for reuse */ 1832 size = MAX(size, 64); 1833 mp = allocb(size, BPRI_MED); 1834 if (mp == NULL) { 1835 int error; /* Dummy - error not returned to caller */ 1836 1837 switch (sleepflg) { 1838 case _ALLOC_SLEEP: 1839 mp = allocb_wait(size, BPRI_MED, STR_NOSIG, &error); 1840 ASSERT(mp); 1841 break; 1842 case _ALLOC_INTR: 1843 mp = allocb_wait(size, BPRI_MED, 0, &error); 1844 if (mp == NULL) { 1845 /* Caught signal while sleeping for memory */ 1846 eprintline(ENOBUFS); 1847 return (NULL); 1848 } 1849 break; 1850 case _ALLOC_NOSLEEP: 1851 default: 1852 eprintline(ENOBUFS); 1853 return (NULL); 1854 } 1855 } 1856 DB_TYPE(mp) = M_PROTO; 1857 return (mp); 1858 } 1859 1860 /* 1861 * Allocate an M_PROTO message with a single component. 1862 * len is the length of buf. size is the amount to allocate. 1863 * 1864 * buf can be NULL with a non-zero len. 1865 * This results in a bzero'ed chunk being placed the message. 1866 */ 1867 mblk_t * 1868 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg) 1869 { 1870 mblk_t *mp; 1871 1872 if (size == 0) 1873 size = len; 1874 1875 ASSERT(size >= len); 1876 /* Round up size for reuse */ 1877 size = MAX(size, 64); 1878 mp = soallocproto(size, sleepflg); 1879 if (mp == NULL) 1880 return (NULL); 1881 mp->b_datap->db_type = M_PROTO; 1882 if (len != 0) { 1883 if (buf != NULL) 1884 bcopy(buf, mp->b_wptr, len); 1885 else 1886 bzero(mp->b_wptr, len); 1887 mp->b_wptr += len; 1888 } 1889 return (mp); 1890 } 1891 1892 /* 1893 * Append buf/len to mp. 1894 * The caller has to ensure that there is enough room in the mblk. 1895 * 1896 * buf can be NULL with a non-zero len. 1897 * This results in a bzero'ed chunk being placed the message. 
1898 */ 1899 void 1900 soappendmsg(mblk_t *mp, const void *buf, ssize_t len) 1901 { 1902 ASSERT(mp); 1903 1904 if (len != 0) { 1905 /* Assert for room left */ 1906 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len); 1907 if (buf != NULL) 1908 bcopy(buf, mp->b_wptr, len); 1909 else 1910 bzero(mp->b_wptr, len); 1911 } 1912 mp->b_wptr += len; 1913 } 1914 1915 /* 1916 * Create a message using two kernel buffers. 1917 * If size is set that will determine the allocation size (e.g. for future 1918 * soappendmsg calls). If size is zero it is derived from the buffer 1919 * lengths. 1920 */ 1921 mblk_t * 1922 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1923 ssize_t size, int sleepflg) 1924 { 1925 mblk_t *mp; 1926 1927 if (size == 0) 1928 size = len1 + len2; 1929 ASSERT(size >= len1 + len2); 1930 1931 mp = soallocproto1(buf1, len1, size, sleepflg); 1932 if (mp) 1933 soappendmsg(mp, buf2, len2); 1934 return (mp); 1935 } 1936 1937 /* 1938 * Create a message using three kernel buffers. 1939 * If size is set that will determine the allocation size (for future 1940 * soappendmsg calls). If size is zero it is derived from the buffer 1941 * lengths. 
1942 */ 1943 mblk_t * 1944 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2, 1945 const void *buf3, ssize_t len3, ssize_t size, int sleepflg) 1946 { 1947 mblk_t *mp; 1948 1949 if (size == 0) 1950 size = len1 + len2 +len3; 1951 ASSERT(size >= len1 + len2 + len3); 1952 1953 mp = soallocproto1(buf1, len1, size, sleepflg); 1954 if (mp != NULL) { 1955 soappendmsg(mp, buf2, len2); 1956 soappendmsg(mp, buf3, len3); 1957 } 1958 return (mp); 1959 } 1960 1961 #ifdef DEBUG 1962 char * 1963 pr_state(uint_t state, uint_t mode) 1964 { 1965 static char buf[1024]; 1966 1967 buf[0] = 0; 1968 if (state & SS_ISCONNECTED) 1969 (void) strcat(buf, "ISCONNECTED "); 1970 if (state & SS_ISCONNECTING) 1971 (void) strcat(buf, "ISCONNECTING "); 1972 if (state & SS_ISDISCONNECTING) 1973 (void) strcat(buf, "ISDISCONNECTING "); 1974 if (state & SS_CANTSENDMORE) 1975 (void) strcat(buf, "CANTSENDMORE "); 1976 1977 if (state & SS_CANTRCVMORE) 1978 (void) strcat(buf, "CANTRCVMORE "); 1979 if (state & SS_ISBOUND) 1980 (void) strcat(buf, "ISBOUND "); 1981 if (state & SS_NDELAY) 1982 (void) strcat(buf, "NDELAY "); 1983 if (state & SS_NONBLOCK) 1984 (void) strcat(buf, "NONBLOCK "); 1985 1986 if (state & SS_ASYNC) 1987 (void) strcat(buf, "ASYNC "); 1988 if (state & SS_ACCEPTCONN) 1989 (void) strcat(buf, "ACCEPTCONN "); 1990 if (state & SS_HASCONNIND) 1991 (void) strcat(buf, "HASCONNIND "); 1992 if (state & SS_SAVEDEOR) 1993 (void) strcat(buf, "SAVEDEOR "); 1994 1995 if (state & SS_RCVATMARK) 1996 (void) strcat(buf, "RCVATMARK "); 1997 if (state & SS_OOBPEND) 1998 (void) strcat(buf, "OOBPEND "); 1999 if (state & SS_HAVEOOBDATA) 2000 (void) strcat(buf, "HAVEOOBDATA "); 2001 if (state & SS_HADOOBDATA) 2002 (void) strcat(buf, "HADOOBDATA "); 2003 2004 if (state & SS_FADDR_NOXLATE) 2005 (void) strcat(buf, "FADDR_NOXLATE "); 2006 2007 if (mode & SM_PRIV) 2008 (void) strcat(buf, "PRIV "); 2009 if (mode & SM_ATOMIC) 2010 (void) strcat(buf, "ATOMIC "); 2011 if (mode & SM_ADDR) 2012 
(void) strcat(buf, "ADDR "); 2013 if (mode & SM_CONNREQUIRED) 2014 (void) strcat(buf, "CONNREQUIRED "); 2015 2016 if (mode & SM_FDPASSING) 2017 (void) strcat(buf, "FDPASSING "); 2018 if (mode & SM_EXDATA) 2019 (void) strcat(buf, "EXDATA "); 2020 if (mode & SM_OPTDATA) 2021 (void) strcat(buf, "OPTDATA "); 2022 if (mode & SM_BYTESTREAM) 2023 (void) strcat(buf, "BYTESTREAM "); 2024 return (buf); 2025 } 2026 2027 char * 2028 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen) 2029 { 2030 static char buf[1024]; 2031 2032 if (addr == NULL || addrlen == 0) { 2033 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr); 2034 return (buf); 2035 } 2036 switch (family) { 2037 case AF_INET: { 2038 struct sockaddr_in sin; 2039 2040 bcopy(addr, &sin, sizeof (sin)); 2041 2042 (void) sprintf(buf, "(len %d) %x/%d", 2043 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port)); 2044 break; 2045 } 2046 case AF_INET6: { 2047 struct sockaddr_in6 sin6; 2048 uint16_t *piece = (uint16_t *)&sin6.sin6_addr; 2049 2050 bcopy((char *)addr, (char *)&sin6, sizeof (sin6)); 2051 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d", 2052 addrlen, 2053 ntohs(piece[0]), ntohs(piece[1]), 2054 ntohs(piece[2]), ntohs(piece[3]), 2055 ntohs(piece[4]), ntohs(piece[5]), 2056 ntohs(piece[6]), ntohs(piece[7]), 2057 ntohs(sin6.sin6_port)); 2058 break; 2059 } 2060 case AF_UNIX: { 2061 struct sockaddr_un *soun = (struct sockaddr_un *)addr; 2062 2063 (void) sprintf(buf, "(len %d) %s", addrlen, 2064 (soun == NULL) ? "(none)" : soun->sun_path); 2065 break; 2066 } 2067 default: 2068 (void) sprintf(buf, "(unknown af %d)", family); 2069 break; 2070 } 2071 return (buf); 2072 } 2073 2074 /* The logical equivalence operator (a if-and-only-if b) */ 2075 #define EQUIV(a, b) (((a) && (b)) || (!(a) && (!(b)))) 2076 2077 /* 2078 * Verify limitations and invariants on oob state. 
2079 * Return 1 if OK, otherwise 0 so that it can be used as 2080 * ASSERT(verify_oobstate(so)); 2081 */ 2082 int 2083 so_verify_oobstate(struct sonode *so) 2084 { 2085 ASSERT(MUTEX_HELD(&so->so_lock)); 2086 2087 /* 2088 * The possible state combinations are: 2089 * 0 2090 * SS_OOBPEND 2091 * SS_OOBPEND|SS_HAVEOOBDATA 2092 * SS_OOBPEND|SS_HADOOBDATA 2093 * SS_HADOOBDATA 2094 */ 2095 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) { 2096 case 0: 2097 case SS_OOBPEND: 2098 case SS_OOBPEND|SS_HAVEOOBDATA: 2099 case SS_OOBPEND|SS_HADOOBDATA: 2100 case SS_HADOOBDATA: 2101 break; 2102 default: 2103 printf("Bad oob state 1 (%p): counts %d/%d state %s\n", 2104 (void *)so, so->so_oobsigcnt, 2105 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2106 return (0); 2107 } 2108 2109 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */ 2110 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) { 2111 printf("Bad oob state 2 (%p): counts %d/%d state %s\n", 2112 (void *)so, so->so_oobsigcnt, 2113 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2114 return (0); 2115 } 2116 2117 /* 2118 * (so_oobsigcnt != 0 or SS_RCVATMARK) iff SS_OOBPEND 2119 */ 2120 if (!EQUIV((so->so_oobsigcnt != 0) || (so->so_state & SS_RCVATMARK), 2121 so->so_state & SS_OOBPEND)) { 2122 printf("Bad oob state 3 (%p): counts %d/%d state %s\n", 2123 (void *)so, so->so_oobsigcnt, 2124 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2125 return (0); 2126 } 2127 2128 /* 2129 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA 2130 */ 2131 if (!(so->so_options & SO_OOBINLINE) && 2132 !EQUIV(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) { 2133 printf("Bad oob state 4 (%p): counts %d/%d state %s\n", 2134 (void *)so, so->so_oobsigcnt, 2135 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2136 return (0); 2137 } 2138 if (so->so_oobsigcnt < so->so_oobcnt) { 2139 printf("Bad oob state 5 (%p): counts %d/%d state %s\n", 2140 (void *)so, 
so->so_oobsigcnt, 2141 so->so_oobcnt, pr_state(so->so_state, so->so_mode)); 2142 return (0); 2143 } 2144 return (1); 2145 } 2146 #undef EQUIV 2147 2148 #endif /* DEBUG */ 2149 2150 /* initialize sockfs zone specific kstat related items */ 2151 void * 2152 sock_kstat_init(zoneid_t zoneid) 2153 { 2154 kstat_t *ksp; 2155 2156 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc", 2157 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid); 2158 2159 if (ksp != NULL) { 2160 ksp->ks_update = sockfs_update; 2161 ksp->ks_snapshot = sockfs_snapshot; 2162 ksp->ks_lock = &socklist.sl_lock; 2163 ksp->ks_private = (void *)(uintptr_t)zoneid; 2164 kstat_install(ksp); 2165 } 2166 2167 return (ksp); 2168 } 2169 2170 /* tear down sockfs zone specific kstat related items */ 2171 /*ARGSUSED*/ 2172 void 2173 sock_kstat_fini(zoneid_t zoneid, void *arg) 2174 { 2175 kstat_t *ksp = (kstat_t *)arg; 2176 2177 if (ksp != NULL) { 2178 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private); 2179 kstat_delete(ksp); 2180 } 2181 } 2182 2183 /* 2184 * Zones: 2185 * Note that nactive is going to be different for each zone. 2186 * This means we require kstat to call sockfs_update and then sockfs_snapshot 2187 * for the same zone, or sockfs_snapshot will be taken into the wrong size 2188 * buffer. This is safe, but if the buffer is too small, user will not be 2189 * given details of all sockets. However, as this kstat has a ks_lock, kstat 2190 * driver will keep it locked between the update and the snapshot, so no 2191 * other process (zone) can currently get inbetween resulting in a wrong size 2192 * buffer allocation. 
2193 */ 2194 static int 2195 sockfs_update(kstat_t *ksp, int rw) 2196 { 2197 uint_t nactive = 0; /* # of active AF_UNIX sockets */ 2198 struct sonode *so; /* current sonode on socklist */ 2199 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2200 2201 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2202 2203 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2204 return (EACCES); 2205 } 2206 2207 for (so = socklist.sl_list; so != NULL; so = so->so_next) { 2208 if (so->so_accessvp != NULL && so->so_zoneid == myzoneid) { 2209 nactive++; 2210 } 2211 } 2212 ksp->ks_ndata = nactive; 2213 ksp->ks_data_size = nactive * sizeof (struct k_sockinfo); 2214 2215 return (0); 2216 } 2217 2218 static int 2219 sockfs_snapshot(kstat_t *ksp, void *buf, int rw) 2220 { 2221 int ns; /* # of sonodes we've copied */ 2222 struct sonode *so; /* current sonode on socklist */ 2223 struct k_sockinfo *pksi; /* where we put sockinfo data */ 2224 t_uscalar_t sn_len; /* soa_len */ 2225 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private; 2226 2227 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid()); 2228 2229 ksp->ks_snaptime = gethrtime(); 2230 2231 if (rw == KSTAT_WRITE) { /* bounce all writes */ 2232 return (EACCES); 2233 } 2234 2235 /* 2236 * for each sonode on the socklist, we massage the important 2237 * info into buf, in k_sockinfo format. 2238 */ 2239 pksi = (struct k_sockinfo *)buf; 2240 for (ns = 0, so = socklist.sl_list; so != NULL; so = so->so_next) { 2241 /* only stuff active sonodes and the same zone: */ 2242 if (so->so_accessvp == NULL || so->so_zoneid != myzoneid) { 2243 continue; 2244 } 2245 2246 /* 2247 * If the sonode was activated between the update and the 2248 * snapshot, we're done - as this is only a snapshot. 
2249 */ 2250 if ((caddr_t)(pksi) >= (caddr_t)buf + ksp->ks_data_size) { 2251 break; 2252 } 2253 2254 /* copy important info into buf: */ 2255 pksi->ks_si.si_size = sizeof (struct k_sockinfo); 2256 pksi->ks_si.si_family = so->so_family; 2257 pksi->ks_si.si_type = so->so_type; 2258 pksi->ks_si.si_flag = so->so_flag; 2259 pksi->ks_si.si_state = so->so_state; 2260 pksi->ks_si.si_serv_type = so->so_serv_type; 2261 pksi->ks_si.si_ux_laddr_sou_magic = so->so_ux_laddr.soua_magic; 2262 pksi->ks_si.si_ux_faddr_sou_magic = so->so_ux_faddr.soua_magic; 2263 pksi->ks_si.si_laddr_soa_len = so->so_laddr.soa_len; 2264 pksi->ks_si.si_faddr_soa_len = so->so_faddr.soa_len; 2265 pksi->ks_si.si_szoneid = so->so_zoneid; 2266 2267 mutex_enter(&so->so_lock); 2268 2269 if (so->so_laddr_sa != NULL) { 2270 ASSERT(so->so_laddr_sa->sa_data != NULL); 2271 sn_len = so->so_laddr_len; 2272 ASSERT(sn_len <= sizeof (short) + 2273 sizeof (pksi->ks_si.si_laddr_sun_path)); 2274 2275 pksi->ks_si.si_laddr_family = 2276 so->so_laddr_sa->sa_family; 2277 if (sn_len != 0) { 2278 /* AF_UNIX socket names are NULL terminated */ 2279 (void) strncpy(pksi->ks_si.si_laddr_sun_path, 2280 so->so_laddr_sa->sa_data, 2281 sizeof (pksi->ks_si.si_laddr_sun_path)); 2282 sn_len = strlen(pksi->ks_si.si_laddr_sun_path); 2283 } 2284 pksi->ks_si.si_laddr_sun_path[sn_len] = 0; 2285 } 2286 2287 if (so->so_faddr_sa != NULL) { 2288 ASSERT(so->so_faddr_sa->sa_data != NULL); 2289 sn_len = so->so_faddr_len; 2290 ASSERT(sn_len <= sizeof (short) + 2291 sizeof (pksi->ks_si.si_faddr_sun_path)); 2292 2293 pksi->ks_si.si_faddr_family = 2294 so->so_faddr_sa->sa_family; 2295 if (sn_len != 0) { 2296 (void) strncpy(pksi->ks_si.si_faddr_sun_path, 2297 so->so_faddr_sa->sa_data, 2298 sizeof (pksi->ks_si.si_faddr_sun_path)); 2299 sn_len = strlen(pksi->ks_si.si_faddr_sun_path); 2300 } 2301 pksi->ks_si.si_faddr_sun_path[sn_len] = 0; 2302 } 2303 2304 mutex_exit(&so->so_lock); 2305 2306 (void) sprintf(pksi->ks_straddr[0], "%p", (void *)so); 2307 (void) 
sprintf(pksi->ks_straddr[1], "%p", 2308 (void *)so->so_ux_laddr.soua_vp); 2309 (void) sprintf(pksi->ks_straddr[2], "%p", 2310 (void *)so->so_ux_faddr.soua_vp); 2311 2312 ns++; 2313 pksi++; 2314 } 2315 2316 ksp->ks_ndata = ns; 2317 return (0); 2318 } 2319 2320 ssize_t 2321 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size) 2322 { 2323 struct uio auio; 2324 struct iovec aiov[MSG_MAXIOVLEN]; 2325 register vnode_t *vp; 2326 int ioflag, rwflag; 2327 ssize_t cnt; 2328 int error = 0; 2329 int iovcnt = 0; 2330 short fflag; 2331 2332 vp = fp->f_vnode; 2333 fflag = fp->f_flag; 2334 2335 rwflag = 0; 2336 aiov[0].iov_base = (caddr_t)buf; 2337 aiov[0].iov_len = size; 2338 iovcnt = 1; 2339 cnt = (ssize_t)size; 2340 (void) VOP_RWLOCK(vp, rwflag, NULL); 2341 2342 auio.uio_loffset = fileoff; 2343 auio.uio_iov = aiov; 2344 auio.uio_iovcnt = iovcnt; 2345 auio.uio_resid = cnt; 2346 auio.uio_segflg = UIO_SYSSPACE; 2347 auio.uio_llimit = MAXOFFSET_T; 2348 auio.uio_fmode = fflag; 2349 auio.uio_extflg = UIO_COPY_CACHED; 2350 2351 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 2352 2353 /* If read sync is not asked for, filter sync flags */ 2354 if ((ioflag & FRSYNC) == 0) 2355 ioflag &= ~(FSYNC|FDSYNC); 2356 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 2357 cnt -= auio.uio_resid; 2358 2359 VOP_RWUNLOCK(vp, rwflag, NULL); 2360 2361 if (error == EINTR && cnt != 0) 2362 error = 0; 2363 out: 2364 if (error != 0) { 2365 *err = error; 2366 return (0); 2367 } else { 2368 *err = 0; 2369 return (cnt); 2370 } 2371 } 2372