1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2017 Sebastian Wiedenroth 25 */ 26 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/sysmacros.h> 31 #include <sys/debug.h> 32 #include <sys/cmn_err.h> 33 #include <sys/vfs.h> 34 #include <sys/policy.h> 35 #include <sys/modctl.h> 36 37 #include <sys/sunddi.h> 38 39 #include <sys/strsun.h> 40 #include <sys/stropts.h> 41 #include <sys/strsubr.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/uio.h> 45 46 #include <inet/ipclassifier.h> 47 #include <fs/sockfs/sockcommon.h> 48 #include <fs/sockfs/sockfilter_impl.h> 49 #include <fs/sockfs/nl7c.h> 50 #include <fs/sockfs/socktpi.h> 51 #include <fs/sockfs/sodirect.h> 52 #include <inet/ip.h> 53 54 extern int xnet_skip_checks, xnet_check_print, xnet_truncate_print; 55 56 /* 57 * Common socket access functions. 58 * 59 * Instead of accessing the sonode switch directly (i.e., SOP_xxx()), 60 * the socket_xxx() function should be used. 61 */ 62 63 /* 64 * Try to create a new sonode of the requested <family, type, protocol>. 65 */ 66 /* ARGSUSED */ 67 struct sonode * 68 socket_create(int family, int type, int protocol, char *devpath, char *mod, 69 int flags, int version, struct cred *cr, int *errorp) 70 { 71 struct sonode *so; 72 struct sockparams *sp = NULL; 73 int saved_error; 74 75 /* 76 * Look for a sockparams entry that match the given criteria. 77 * solookup() returns with the entry held. 78 */ 79 *errorp = solookup(family, type, protocol, &sp); 80 saved_error = *errorp; 81 if (sp == NULL) { 82 int kmflags = (flags == SOCKET_SLEEP) ? KM_SLEEP : KM_NOSLEEP; 83 /* 84 * There is no matching sockparams entry. An ephemeral entry is 85 * created if the caller specifies a device or a socket module. 86 */ 87 if (devpath != NULL) { 88 saved_error = 0; 89 sp = sockparams_hold_ephemeral_bydev(family, type, 90 protocol, devpath, kmflags, errorp); 91 } else if (mod != NULL) { 92 saved_error = 0; 93 sp = sockparams_hold_ephemeral_bymod(family, type, 94 protocol, mod, kmflags, errorp); 95 } else { 96 *errorp = solookup(family, type, 0, &sp); 97 } 98 99 if (sp == NULL) { 100 if (saved_error && (*errorp == EPROTONOSUPPORT || 101 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 102 *errorp = saved_error; 103 return (NULL); 104 } 105 } 106 107 ASSERT(sp->sp_smod_info != NULL); 108 ASSERT(flags == SOCKET_SLEEP || flags == SOCKET_NOSLEEP); 109 sp->sp_stats.sps_ncreate.value.ui64++; 110 so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, 111 protocol, version, flags, errorp, cr); 112 if (so == NULL) { 113 SOCKPARAMS_DEC_REF(sp); 114 } else { 115 if ((*errorp = SOP_INIT(so, NULL, cr, flags)) == 0) { 116 /* Cannot fail, only bumps so_count */ 117 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 118 } else { 119 if (saved_error && (*errorp == EPROTONOSUPPORT || 120 *errorp == EPROTOTYPE || *errorp == ENOPROTOOPT)) 121 *errorp = saved_error; 122 socket_destroy(so); 123 so = NULL; 124 } 125 } 126 return (so); 127 } 128 129 struct sonode * 130 socket_newconn(struct sonode *parent, sock_lower_handle_t lh, 131 sock_downcalls_t *dc, int flags, int *errorp) 132 { 133 struct sonode *so; 134 struct sockparams *sp; 135 struct cred *cr; 136 137 if ((cr = CRED()) == NULL) 138 cr = kcred; 139 140 sp = parent->so_sockparams; 141 ASSERT(sp != NULL); 142 143 sp->sp_stats.sps_ncreate.value.ui64++; 144 so = sp->sp_smod_info->smod_sock_create_func(sp, parent->so_family, 145 parent->so_type, parent->so_protocol, parent->so_version, flags, 146 errorp, cr); 147 if (so != NULL) { 148 SOCKPARAMS_INC_REF(sp); 149 150 so->so_proto_handle = lh; 151 so->so_downcalls = dc; 152 /* 153 * This function may be called in interrupt context, and CRED() 154 * will be NULL. In this case, pass in kcred. 155 */ 156 if ((*errorp = SOP_INIT(so, parent, cr, flags)) == 0) { 157 /* Cannot fail, only bumps so_count */ 158 (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, cr, NULL); 159 } else { 160 socket_destroy(so); 161 so = NULL; 162 } 163 } 164 165 return (so); 166 } 167 168 /* 169 * Bind local endpoint. 170 */ 171 int 172 socket_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 173 int flags, cred_t *cr) 174 { 175 return (SOP_BIND(so, name, namelen, flags, cr)); 176 } 177 178 /* 179 * Turn socket into a listen socket. 180 */ 181 int 182 socket_listen(struct sonode *so, int backlog, cred_t *cr) 183 { 184 if (backlog < 0) { 185 backlog = 0; 186 } 187 188 /* 189 * Use the same qlimit as in BSD. BSD checks the qlimit 190 * before queuing the next connection implying that a 191 * listen(sock, 0) allows one connection to be queued. 192 * BSD also uses 1.5 times the requested backlog. 193 * 194 * XNS Issue 4 required a strict interpretation of the backlog. 195 * This has been waived subsequently for Issue 4 and the change 196 * incorporated in XNS Issue 5. So we aren't required to do 197 * anything special for XPG apps. 198 */ 199 if (backlog >= (INT_MAX - 1) / 3) 200 backlog = INT_MAX; 201 else 202 backlog = backlog * 3 / 2 + 1; 203 204 return (SOP_LISTEN(so, backlog, cr)); 205 } 206 207 /* 208 * Accept incoming connection. 209 */ 210 int 211 socket_accept(struct sonode *lso, int fflag, cred_t *cr, struct sonode **nsop) 212 { 213 return (SOP_ACCEPT(lso, fflag, cr, nsop)); 214 } 215 216 /* 217 * Active open. 218 */ 219 int 220 socket_connect(struct sonode *so, struct sockaddr *name, 221 socklen_t namelen, int fflag, int flags, cred_t *cr) 222 { 223 int error; 224 225 /* 226 * Handle a connect to a name parameter of type AF_UNSPEC like a 227 * connect to a null address. This is the portable method to 228 * unconnect a socket. 229 */ 230 if ((namelen >= sizeof (sa_family_t)) && 231 (name->sa_family == AF_UNSPEC)) { 232 name = NULL; 233 namelen = 0; 234 } 235 236 error = SOP_CONNECT(so, name, namelen, fflag, flags, cr); 237 238 if (error == EHOSTUNREACH && flags & _SOCONNECT_XPG4_2) { 239 /* 240 * X/Open specification contains a requirement that 241 * ENETUNREACH be returned but does not require 242 * EHOSTUNREACH. In order to keep the test suite 243 * happy we mess with the errno here. 244 */ 245 error = ENETUNREACH; 246 } 247 248 return (error); 249 } 250 251 /* 252 * Get address of remote node. 253 */ 254 int 255 socket_getpeername(struct sonode *so, struct sockaddr *addr, 256 socklen_t *addrlen, boolean_t accept, cred_t *cr) 257 { 258 ASSERT(*addrlen > 0); 259 return (SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 260 261 } 262 263 /* 264 * Get local address. 265 */ 266 int 267 socket_getsockname(struct sonode *so, struct sockaddr *addr, 268 socklen_t *addrlen, cred_t *cr) 269 { 270 return (SOP_GETSOCKNAME(so, addr, addrlen, cr)); 271 272 } 273 274 /* 275 * Called from shutdown(). 276 */ 277 int 278 socket_shutdown(struct sonode *so, int how, cred_t *cr) 279 { 280 return (SOP_SHUTDOWN(so, how, cr)); 281 } 282 283 /* 284 * Get socket options. 285 */ 286 /*ARGSUSED*/ 287 int 288 socket_getsockopt(struct sonode *so, int level, int option_name, 289 void *optval, socklen_t *optlenp, int flags, cred_t *cr) 290 { 291 return (SOP_GETSOCKOPT(so, level, option_name, optval, 292 optlenp, flags, cr)); 293 } 294 295 /* 296 * Set socket options 297 */ 298 int 299 socket_setsockopt(struct sonode *so, int level, int option_name, 300 const void *optval, t_uscalar_t optlen, cred_t *cr) 301 { 302 int val = 1; 303 /* Caller allocates aligned optval, or passes null */ 304 ASSERT(((uintptr_t)optval & (sizeof (t_scalar_t) - 1)) == 0); 305 /* If optval is null optlen is 0, and vice-versa */ 306 ASSERT(optval != NULL || optlen == 0); 307 ASSERT(optlen != 0 || optval == NULL); 308 309 if (optval == NULL && optlen == 0) 310 optval = &val; 311 312 return (SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 313 } 314 315 int 316 socket_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 317 cred_t *cr) 318 { 319 int error = 0; 320 ssize_t orig_resid = uiop->uio_resid; 321 322 /* 323 * Do not bypass the cache if we are doing a local (AF_UNIX) write. 324 */ 325 if (so->so_family == AF_UNIX) 326 uiop->uio_extflg |= UIO_COPY_CACHED; 327 else 328 uiop->uio_extflg &= ~UIO_COPY_CACHED; 329 330 error = SOP_SENDMSG(so, msg, uiop, cr); 331 switch (error) { 332 default: 333 break; 334 case EINTR: 335 case ENOMEM: 336 /* EAGAIN is EWOULDBLOCK */ 337 case EWOULDBLOCK: 338 /* We did a partial send */ 339 if (uiop->uio_resid != orig_resid) 340 error = 0; 341 break; 342 case EPIPE: 343 if (((so->so_mode & SM_KERNEL) == 0) && 344 ((msg->msg_flags & MSG_NOSIGNAL) == 0)) { 345 tsignal(curthread, SIGPIPE); 346 } 347 break; 348 } 349 350 return (error); 351 } 352 353 int 354 socket_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 355 struct cred *cr, mblk_t **mpp) 356 { 357 int error = 0; 358 359 error = SOP_SENDMBLK(so, msg, fflag, cr, mpp); 360 if (error == EPIPE) { 361 tsignal(curthread, SIGPIPE); 362 } 363 return (error); 364 } 365 366 int 367 socket_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 368 cred_t *cr) 369 { 370 int error; 371 ssize_t orig_resid = uiop->uio_resid; 372 373 /* 374 * Do not bypass the cache when reading data, as the application 375 * is likely to access the data shortly. 376 */ 377 uiop->uio_extflg |= UIO_COPY_CACHED; 378 379 error = SOP_RECVMSG(so, msg, uiop, cr); 380 381 switch (error) { 382 case EINTR: 383 /* EAGAIN is EWOULDBLOCK */ 384 case EWOULDBLOCK: 385 /* We did a partial read */ 386 if (uiop->uio_resid != orig_resid) 387 error = 0; 388 break; 389 default: 390 break; 391 } 392 return (error); 393 } 394 395 int 396 socket_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 397 struct cred *cr, int32_t *rvalp) 398 { 399 return (SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 400 } 401 402 int 403 socket_poll(struct sonode *so, short events, int anyyet, short *reventsp, 404 struct pollhead **phpp) 405 { 406 return (SOP_POLL(so, events, anyyet, reventsp, phpp)); 407 } 408 409 int 410 socket_close(struct sonode *so, int flag, struct cred *cr) 411 { 412 return (VOP_CLOSE(SOTOV(so), flag, 1, 0, cr, NULL)); 413 } 414 415 int 416 socket_close_internal(struct sonode *so, int flag, cred_t *cr) 417 { 418 ASSERT(so->so_count == 0); 419 420 return (SOP_CLOSE(so, flag, cr)); 421 } 422 423 void 424 socket_destroy(struct sonode *so) 425 { 426 vn_invalid(SOTOV(so)); 427 VN_RELE(SOTOV(so)); 428 } 429 430 /* ARGSUSED */ 431 void 432 socket_destroy_internal(struct sonode *so, cred_t *cr) 433 { 434 struct sockparams *sp = so->so_sockparams; 435 ASSERT(so->so_count == 0 && sp != NULL); 436 437 sp->sp_smod_info->smod_sock_destroy_func(so); 438 439 SOCKPARAMS_DEC_REF(sp); 440 } 441 442 /* 443 * TODO Once the common vnode ops is available, then the vnops argument 444 * should be removed. 445 */ 446 /*ARGSUSED*/ 447 int 448 sonode_constructor(void *buf, void *cdrarg, int kmflags) 449 { 450 struct sonode *so = buf; 451 struct vnode *vp; 452 453 vp = so->so_vnode = vn_alloc(kmflags); 454 if (vp == NULL) { 455 return (-1); 456 } 457 vp->v_data = so; 458 vn_setops(vp, socket_vnodeops); 459 460 so->so_priv = NULL; 461 so->so_oobmsg = NULL; 462 463 so->so_proto_handle = NULL; 464 465 so->so_peercred = NULL; 466 467 so->so_rcv_queued = 0; 468 so->so_rcv_q_head = NULL; 469 so->so_rcv_q_last_head = NULL; 470 so->so_rcv_head = NULL; 471 so->so_rcv_last_head = NULL; 472 so->so_rcv_wanted = 0; 473 so->so_rcv_timer_interval = SOCKET_NO_RCVTIMER; 474 so->so_rcv_timer_tid = 0; 475 so->so_rcv_thresh = 0; 476 477 list_create(&so->so_acceptq_list, sizeof (struct sonode), 478 offsetof(struct sonode, so_acceptq_node)); 479 list_create(&so->so_acceptq_defer, sizeof (struct sonode), 480 offsetof(struct sonode, so_acceptq_node)); 481 list_link_init(&so->so_acceptq_node); 482 so->so_acceptq_len = 0; 483 so->so_backlog = 0; 484 so->so_listener = NULL; 485 486 so->so_snd_qfull = B_FALSE; 487 488 so->so_filter_active = 0; 489 so->so_filter_tx = 0; 490 so->so_filter_defertime = 0; 491 so->so_filter_top = NULL; 492 so->so_filter_bottom = NULL; 493 494 mutex_init(&so->so_lock, NULL, MUTEX_DEFAULT, NULL); 495 mutex_init(&so->so_acceptq_lock, NULL, MUTEX_DEFAULT, NULL); 496 rw_init(&so->so_fallback_rwlock, NULL, RW_DEFAULT, NULL); 497 cv_init(&so->so_state_cv, NULL, CV_DEFAULT, NULL); 498 cv_init(&so->so_single_cv, NULL, CV_DEFAULT, NULL); 499 cv_init(&so->so_read_cv, NULL, CV_DEFAULT, NULL); 500 501 cv_init(&so->so_acceptq_cv, NULL, CV_DEFAULT, NULL); 502 cv_init(&so->so_snd_cv, NULL, CV_DEFAULT, NULL); 503 cv_init(&so->so_rcv_cv, NULL, CV_DEFAULT, NULL); 504 cv_init(&so->so_copy_cv, NULL, CV_DEFAULT, NULL); 505 cv_init(&so->so_closing_cv, NULL, CV_DEFAULT, NULL); 506 507 return (0); 508 } 509 510 /*ARGSUSED*/ 511 void 512 sonode_destructor(void *buf, void *cdrarg) 513 { 514 struct sonode *so = buf; 515 struct vnode *vp = SOTOV(so); 516 517 ASSERT(so->so_priv == NULL); 518 ASSERT(so->so_peercred == NULL); 519 520 ASSERT(so->so_oobmsg == NULL); 521 522 ASSERT(so->so_rcv_q_head == NULL); 523 524 list_destroy(&so->so_acceptq_list); 525 list_destroy(&so->so_acceptq_defer); 526 ASSERT(!list_link_active(&so->so_acceptq_node)); 527 ASSERT(so->so_listener == NULL); 528 529 ASSERT(so->so_filter_active == 0); 530 ASSERT(so->so_filter_tx == 0); 531 ASSERT(so->so_filter_top == NULL); 532 ASSERT(so->so_filter_bottom == NULL); 533 534 ASSERT(vp->v_data == so); 535 ASSERT(vn_matchops(vp, socket_vnodeops)); 536 537 vn_free(vp); 538 539 mutex_destroy(&so->so_lock); 540 mutex_destroy(&so->so_acceptq_lock); 541 rw_destroy(&so->so_fallback_rwlock); 542 543 cv_destroy(&so->so_state_cv); 544 cv_destroy(&so->so_single_cv); 545 cv_destroy(&so->so_read_cv); 546 cv_destroy(&so->so_acceptq_cv); 547 cv_destroy(&so->so_snd_cv); 548 cv_destroy(&so->so_rcv_cv); 549 cv_destroy(&so->so_closing_cv); 550 } 551 552 void 553 sonode_init(struct sonode *so, struct sockparams *sp, int family, 554 int type, int protocol, sonodeops_t *sops) 555 { 556 vnode_t *vp; 557 558 vp = SOTOV(so); 559 560 so->so_flag = 0; 561 562 so->so_state = 0; 563 so->so_mode = 0; 564 565 so->so_count = 0; 566 567 so->so_family = family; 568 so->so_type = type; 569 so->so_protocol = protocol; 570 571 SOCK_CONNID_INIT(so->so_proto_connid); 572 573 so->so_options = 0; 574 so->so_linger.l_onoff = 0; 575 so->so_linger.l_linger = 0; 576 so->so_sndbuf = 0; 577 so->so_error = 0; 578 so->so_rcvtimeo = 0; 579 so->so_sndtimeo = 0; 580 so->so_xpg_rcvbuf = 0; 581 582 ASSERT(so->so_oobmsg == NULL); 583 so->so_oobmark = 0; 584 so->so_pgrp = 0; 585 586 ASSERT(so->so_peercred == NULL); 587 588 so->so_zoneid = getzoneid(); 589 590 so->so_sockparams = sp; 591 592 so->so_ops = sops; 593 594 so->so_not_str = (sops != &sotpi_sonodeops); 595 596 so->so_proto_handle = NULL; 597 598 so->so_downcalls = NULL; 599 600 so->so_copyflag = 0; 601 602 vn_reinit(vp); 603 vp->v_vfsp = rootvfs; 604 vp->v_type = VSOCK; 605 vp->v_rdev = sockdev; 606 607 so->so_snd_qfull = B_FALSE; 608 so->so_minpsz = 0; 609 610 so->so_rcv_wakeup = B_FALSE; 611 so->so_snd_wakeup = B_FALSE; 612 so->so_flowctrld = B_FALSE; 613 614 so->so_pollev = 0; 615 bzero(&so->so_poll_list, sizeof (so->so_poll_list)); 616 bzero(&so->so_proto_props, sizeof (struct sock_proto_props)); 617 618 bzero(&(so->so_ksock_callbacks), sizeof (ksocket_callbacks_t)); 619 so->so_ksock_cb_arg = NULL; 620 621 so->so_max_addr_len = sizeof (struct sockaddr_storage); 622 623 so->so_direct = NULL; 624 625 vn_exists(vp); 626 } 627 628 void 629 sonode_fini(struct sonode *so) 630 { 631 vnode_t *vp; 632 633 ASSERT(so->so_count == 0); 634 635 if (so->so_rcv_timer_tid) { 636 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 637 (void) untimeout(so->so_rcv_timer_tid); 638 so->so_rcv_timer_tid = 0; 639 } 640 641 if (so->so_poll_list.ph_list != NULL) { 642 pollwakeup(&so->so_poll_list, POLLERR); 643 pollhead_clean(&so->so_poll_list); 644 } 645 646 if (so->so_direct != NULL) 647 sod_sock_fini(so); 648 649 vp = SOTOV(so); 650 vn_invalid(vp); 651 652 if (so->so_peercred != NULL) { 653 crfree(so->so_peercred); 654 so->so_peercred = NULL; 655 } 656 /* Detach and destroy filters */ 657 if (so->so_filter_top != NULL) 658 sof_sonode_cleanup(so); 659 660 ASSERT(list_is_empty(&so->so_acceptq_list)); 661 ASSERT(list_is_empty(&so->so_acceptq_defer)); 662 ASSERT(!list_link_active(&so->so_acceptq_node)); 663 664 ASSERT(so->so_rcv_queued == 0); 665 ASSERT(so->so_rcv_q_head == NULL); 666 ASSERT(so->so_rcv_q_last_head == NULL); 667 ASSERT(so->so_rcv_head == NULL); 668 ASSERT(so->so_rcv_last_head == NULL); 669 } 670