1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/socket.h> 38 #include <sys/random.h> 39 #include <sys/policy.h> 40 #include <sys/tsol/tndb.h> 41 #include <sys/tsol/tnet.h> 42 43 #include <netinet/in.h> 44 #include <netinet/ip6.h> 45 46 #include <inet/common.h> 47 #include <inet/ip.h> 48 #include <inet/ip6.h> 49 #include <inet/ipclassifier.h> 50 #include "sctp_impl.h" 51 #include "sctp_asconf.h" 52 #include "sctp_addr.h" 53 54 uint_t sctp_next_port_to_try; 55 56 /* 57 * Returns 0 on success, EACCES on permission failure. 58 */ 59 static int 60 sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified) 61 { 62 /* 63 * Get a valid port (within the anonymous range and should not 64 * be a privileged one) to use if the user has not given a port. 65 * If multiple threads are here, they may all start with 66 * with the same initial port. But, it should be fine as long as 67 * sctp_bindi will ensure that no two threads will be assigned 68 * the same port. 69 */ 70 if (*requested_port == 0) { 71 *requested_port = sctp_update_next_port(sctp_next_port_to_try, 72 crgetzone(sctp->sctp_credp)); 73 if (*requested_port == 0) 74 return (EACCES); 75 *user_specified = 0; 76 } else { 77 int i; 78 boolean_t priv = B_FALSE; 79 80 /* 81 * If the requested_port is in the well-known privileged range, 82 * verify that the stream was opened by a privileged user. 83 * Note: No locks are held when inspecting sctp_g_*epriv_ports 84 * but instead the code relies on: 85 * - the fact that the address of the array and its size never 86 * changes 87 * - the atomic assignment of the elements of the array 88 */ 89 if (*requested_port < sctp_smallest_nonpriv_port) { 90 priv = B_TRUE; 91 } else { 92 for (i = 0; i < sctp_g_num_epriv_ports; i++) { 93 if (*requested_port == sctp_g_epriv_ports[i]) { 94 priv = B_TRUE; 95 break; 96 } 97 } 98 } 99 if (priv) { 100 /* 101 * sctp_bind() should take a cred_t argument so that 102 * we can use it here. 103 */ 104 if (secpolicy_net_privaddr(sctp->sctp_credp, 105 *requested_port) != 0) { 106 dprint(1, 107 ("sctp_bind(x): no prive for port %d", 108 *requested_port)); 109 return (EACCES); 110 } 111 } 112 *user_specified = 1; 113 } 114 115 return (0); 116 } 117 118 int 119 sctp_listen(sctp_t *sctp) 120 { 121 sctp_tf_t *tf; 122 123 RUN_SCTP(sctp); 124 /* 125 * TCP handles listen() increasing the backlog, need to check 126 * if it should be handled here too 127 */ 128 if (sctp->sctp_state > SCTPS_BOUND) { 129 WAKE_SCTP(sctp); 130 return (EINVAL); 131 } 132 133 /* Do an anonymous bind for unbound socket doing listen(). */ 134 if (sctp->sctp_nsaddrs == 0) { 135 struct sockaddr_storage ss; 136 int ret; 137 138 bzero(&ss, sizeof (ss)); 139 ss.ss_family = sctp->sctp_family; 140 141 WAKE_SCTP(sctp); 142 if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss, 143 sizeof (ss))) != 0) 144 return (ret); 145 RUN_SCTP(sctp) 146 } 147 148 sctp->sctp_state = SCTPS_LISTEN; 149 (void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN); 150 sctp->sctp_last_secret_update = lbolt64; 151 bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN); 152 tf = &sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(sctp->sctp_lport))]; 153 sctp_listen_hash_insert(tf, sctp); 154 WAKE_SCTP(sctp); 155 return (0); 156 } 157 158 /* 159 * Bind the sctp_t to a sockaddr, which includes an address and other 160 * information, such as port or flowinfo. 161 */ 162 int 163 sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len) 164 { 165 int user_specified; 166 boolean_t bind_to_req_port_only; 167 in_port_t requested_port; 168 in_port_t allocated_port; 169 int err = 0; 170 171 ASSERT(sctp != NULL); 172 ASSERT(sa); 173 174 RUN_SCTP(sctp); 175 176 if (sctp->sctp_state > SCTPS_BOUND) { 177 err = EINVAL; 178 goto done; 179 } 180 181 switch (sa->sa_family) { 182 case AF_INET: 183 if (len < sizeof (struct sockaddr_in) || 184 sctp->sctp_family == AF_INET6) { 185 err = EINVAL; 186 goto done; 187 } 188 requested_port = ntohs(((struct sockaddr_in *)sa)->sin_port); 189 break; 190 case AF_INET6: 191 if (len < sizeof (struct sockaddr_in6) || 192 sctp->sctp_family == AF_INET) { 193 err = EINVAL; 194 goto done; 195 } 196 requested_port = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); 197 /* Set the flowinfo. */ 198 sctp->sctp_ip6h->ip6_vcf = 199 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 200 (((struct sockaddr_in6 *)sa)->sin6_flowinfo & 201 ~IPV6_VERS_AND_FLOW_MASK); 202 break; 203 default: 204 err = EAFNOSUPPORT; 205 goto done; 206 } 207 bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE; 208 209 err = sctp_select_port(sctp, &requested_port, &user_specified); 210 if (err != 0) 211 goto done; 212 213 if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE, 214 user_specified == 1 ? htons(requested_port) : 0)) != 0) { 215 goto done; 216 } 217 err = sctp_bindi(sctp, requested_port, bind_to_req_port_only, 218 user_specified, &allocated_port); 219 if (err != 0) { 220 sctp_free_saddrs(sctp); 221 } else { 222 ASSERT(sctp->sctp_state == SCTPS_BOUND); 223 } 224 done: 225 WAKE_SCTP(sctp); 226 return (err); 227 } 228 229 /* 230 * Perform bind/unbind operation of a list of addresses on a sctp_t 231 */ 232 int 233 sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop) 234 { 235 ASSERT(sctp != NULL); 236 ASSERT(addrs != NULL); 237 ASSERT(addrcnt > 0); 238 239 switch (bindop) { 240 case SCTP_BINDX_ADD_ADDR: 241 return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE, 242 sctp->sctp_lport)); 243 case SCTP_BINDX_REM_ADDR: 244 return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE)); 245 default: 246 return (EINVAL); 247 } 248 } 249 250 /* 251 * Add a list of addresses to a sctp_t. 252 */ 253 int 254 sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, 255 boolean_t caller_hold_lock, in_port_t port) 256 { 257 int err = 0; 258 boolean_t do_asconf = B_FALSE; 259 260 if (!caller_hold_lock) 261 RUN_SCTP(sctp); 262 263 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 264 if (!caller_hold_lock) 265 WAKE_SCTP(sctp); 266 return (EINVAL); 267 } 268 269 if (sctp->sctp_state > SCTPS_LISTEN) { 270 /* 271 * Let's do some checking here rather than undoing the 272 * add later (for these reasons). 273 */ 274 if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || 275 !sctp->sctp_understands_addip) { 276 if (!caller_hold_lock) 277 WAKE_SCTP(sctp); 278 return (EINVAL); 279 } 280 do_asconf = B_TRUE; 281 } 282 /* 283 * On a clustered node, for an inaddr_any bind, we will pass the list 284 * of all the addresses in the global list, minus any address on the 285 * loopback interface, and expect the clustering susbsystem to give us 286 * the correct list for the 'port'. For explicit binds we give the 287 * list of addresses and the clustering module validates it for the 288 * 'port'. 289 * 290 * On a non-clustered node, cl_sctp_check_addrs will be NULL and 291 * we proceed as usual. 292 */ 293 if (cl_sctp_check_addrs != NULL) { 294 uchar_t *addrlist = NULL; 295 size_t size = 0; 296 int unspec = 0; 297 boolean_t do_listen; 298 uchar_t *llist = NULL; 299 size_t lsize = 0; 300 301 /* 302 * If we are adding addresses after listening, but before 303 * an association is established, we need to update the 304 * clustering module with this info. 305 */ 306 do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND && 307 cl_sctp_listen != NULL; 308 309 err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist, 310 &unspec, &size); 311 if (err != 0) { 312 ASSERT(addrlist == NULL); 313 ASSERT(addrcnt == 0); 314 ASSERT(size == 0); 315 if (!caller_hold_lock) 316 WAKE_SCTP(sctp); 317 SCTP_KSTAT(sctp_cl_check_addrs); 318 return (err); 319 } 320 ASSERT(addrlist != NULL); 321 (*cl_sctp_check_addrs)(sctp->sctp_family, port, &addrlist, 322 size, &addrcnt, unspec == 1); 323 if (addrcnt == 0) { 324 /* We free the list */ 325 kmem_free(addrlist, size); 326 if (!caller_hold_lock) 327 WAKE_SCTP(sctp); 328 return (EINVAL); 329 } 330 if (do_listen) { 331 lsize = sizeof (in6_addr_t) * addrcnt; 332 llist = kmem_alloc(lsize, KM_SLEEP); 333 } 334 err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist, 335 lsize); 336 if (err == 0 && do_listen) { 337 (*cl_sctp_listen)(sctp->sctp_family, llist, 338 addrcnt, sctp->sctp_lport); 339 /* list will be freed by the clustering module */ 340 } else if (err != 0 && llist != NULL) { 341 kmem_free(llist, lsize); 342 } 343 /* free the list we allocated */ 344 kmem_free(addrlist, size); 345 } else { 346 err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0); 347 } 348 if (err != 0) { 349 if (!caller_hold_lock) 350 WAKE_SCTP(sctp); 351 return (err); 352 } 353 /* Need to send ASCONF messages */ 354 if (do_asconf) { 355 err = sctp_add_ip(sctp, addrs, addrcnt); 356 if (err != 0) { 357 sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE); 358 if (!caller_hold_lock) 359 WAKE_SCTP(sctp); 360 return (err); 361 } 362 } 363 if (!caller_hold_lock) 364 WAKE_SCTP(sctp); 365 if (do_asconf) 366 sctp_process_sendq(sctp); 367 return (0); 368 } 369 370 /* 371 * Remove one or more addresses bound to the sctp_t. 372 */ 373 int 374 sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt, 375 boolean_t caller_hold_lock) 376 { 377 int error = 0; 378 boolean_t do_asconf = B_FALSE; 379 uchar_t *ulist = NULL; 380 size_t usize = 0; 381 382 if (!caller_hold_lock) 383 RUN_SCTP(sctp); 384 385 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 386 if (!caller_hold_lock) 387 WAKE_SCTP(sctp); 388 return (EINVAL); 389 } 390 /* 391 * Fail the remove if we are beyond listen, but can't send this 392 * to the peer. 393 */ 394 if (sctp->sctp_state > SCTPS_LISTEN) { 395 if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || 396 !sctp->sctp_understands_addip) { 397 if (!caller_hold_lock) 398 WAKE_SCTP(sctp); 399 return (EINVAL); 400 } 401 do_asconf = B_TRUE; 402 } 403 404 /* Can't delete the last address nor all of the addresses */ 405 if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) { 406 if (!caller_hold_lock) 407 WAKE_SCTP(sctp); 408 return (EINVAL); 409 } 410 411 if (cl_sctp_unlisten != NULL && !do_asconf && 412 sctp->sctp_state > SCTPS_BOUND) { 413 usize = sizeof (in6_addr_t) * addrcnt; 414 ulist = kmem_alloc(usize, KM_SLEEP); 415 } 416 417 error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize); 418 if (error != 0) { 419 if (ulist != NULL) 420 kmem_free(ulist, usize); 421 if (!caller_hold_lock) 422 WAKE_SCTP(sctp); 423 return (error); 424 } 425 /* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */ 426 if (ulist != NULL) { 427 ASSERT(cl_sctp_unlisten != NULL); 428 (*cl_sctp_unlisten)(sctp->sctp_family, ulist, addrcnt, 429 sctp->sctp_lport); 430 /* ulist will be freed by the clustering module */ 431 } 432 if (!caller_hold_lock) 433 WAKE_SCTP(sctp); 434 if (do_asconf) 435 sctp_process_sendq(sctp); 436 return (error); 437 } 438 439 /* 440 * Returns 0 for success, errno value otherwise. 441 * 442 * If the "bind_to_req_port_only" parameter is set and the requested port 443 * number is available, then set allocated_port to it. If not available, 444 * return an error. 445 * 446 * If the "bind_to_req_port_only" parameter is not set and the requested port 447 * number is available, then set allocated_port to it. If not available, 448 * find the first anonymous port we can and set allocated_port to that. If no 449 * anonymous ports are available, return an error. 450 * 451 * In either case, when succeeding, update the sctp_t to record the port number 452 * and insert it in the bind hash table. 453 */ 454 int 455 sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only, 456 int user_specified, in_port_t *allocated_port) 457 { 458 /* number of times we have run around the loop */ 459 int count = 0; 460 /* maximum number of times to run around the loop */ 461 int loopmax; 462 zoneid_t zoneid = sctp->sctp_zoneid; 463 zone_t *zone = crgetzone(sctp->sctp_credp); 464 465 /* 466 * Lookup for free addresses is done in a loop and "loopmax" 467 * influences how long we spin in the loop 468 */ 469 if (bind_to_req_port_only) { 470 /* 471 * If the requested port is busy, don't bother to look 472 * for a new one. Setting loop maximum count to 1 has 473 * that effect. 474 */ 475 loopmax = 1; 476 } else { 477 /* 478 * If the requested port is busy, look for a free one 479 * in the anonymous port range. 480 * Set loopmax appropriately so that one does not look 481 * forever in the case all of the anonymous ports are in use. 482 */ 483 loopmax = (sctp_largest_anon_port - 484 sctp_smallest_anon_port + 1); 485 } 486 do { 487 uint16_t lport; 488 sctp_tf_t *tbf; 489 sctp_t *lsctp; 490 int addrcmp; 491 492 lport = htons(port); 493 494 /* 495 * Ensure that the sctp_t is not currently in the bind hash. 496 * Hold the lock on the hash bucket to ensure that 497 * the duplicate check plus the insertion is an atomic 498 * operation. 499 * 500 * This function does an inline lookup on the bind hash list 501 * Make sure that we access only members of sctp_t 502 * and that we don't look at sctp_sctp, since we are not 503 * doing a SCTPB_REFHOLD. For more details please see the notes 504 * in sctp_compress() 505 */ 506 sctp_bind_hash_remove(sctp); 507 tbf = &sctp_bind_fanout[SCTP_BIND_HASH(port)]; 508 mutex_enter(&tbf->tf_lock); 509 for (lsctp = tbf->tf_sctp; lsctp != NULL; 510 lsctp = lsctp->sctp_bind_hash) { 511 512 if (lport != lsctp->sctp_lport || 513 lsctp->sctp_state < SCTPS_BOUND) 514 continue; 515 516 /* 517 * On a labeled system, we must treat bindings to ports 518 * on shared IP addresses by sockets with MAC exemption 519 * privilege as being in all zones, as there's 520 * otherwise no way to identify the right receiver. 521 */ 522 if (lsctp->sctp_zoneid != zoneid && 523 !lsctp->sctp_mac_exempt && !sctp->sctp_mac_exempt) 524 continue; 525 526 addrcmp = sctp_compare_saddrs(sctp, lsctp); 527 if (addrcmp != SCTP_ADDR_DISJOINT) { 528 if (!sctp->sctp_reuseaddr) { 529 /* in use */ 530 break; 531 } else if (lsctp->sctp_state == SCTPS_BOUND || 532 lsctp->sctp_state == SCTPS_LISTEN) { 533 /* 534 * socket option SO_REUSEADDR is set 535 * on the binding sctp_t. 536 * 537 * We have found a match of IP source 538 * address and source port, which is 539 * refused regardless of the 540 * SO_REUSEADDR setting, so we break. 541 */ 542 break; 543 } 544 } 545 } 546 if (lsctp != NULL) { 547 /* The port number is busy */ 548 mutex_exit(&tbf->tf_lock); 549 } else { 550 conn_t *connp = sctp->sctp_connp; 551 552 if (is_system_labeled()) { 553 mlp_type_t addrtype, mlptype; 554 555 /* 556 * On a labeled system we must check the type 557 * of the binding requested by the user (either 558 * MLP or SLP on shared and private addresses), 559 * and that the user's requested binding 560 * is permitted. 561 */ 562 addrtype = tsol_mlp_addr_type(zone->zone_id, 563 sctp->sctp_ipversion, 564 sctp->sctp_ipversion == IPV4_VERSION ? 565 (void *)&sctp->sctp_ipha->ipha_src : 566 (void *)&sctp->sctp_ip6h->ip6_src); 567 568 /* 569 * tsol_mlp_addr_type returns the possibilities 570 * for the selected address. Since all local 571 * addresses are either private or shared, the 572 * return value mlptSingle means "local address 573 * not valid (interface not present)." 574 */ 575 if (addrtype == mlptSingle) { 576 mutex_exit(&tbf->tf_lock); 577 return (EADDRNOTAVAIL); 578 } 579 mlptype = tsol_mlp_port_type(zone, IPPROTO_SCTP, 580 port, addrtype); 581 if (mlptype != mlptSingle) { 582 if (secpolicy_net_bindmlp(connp-> 583 conn_cred) != 0) { 584 mutex_exit(&tbf->tf_lock); 585 return (EACCES); 586 } 587 /* 588 * If we're binding a shared MLP, then 589 * make sure that this zone is the one 590 * that owns that MLP. Shared MLPs can 591 * be owned by at most one zone. 592 */ 593 594 if (mlptype == mlptShared && 595 addrtype == mlptShared && 596 connp->conn_zoneid != 597 tsol_mlp_findzone(IPPROTO_SCTP, 598 lport)) { 599 mutex_exit(&tbf->tf_lock); 600 return (EACCES); 601 } 602 connp->conn_mlp_type = mlptype; 603 } 604 } 605 /* 606 * This port is ours. Insert in fanout and mark as 607 * bound to prevent others from getting the port 608 * number. 609 */ 610 sctp->sctp_state = SCTPS_BOUND; 611 sctp->sctp_lport = lport; 612 sctp->sctp_sctph->sh_sport = lport; 613 614 ASSERT(&sctp_bind_fanout[SCTP_BIND_HASH(port)] == tbf); 615 sctp_bind_hash_insert(tbf, sctp, 1); 616 617 mutex_exit(&tbf->tf_lock); 618 619 /* 620 * We don't want sctp_next_port_to_try to "inherit" 621 * a port number supplied by the user in a bind. 622 * 623 * This is the only place where sctp_next_port_to_try 624 * is updated. After the update, it may or may not 625 * be in the valid range. 626 */ 627 if (user_specified == 0) 628 sctp_next_port_to_try = port + 1; 629 630 *allocated_port = port; 631 632 return (0); 633 } 634 635 if ((count == 0) && (user_specified)) { 636 /* 637 * We may have to return an anonymous port. So 638 * get one to start with. 639 */ 640 port = sctp_update_next_port(sctp_next_port_to_try, 641 zone); 642 user_specified = 0; 643 } else { 644 port = sctp_update_next_port(port + 1, zone); 645 } 646 if (port == 0) 647 break; 648 649 /* 650 * Don't let this loop run forever in the case where 651 * all of the anonymous ports are in use. 652 */ 653 } while (++count < loopmax); 654 655 return (bind_to_req_port_only ? EADDRINUSE : EADDRNOTAVAIL); 656 } 657 658 /* 659 * Don't let port fall into the privileged range. 660 * Since the extra privileged ports can be arbitrary we also 661 * ensure that we exclude those from consideration. 662 * sctp_g_epriv_ports is not sorted thus we loop over it until 663 * there are no changes. 664 * 665 * Note: No locks are held when inspecting sctp_g_*epriv_ports 666 * but instead the code relies on: 667 * - the fact that the address of the array and its size never changes 668 * - the atomic assignment of the elements of the array 669 */ 670 in_port_t 671 sctp_update_next_port(in_port_t port, zone_t *zone) 672 { 673 int i; 674 boolean_t restart = B_FALSE; 675 676 retry: 677 if (port < sctp_smallest_anon_port) 678 port = sctp_smallest_anon_port; 679 680 if (port > sctp_largest_anon_port) { 681 if (restart) 682 return (0); 683 restart = B_TRUE; 684 port = sctp_smallest_anon_port; 685 } 686 687 if (port < sctp_smallest_nonpriv_port) 688 port = sctp_smallest_nonpriv_port; 689 690 for (i = 0; i < sctp_g_num_epriv_ports; i++) { 691 if (port == sctp_g_epriv_ports[i]) { 692 port++; 693 /* 694 * Make sure whether the port is in the 695 * valid range. 696 * 697 * XXX Note that if sctp_g_epriv_ports contains 698 * all the anonymous ports this will be an 699 * infinite loop. 700 */ 701 goto retry; 702 } 703 } 704 705 if (is_system_labeled() && 706 (i = tsol_next_port(zone, port, IPPROTO_SCTP, B_TRUE)) != 0) { 707 port = i; 708 goto retry; 709 } 710 711 return (port); 712 } 713