/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/random.h>
#include <sys/policy.h>
#include <sys/tsol/tndb.h>
#include <sys/tsol/tnet.h>

#include <netinet/in.h>
#include <netinet/ip6.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ipclassifier.h>
#include "sctp_impl.h"
#include "sctp_asconf.h"
#include "sctp_addr.h"

uint_t sctp_next_port_to_try;

/*
 * Returns 0 on success, EACCES on permission failure.
 */
static int
sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified)
{
	/*
	 * Get a valid port (within the anonymous range and should not
	 * be a privileged one) to use if the user has not given a port.
	 * If multiple threads are here, they may all start with
	 * the same initial port.  But, it should be fine as long as
	 * sctp_bindi will ensure that no two threads will be assigned
	 * the same port.
	 */
	if (*requested_port == 0) {
		*requested_port = sctp_update_next_port(sctp_next_port_to_try,
		    crgetzone(sctp->sctp_credp));
		if (*requested_port == 0)
			return (EACCES);
		*user_specified = 0;
	} else {
		int i;
		boolean_t priv = B_FALSE;

		/*
		 * If the requested_port is in the well-known privileged range,
		 * verify that the stream was opened by a privileged user.
		 * Note: No locks are held when inspecting sctp_g_*epriv_ports
		 * but instead the code relies on:
		 * - the fact that the address of the array and its size never
		 *   changes
		 * - the atomic assignment of the elements of the array
		 */
		if (*requested_port < sctp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < sctp_g_num_epriv_ports; i++) {
				if (*requested_port == sctp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}
		if (priv) {
			/*
			 * sctp_bind() should take a cred_t argument so that
			 * we can use it here.
			 */
			if (secpolicy_net_privaddr(sctp->sctp_credp,
			    *requested_port) != 0) {
				dprint(1,
				    ("sctp_bind(x): no priv for port %d",
				    *requested_port));
				return (EACCES);
			}
		}
		*user_specified = 1;
	}

	return (0);
}

int
sctp_listen(sctp_t *sctp)
{
	sctp_tf_t *tf;

	RUN_SCTP(sctp);
	/*
	 * TCP handles listen() increasing the backlog; need to check
	 * if it should be handled here too.
	 */
	if (sctp->sctp_state > SCTPS_BOUND) {
		WAKE_SCTP(sctp);
		return (EINVAL);
	}

	/* Do an anonymous bind for unbound socket doing listen(). */
	if (sctp->sctp_nsaddrs == 0) {
		struct sockaddr_storage ss;
		int ret;

		bzero(&ss, sizeof (ss));
		ss.ss_family = sctp->sctp_family;

		WAKE_SCTP(sctp);
		if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss,
		    sizeof (ss))) != 0)
			return (ret);
		RUN_SCTP(sctp)
	}

	sctp->sctp_state = SCTPS_LISTEN;
	(void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN);
	sctp->sctp_last_secret_update = lbolt64;
	bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN);
	tf = &sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(sctp->sctp_lport))];
	sctp_listen_hash_insert(tf, sctp);
	WAKE_SCTP(sctp);
	return (0);
}

/*
 * Bind the sctp_t to a sockaddr, which includes an address and other
 * information, such as port or flowinfo.
 */
int
sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len)
{
	int user_specified;
	boolean_t bind_to_req_port_only;
	in_port_t requested_port;
	in_port_t allocated_port;
	int err = 0;

	ASSERT(sctp != NULL);
	ASSERT(sa);

	RUN_SCTP(sctp);

	if (sctp->sctp_state > SCTPS_BOUND) {
		err = EINVAL;
		goto done;
	}

	switch (sa->sa_family) {
	case AF_INET:
		if (len < sizeof (struct sockaddr_in) ||
		    sctp->sctp_family == AF_INET6) {
			err = EINVAL;
			goto done;
		}
		requested_port = ntohs(((struct sockaddr_in *)sa)->sin_port);
		break;
	case AF_INET6:
		if (len < sizeof (struct sockaddr_in6) ||
		    sctp->sctp_family == AF_INET) {
			err = EINVAL;
			goto done;
		}
		requested_port = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
		/* Set the flowinfo. */
		sctp->sctp_ip6h->ip6_vcf =
		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
		    (((struct sockaddr_in6 *)sa)->sin6_flowinfo &
		    ~IPV6_VERS_AND_FLOW_MASK);
		break;
	default:
		err = EAFNOSUPPORT;
		goto done;
	}
	bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE;

	err = sctp_select_port(sctp, &requested_port, &user_specified);
	if (err != 0)
		goto done;

	if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE,
	    user_specified == 1 ? htons(requested_port) : 0)) != 0) {
		goto done;
	}
	err = sctp_bindi(sctp, requested_port, bind_to_req_port_only,
	    user_specified, &allocated_port);
	if (err != 0) {
		sctp_free_saddrs(sctp);
	} else {
		ASSERT(sctp->sctp_state == SCTPS_BOUND);
	}
done:
	WAKE_SCTP(sctp);
	return (err);
}

/*
 * Perform bind/unbind operation of a list of addresses on a sctp_t
 */
int
sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop)
{
	ASSERT(sctp != NULL);
	ASSERT(addrs != NULL);
	ASSERT(addrcnt > 0);

	switch (bindop) {
	case SCTP_BINDX_ADD_ADDR:
		return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE,
		    sctp->sctp_lport));
	case SCTP_BINDX_REM_ADDR:
		return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE));
	default:
		return (EINVAL);
	}
}

/*
 * Add a list of addresses to a sctp_t.
 */
int
sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
    boolean_t caller_hold_lock, in_port_t port)
{
	int err = 0;
	boolean_t do_asconf = B_FALSE;

	if (!caller_hold_lock)
		RUN_SCTP(sctp);

	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
		if (!caller_hold_lock)
			WAKE_SCTP(sctp);
		return (EINVAL);
	}

	if (sctp->sctp_state > SCTPS_LISTEN) {
		/*
		 * Let's do some checking here rather than undoing the
		 * add later: once we are past LISTEN, the peer must be
		 * told about the new addresses via ASCONF, so Add-IP
		 * must be enabled and understood by the peer.
		 */
		if (!sctp_addip_enabled || !sctp->sctp_understands_asconf ||
		    !sctp->sctp_understands_addip) {
			if (!caller_hold_lock)
				WAKE_SCTP(sctp);
			return (EINVAL);
		}
		do_asconf = B_TRUE;
	}
	/*
	 * On a clustered node, for an inaddr_any bind, we will pass the list
	 * of all the addresses in the global list, minus any address on the
	 * loopback interface, and expect the clustering subsystem to give us
	 * the correct list for the 'port'. For explicit binds we give the
	 * list of addresses and the clustering module validates it for the
	 * 'port'.
	 *
	 * On a non-clustered node, cl_sctp_check_addrs will be NULL and
	 * we proceed as usual.
	 */
	if (cl_sctp_check_addrs != NULL) {
		uchar_t *addrlist = NULL;
		size_t size = 0;
		int unspec = 0;
		boolean_t do_listen;
		uchar_t *llist = NULL;
		size_t lsize = 0;

		/*
		 * If we are adding addresses after listening, but before
		 * an association is established, we need to update the
		 * clustering module with this info.
		 */
		do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND &&
		    cl_sctp_listen != NULL;

		err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist,
		    &unspec, &size);
		if (err != 0) {
			ASSERT(addrlist == NULL);
			ASSERT(addrcnt == 0);
			ASSERT(size == 0);
			if (!caller_hold_lock)
				WAKE_SCTP(sctp);
			return (err);
		}
		ASSERT(addrlist != NULL);
		(*cl_sctp_check_addrs)(sctp->sctp_family, port, &addrlist,
		    size, &addrcnt, unspec == 1);
		if (addrcnt == 0) {
			/* We free the list */
			kmem_free(addrlist, size);
			if (!caller_hold_lock)
				WAKE_SCTP(sctp);
			return (EINVAL);
		}
		if (do_listen) {
			lsize = sizeof (in6_addr_t) * addrcnt;
			llist = kmem_alloc(lsize, KM_SLEEP);
		}
		err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist,
		    lsize);
		if (err == 0 && do_listen) {
			(*cl_sctp_listen)(sctp->sctp_family, llist,
			    addrcnt, sctp->sctp_lport);
			/* list will be freed by the clustering module */
		} else if (err != 0 && llist != NULL) {
			kmem_free(llist, lsize);
		}
		/* free the list we allocated */
		kmem_free(addrlist, size);
	} else {
		err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0);
	}
	if (err != 0) {
		if (!caller_hold_lock)
			WAKE_SCTP(sctp);
		return (err);
	}
	/* Need to send ASCONF messages */
	if (do_asconf) {
		err = sctp_add_ip(sctp, addrs, addrcnt);
		if (err != 0) {
			sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE);
			if (!caller_hold_lock)
				WAKE_SCTP(sctp);
			return (err);
		}
	}
	if (!caller_hold_lock)
		WAKE_SCTP(sctp);
	if (do_asconf)
		sctp_process_sendq(sctp);
	return (0);
}

/*
 * Remove one or more addresses bound to the sctp_t.
 */
int
sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt,
    boolean_t caller_hold_lock)
{
	int error = 0;
	boolean_t do_asconf = B_FALSE;
	uchar_t *ulist = NULL;
	size_t usize = 0;

	if (!caller_hold_lock)
		RUN_SCTP(sctp);

	if (sctp->sctp_state > SCTPS_ESTABLISHED) {
		if (!caller_hold_lock)
			WAKE_SCTP(sctp);
		return (EINVAL);
	}
	/*
	 * Fail the remove if we are beyond listen, but can't send this
	 * to the peer.
	 */
	if (sctp->sctp_state > SCTPS_LISTEN) {
		if (!sctp_addip_enabled || !sctp->sctp_understands_asconf ||
		    !sctp->sctp_understands_addip) {
			if (!caller_hold_lock)
				WAKE_SCTP(sctp);
			return (EINVAL);
		}
		do_asconf = B_TRUE;
	}

	/* Can't delete the last address nor all of the addresses */
	if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) {
		if (!caller_hold_lock)
			WAKE_SCTP(sctp);
		return (EINVAL);
	}

	if (cl_sctp_unlisten != NULL && !do_asconf &&
	    sctp->sctp_state > SCTPS_BOUND) {
		usize = sizeof (in6_addr_t) * addrcnt;
		ulist = kmem_alloc(usize, KM_SLEEP);
	}

	error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize);
	if (error != 0) {
		if (ulist != NULL)
			kmem_free(ulist, usize);
		if (!caller_hold_lock)
			WAKE_SCTP(sctp);
		return (error);
	}
	/* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */
	if (ulist != NULL) {
		ASSERT(cl_sctp_unlisten != NULL);
		(*cl_sctp_unlisten)(sctp->sctp_family, ulist, addrcnt,
		    sctp->sctp_lport);
		/* ulist will be freed by the clustering module */
	}
	if (!caller_hold_lock)
		WAKE_SCTP(sctp);
	if (do_asconf)
		sctp_process_sendq(sctp);
	return (error);
}

/*
 * Returns 0 for success, errno value otherwise.
 *
 * If the "bind_to_req_port_only" parameter is set and the requested port
 * number is available, then set allocated_port to it. If not available,
 * return an error.
 *
 * If the "bind_to_req_port_only" parameter is not set and the requested port
 * number is available, then set allocated_port to it. If not available,
 * find the first anonymous port we can and set allocated_port to that. If no
 * anonymous ports are available, return an error.
 *
 * In either case, when succeeding, update the sctp_t to record the port number
 * and insert it in the bind hash table.
 */
int
sctp_bindi(sctp_t *sctp, in_port_t port, boolean_t bind_to_req_port_only,
    int user_specified, in_port_t *allocated_port)
{
	/* number of times we have run around the loop */
	int count = 0;
	/* maximum number of times to run around the loop */
	int loopmax;
	zoneid_t zoneid = sctp->sctp_zoneid;
	zone_t *zone = crgetzone(sctp->sctp_credp);

	/*
	 * Lookup for free addresses is done in a loop and "loopmax"
	 * influences how long we spin in the loop
	 */
	if (bind_to_req_port_only) {
		/*
		 * If the requested port is busy, don't bother to look
		 * for a new one. Setting loop maximum count to 1 has
		 * that effect.
		 */
		loopmax = 1;
	} else {
		/*
		 * If the requested port is busy, look for a free one
		 * in the anonymous port range.
		 * Set loopmax appropriately so that one does not look
		 * forever in the case all of the anonymous ports are in use.
		 */
		loopmax = (sctp_largest_anon_port -
		    sctp_smallest_anon_port + 1);
	}
	do {
		uint16_t lport;
		sctp_tf_t *tbf;
		sctp_t *lsctp;
		int addrcmp;

		lport = htons(port);

		/*
		 * Ensure that the sctp_t is not currently in the bind hash.
		 * Hold the lock on the hash bucket to ensure that
		 * the duplicate check plus the insertion is an atomic
		 * operation.
		 *
		 * This function does an inline lookup on the bind hash list
		 * Make sure that we access only members of sctp_t
		 * and that we don't look at sctp_sctp, since we are not
		 * doing a SCTPB_REFHOLD.
		 * For more details please see the notes
		 * in sctp_compress()
		 */
		sctp_bind_hash_remove(sctp);
		tbf = &sctp_bind_fanout[SCTP_BIND_HASH(port)];
		mutex_enter(&tbf->tf_lock);
		for (lsctp = tbf->tf_sctp; lsctp != NULL;
		    lsctp = lsctp->sctp_bind_hash) {

			if (lport != lsctp->sctp_lport ||
			    lsctp->sctp_state < SCTPS_BOUND)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (lsctp->sctp_zoneid != zoneid &&
			    !lsctp->sctp_mac_exempt && !sctp->sctp_mac_exempt)
				continue;

			addrcmp = sctp_compare_saddrs(sctp, lsctp);
			if (addrcmp != SCTP_ADDR_DISJOINT) {
				if (!sctp->sctp_reuseaddr) {
					/* in use */
					break;
				} else if (lsctp->sctp_state == SCTPS_BOUND ||
				    lsctp->sctp_state == SCTPS_LISTEN) {
					/*
					 * socket option SO_REUSEADDR is set
					 * on the binding sctp_t.
					 *
					 * We have found a match of IP source
					 * address and source port, which is
					 * refused regardless of the
					 * SO_REUSEADDR setting, so we break.
					 */
					break;
				}
			}
		}
		if (lsctp != NULL) {
			/* The port number is busy */
			mutex_exit(&tbf->tf_lock);
		} else {
			conn_t *connp = sctp->sctp_connp;

			if (is_system_labeled()) {
				mlp_type_t addrtype, mlptype;

				/*
				 * On a labeled system we must check the type
				 * of the binding requested by the user (either
				 * MLP or SLP on shared and private addresses),
				 * and that the user's requested binding
				 * is permitted.
				 */
				addrtype = tsol_mlp_addr_type(zone->zone_id,
				    sctp->sctp_ipversion,
				    sctp->sctp_ipversion == IPV4_VERSION ?
				    (void *)&sctp->sctp_ipha->ipha_src :
				    (void *)&sctp->sctp_ip6h->ip6_src);

				/*
				 * tsol_mlp_addr_type returns the possibilities
				 * for the selected address.  Since all local
				 * addresses are either private or shared, the
				 * return value mlptSingle means "local address
				 * not valid (interface not present)."
				 */
				if (addrtype == mlptSingle) {
					mutex_exit(&tbf->tf_lock);
					return (EADDRNOTAVAIL);
				}
				mlptype = tsol_mlp_port_type(zone, IPPROTO_SCTP,
				    port, addrtype);
				if (mlptype != mlptSingle) {
					if (secpolicy_net_bindmlp(connp->
					    conn_cred) != 0) {
						mutex_exit(&tbf->tf_lock);
						return (EACCES);
					}
					/*
					 * If we're binding a shared MLP, then
					 * make sure that this zone is the one
					 * that owns that MLP.  Shared MLPs can
					 * be owned by at most one zone.
					 */
					if (mlptype == mlptShared &&
					    addrtype == mlptShared &&
					    connp->conn_zoneid !=
					    tsol_mlp_findzone(IPPROTO_SCTP,
					    lport)) {
						mutex_exit(&tbf->tf_lock);
						return (EACCES);
					}
					connp->conn_mlp_type = mlptype;
				}
			}
			/*
			 * This port is ours. Insert in fanout and mark as
			 * bound to prevent others from getting the port
			 * number.
			 */
			sctp->sctp_state = SCTPS_BOUND;
			sctp->sctp_lport = lport;
			sctp->sctp_sctph->sh_sport = lport;

			ASSERT(&sctp_bind_fanout[SCTP_BIND_HASH(port)] == tbf);
			sctp_bind_hash_insert(tbf, sctp, 1);

			mutex_exit(&tbf->tf_lock);

			/*
			 * We don't want sctp_next_port_to_try to "inherit"
			 * a port number supplied by the user in a bind.
			 *
			 * This is the only place where sctp_next_port_to_try
			 * is updated. After the update, it may or may not
			 * be in the valid range.
			 */
			if (user_specified == 0)
				sctp_next_port_to_try = port + 1;

			*allocated_port = port;

			return (0);
		}

		if ((count == 0) && (user_specified)) {
			/*
			 * We may have to return an anonymous port. So
			 * get one to start with.
			 */
			port = sctp_update_next_port(sctp_next_port_to_try,
			    zone);
			user_specified = 0;
		} else {
			port = sctp_update_next_port(port + 1, zone);
		}
		if (port == 0)
			break;

		/*
		 * Don't let this loop run forever in the case where
		 * all of the anonymous ports are in use.
		 */
	} while (++count < loopmax);

	return (bind_to_req_port_only ? EADDRINUSE : EADDRNOTAVAIL);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * sctp_g_epriv_ports is not sorted, thus we loop over it until
 * there are no changes.
 *
 * Note: No locks are held when inspecting sctp_g_*epriv_ports
 * but instead the code relies on:
 * - the fact that the address of the array and its size never changes
 * - the atomic assignment of the elements of the array
 */
in_port_t
sctp_update_next_port(in_port_t port, zone_t *zone)
{
	int i;
	boolean_t restart = B_FALSE;

retry:
	if (port < sctp_smallest_anon_port)
		port = sctp_smallest_anon_port;

	if (port > sctp_largest_anon_port) {
		if (restart)
			return (0);
		restart = B_TRUE;
		port = sctp_smallest_anon_port;
	}

	if (port < sctp_smallest_nonpriv_port)
		port = sctp_smallest_nonpriv_port;

	for (i = 0; i < sctp_g_num_epriv_ports; i++) {
		if (port == sctp_g_epriv_ports[i]) {
			port++;
			/*
			 * Make sure the port is still in the
			 * valid range.
			 *
			 * XXX Note that if sctp_g_epriv_ports contains
			 * all the anonymous ports this will be an
			 * infinite loop.
			 */
			goto retry;
		}
	}

	if (is_system_labeled() &&
	    (i = tsol_next_port(zone, port, IPPROTO_SCTP, B_TRUE)) != 0) {
		port = i;
		goto retry;
	}

	return (port);
}