1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/socket.h> 38 #include <sys/random.h> 39 #include <sys/policy.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ipclassifier.h> 48 #include "sctp_impl.h" 49 #include "sctp_asconf.h" 50 #include "sctp_addr.h" 51 52 uint_t sctp_next_port_to_try; 53 54 /* 55 * Returns 0 on success, EACCES on permission failure. 56 */ 57 static int 58 sctp_select_port(sctp_t *sctp, in_port_t *requested_port, int *user_specified) 59 { 60 /* 61 * Get a valid port (within the anonymous range and should not 62 * be a privileged one) to use if the user has not given a port. 63 * If multiple threads are here, they may all start with 64 * with the same initial port. But, it should be fine as long as 65 * sctp_bindi will ensure that no two threads will be assigned 66 * the same port. 67 */ 68 if (*requested_port == 0) { 69 *requested_port = sctp_update_next_port(sctp_next_port_to_try); 70 *user_specified = 0; 71 } else { 72 int i; 73 boolean_t priv = B_FALSE; 74 75 /* 76 * If the requested_port is in the well-known privileged range, 77 * verify that the stream was opened by a privileged user. 78 * Note: No locks are held when inspecting sctp_g_*epriv_ports 79 * but instead the code relies on: 80 * - the fact that the address of the array and its size never 81 * changes 82 * - the atomic assignment of the elements of the array 83 */ 84 if (*requested_port < sctp_smallest_nonpriv_port) { 85 priv = B_TRUE; 86 } else { 87 for (i = 0; i < sctp_g_num_epriv_ports; i++) { 88 if (*requested_port == sctp_g_epriv_ports[i]) { 89 priv = B_TRUE; 90 break; 91 } 92 } 93 } 94 if (priv) { 95 /* 96 * sctp_bind() should take a cred_t argument so that 97 * we can use it here. 98 */ 99 if (secpolicy_net_privaddr(sctp->sctp_credp, 100 *requested_port) != 0) { 101 dprint(1, 102 ("sctp_bind(x): no prive for port %d", 103 *requested_port)); 104 return (TACCES); 105 } 106 } 107 *user_specified = 1; 108 } 109 110 return (0); 111 } 112 113 int 114 sctp_listen(sctp_t *sctp) 115 { 116 sctp_tf_t *tf; 117 118 RUN_SCTP(sctp); 119 /* 120 * TCP handles listen() increasing the backlog, need to check 121 * if it should be handled here too 122 */ 123 if (sctp->sctp_state > SCTPS_BOUND) { 124 WAKE_SCTP(sctp); 125 return (EINVAL); 126 } 127 128 /* Do an anonymous bind for unbound socket doing listen(). */ 129 if (sctp->sctp_nsaddrs == 0) { 130 struct sockaddr_storage ss; 131 int ret; 132 133 bzero(&ss, sizeof (ss)); 134 ss.ss_family = sctp->sctp_family; 135 136 WAKE_SCTP(sctp); 137 if ((ret = sctp_bind(sctp, (struct sockaddr *)&ss, 138 sizeof (ss))) != 0) 139 return (ret); 140 RUN_SCTP(sctp) 141 } 142 143 sctp->sctp_state = SCTPS_LISTEN; 144 (void) random_get_pseudo_bytes(sctp->sctp_secret, SCTP_SECRET_LEN); 145 sctp->sctp_last_secret_update = lbolt64; 146 bzero(sctp->sctp_old_secret, SCTP_SECRET_LEN); 147 tf = &sctp_listen_fanout[SCTP_LISTEN_HASH(ntohs(sctp->sctp_lport))]; 148 sctp_listen_hash_insert(tf, sctp); 149 WAKE_SCTP(sctp); 150 return (0); 151 } 152 153 /* 154 * Bind the sctp_t to a sockaddr, which includes an address and other 155 * information, such as port or flowinfo. 156 */ 157 int 158 sctp_bind(sctp_t *sctp, struct sockaddr *sa, socklen_t len) 159 { 160 int user_specified; 161 boolean_t bind_to_req_port_only; 162 in_port_t requested_port; 163 in_port_t allocated_port; 164 int err = 0; 165 166 ASSERT(sctp != NULL); 167 ASSERT(sa); 168 169 RUN_SCTP(sctp); 170 171 if (sctp->sctp_state > SCTPS_BOUND) { 172 err = EINVAL; 173 goto done; 174 } 175 176 switch (sa->sa_family) { 177 case AF_INET: 178 if (len < sizeof (struct sockaddr_in) || 179 sctp->sctp_family == AF_INET6) { 180 err = EINVAL; 181 goto done; 182 } 183 requested_port = ntohs(((struct sockaddr_in *)sa)->sin_port); 184 break; 185 case AF_INET6: 186 if (len < sizeof (struct sockaddr_in6) || 187 sctp->sctp_family == AF_INET) { 188 err = EINVAL; 189 goto done; 190 } 191 requested_port = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); 192 /* Set the flowinfo. */ 193 sctp->sctp_ip6h->ip6_vcf = 194 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 195 (((struct sockaddr_in6 *)sa)->sin6_flowinfo & 196 ~IPV6_VERS_AND_FLOW_MASK); 197 break; 198 default: 199 err = EAFNOSUPPORT; 200 goto done; 201 } 202 bind_to_req_port_only = requested_port == 0 ? B_FALSE : B_TRUE; 203 204 if (sctp_select_port(sctp, &requested_port, &user_specified) != 0) { 205 err = EPERM; 206 goto done; 207 } 208 209 if ((err = sctp_bind_add(sctp, sa, 1, B_TRUE, 210 user_specified == 1 ? htons(requested_port) : 0)) != 0) { 211 goto done; 212 } 213 allocated_port = sctp_bindi(sctp, requested_port, 214 bind_to_req_port_only, user_specified); 215 if (allocated_port == 0) { 216 sctp_free_saddrs(sctp); 217 if (bind_to_req_port_only) { 218 err = EADDRINUSE; 219 goto done; 220 } else { 221 err = EADDRNOTAVAIL; 222 goto done; 223 } 224 } 225 ASSERT(sctp->sctp_state == SCTPS_BOUND); 226 done: 227 WAKE_SCTP(sctp); 228 return (err); 229 } 230 231 /* 232 * Perform bind/unbind operation of a list of addresses on a sctp_t 233 */ 234 int 235 sctp_bindx(sctp_t *sctp, const void *addrs, int addrcnt, int bindop) 236 { 237 ASSERT(sctp != NULL); 238 ASSERT(addrs != NULL); 239 ASSERT(addrcnt > 0); 240 241 switch (bindop) { 242 case SCTP_BINDX_ADD_ADDR: 243 return (sctp_bind_add(sctp, addrs, addrcnt, B_FALSE, 244 sctp->sctp_lport)); 245 case SCTP_BINDX_REM_ADDR: 246 return (sctp_bind_del(sctp, addrs, addrcnt, B_FALSE)); 247 default: 248 return (EINVAL); 249 } 250 } 251 252 /* 253 * Add a list of addresses to a sctp_t. 254 */ 255 int 256 sctp_bind_add(sctp_t *sctp, const void *addrs, uint32_t addrcnt, 257 boolean_t caller_hold_lock, in_port_t port) 258 { 259 int err = 0; 260 boolean_t do_asconf = B_FALSE; 261 262 if (!caller_hold_lock) 263 RUN_SCTP(sctp); 264 265 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 266 if (!caller_hold_lock) 267 WAKE_SCTP(sctp); 268 return (EINVAL); 269 } 270 271 if (sctp->sctp_state > SCTPS_LISTEN) { 272 /* 273 * Let's do some checking here rather than undoing the 274 * add later (for these reasons). 275 */ 276 if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || 277 !sctp->sctp_understands_addip) { 278 if (!caller_hold_lock) 279 WAKE_SCTP(sctp); 280 return (EINVAL); 281 } 282 do_asconf = B_TRUE; 283 } 284 /* 285 * On a clustered node, for an inaddr_any bind, we will pass the list 286 * of all the addresses in the global list, minus any address on the 287 * loopback interface, and expect the clustering susbsystem to give us 288 * the correct list for the 'port'. For explicit binds we give the 289 * list of addresses and the clustering module validates it for the 290 * 'port'. 291 * 292 * On a non-clustered node, cl_sctp_check_addrs will be NULL and 293 * we proceed as usual. 294 */ 295 if (cl_sctp_check_addrs != NULL) { 296 uchar_t *addrlist = NULL; 297 size_t size = 0; 298 int unspec = 0; 299 boolean_t do_listen; 300 uchar_t *llist = NULL; 301 size_t lsize = 0; 302 303 /* 304 * If we are adding addresses after listening, but before 305 * an association is established, we need to update the 306 * clustering module with this info. 307 */ 308 do_listen = !do_asconf && sctp->sctp_state > SCTPS_BOUND && 309 cl_sctp_listen != NULL; 310 311 err = sctp_get_addrlist(sctp, addrs, &addrcnt, &addrlist, 312 &unspec, &size); 313 if (err != 0) { 314 ASSERT(addrlist == NULL); 315 ASSERT(addrcnt == 0); 316 ASSERT(size == 0); 317 if (!caller_hold_lock) 318 WAKE_SCTP(sctp); 319 return (err); 320 } 321 ASSERT(addrlist != NULL); 322 (*cl_sctp_check_addrs)(sctp->sctp_family, port, &addrlist, 323 size, &addrcnt, unspec == 1); 324 if (addrcnt == 0) { 325 /* We free the list */ 326 kmem_free(addrlist, size); 327 if (!caller_hold_lock) 328 WAKE_SCTP(sctp); 329 return (EINVAL); 330 } 331 if (do_listen) { 332 lsize = sizeof (in6_addr_t) * addrcnt; 333 llist = kmem_alloc(lsize, KM_SLEEP); 334 } 335 err = sctp_valid_addr_list(sctp, addrlist, addrcnt, llist, 336 lsize); 337 if (err == 0 && do_listen) { 338 (*cl_sctp_listen)(sctp->sctp_family, llist, 339 addrcnt, sctp->sctp_lport); 340 /* list will be freed by the clustering module */ 341 } else if (err != 0 && llist != NULL) { 342 kmem_free(llist, lsize); 343 } 344 /* free the list we allocated */ 345 kmem_free(addrlist, size); 346 } else { 347 err = sctp_valid_addr_list(sctp, addrs, addrcnt, NULL, 0); 348 } 349 if (err != 0) { 350 if (!caller_hold_lock) 351 WAKE_SCTP(sctp); 352 return (err); 353 } 354 /* Need to send ASCONF messages */ 355 if (do_asconf) { 356 err = sctp_add_ip(sctp, addrs, addrcnt); 357 if (err != 0) { 358 sctp_del_saddr_list(sctp, addrs, addrcnt, B_FALSE); 359 if (!caller_hold_lock) 360 WAKE_SCTP(sctp); 361 return (err); 362 } 363 } 364 if (!caller_hold_lock) 365 WAKE_SCTP(sctp); 366 if (do_asconf) 367 sctp_process_sendq(sctp); 368 return (0); 369 } 370 371 /* 372 * Remove one or more addresses bound to the sctp_t. 373 */ 374 int 375 sctp_bind_del(sctp_t *sctp, const void *addrs, uint32_t addrcnt, 376 boolean_t caller_hold_lock) 377 { 378 int error = 0; 379 boolean_t do_asconf = B_FALSE; 380 uchar_t *ulist = NULL; 381 size_t usize = 0; 382 383 if (!caller_hold_lock) 384 RUN_SCTP(sctp); 385 386 if (sctp->sctp_state > SCTPS_ESTABLISHED) { 387 if (!caller_hold_lock) 388 WAKE_SCTP(sctp); 389 return (EINVAL); 390 } 391 /* 392 * Fail the remove if we are beyond listen, but can't send this 393 * to the peer. 394 */ 395 if (sctp->sctp_state > SCTPS_LISTEN) { 396 if (!sctp_addip_enabled || !sctp->sctp_understands_asconf || 397 !sctp->sctp_understands_addip) { 398 if (!caller_hold_lock) 399 WAKE_SCTP(sctp); 400 return (EINVAL); 401 } 402 do_asconf = B_TRUE; 403 } 404 405 /* Can't delete the last address nor all of the addresses */ 406 if (sctp->sctp_nsaddrs == 1 || addrcnt >= sctp->sctp_nsaddrs) { 407 if (!caller_hold_lock) 408 WAKE_SCTP(sctp); 409 return (EINVAL); 410 } 411 412 if (cl_sctp_unlisten != NULL && !do_asconf && 413 sctp->sctp_state > SCTPS_BOUND) { 414 usize = sizeof (in6_addr_t) * addrcnt; 415 ulist = kmem_alloc(usize, KM_SLEEP); 416 } 417 418 error = sctp_del_ip(sctp, addrs, addrcnt, ulist, usize); 419 if (error != 0) { 420 if (ulist != NULL) 421 kmem_free(ulist, usize); 422 if (!caller_hold_lock) 423 WAKE_SCTP(sctp); 424 return (error); 425 } 426 /* ulist will be non-NULL only if cl_sctp_unlisten is non-NULL */ 427 if (ulist != NULL) { 428 ASSERT(cl_sctp_unlisten != NULL); 429 (*cl_sctp_unlisten)(sctp->sctp_family, ulist, addrcnt, 430 sctp->sctp_lport); 431 /* ulist will be freed by the clustering module */ 432 } 433 if (!caller_hold_lock) 434 WAKE_SCTP(sctp); 435 if (do_asconf) 436 sctp_process_sendq(sctp); 437 return (error); 438 } 439 440 /* 441 * If the "bind_to_req_port_only" parameter is set, if the requested port 442 * number is available, return it, If not return 0 443 * 444 * If "bind_to_req_port_only" parameter is not set and 445 * If the requested port number is available, return it. If not, return 446 * the first anonymous port we happen across. If no anonymous ports are 447 * available, return 0. addr is the requested local address, if any. 448 * 449 * In either case, when succeeding update the sctp_t to record the port number 450 * and insert it in the bind hash table. 451 */ 452 in_port_t 453 sctp_bindi(sctp_t *sctp, in_port_t port, int bind_to_req_port_only, 454 int user_specified) 455 { 456 /* number of times we have run around the loop */ 457 int count = 0; 458 /* maximum number of times to run around the loop */ 459 int loopmax; 460 zoneid_t zoneid = sctp->sctp_zoneid; 461 462 /* 463 * Lookup for free addresses is done in a loop and "loopmax" 464 * influences how long we spin in the loop 465 */ 466 if (bind_to_req_port_only) { 467 /* 468 * If the requested port is busy, don't bother to look 469 * for a new one. Setting loop maximum count to 1 has 470 * that effect. 471 */ 472 loopmax = 1; 473 } else { 474 /* 475 * If the requested port is busy, look for a free one 476 * in the anonymous port range. 477 * Set loopmax appropriately so that one does not look 478 * forever in the case all of the anonymous ports are in use. 479 */ 480 loopmax = (sctp_largest_anon_port - 481 sctp_smallest_anon_port + 1); 482 } 483 do { 484 uint16_t lport; 485 sctp_tf_t *tbf; 486 sctp_t *lsctp; 487 int addrcmp; 488 489 lport = htons(port); 490 491 /* 492 * Ensure that the sctp_t is not currently in the bind hash. 493 * Hold the lock on the hash bucket to ensure that 494 * the duplicate check plus the insertion is an atomic 495 * operation. 496 * 497 * This function does an inline lookup on the bind hash list 498 * Make sure that we access only members of sctp_t 499 * and that we don't look at sctp_sctp, since we are not 500 * doing a SCTPB_REFHOLD. For more details please see the notes 501 * in sctp_compress() 502 */ 503 sctp_bind_hash_remove(sctp); 504 tbf = &sctp_bind_fanout[SCTP_BIND_HASH(port)]; 505 mutex_enter(&tbf->tf_lock); 506 for (lsctp = tbf->tf_sctp; lsctp != NULL; 507 lsctp = lsctp->sctp_bind_hash) { 508 509 if (lport != lsctp->sctp_lport || 510 lsctp->sctp_zoneid != zoneid || 511 lsctp->sctp_state < SCTPS_BOUND) 512 continue; 513 514 addrcmp = sctp_compare_saddrs(sctp, lsctp); 515 if (addrcmp != SCTP_ADDR_DISJOINT) { 516 if (!sctp->sctp_reuseaddr) { 517 /* in use */ 518 break; 519 } else if (lsctp->sctp_state == SCTPS_BOUND || 520 lsctp->sctp_state == SCTPS_LISTEN) { 521 /* 522 * socket option SO_REUSEADDR is set 523 * on the binding sctp_t. 524 * 525 * We have found a match of IP source 526 * address and source port, which is 527 * refused regardless of the 528 * SO_REUSEADDR setting, so we break. 529 */ 530 break; 531 } 532 } 533 } 534 if (lsctp != NULL) { 535 /* The port number is busy */ 536 mutex_exit(&tbf->tf_lock); 537 } else { 538 /* 539 * This port is ours. Insert in fanout and mark as 540 * bound to prevent others from getting the port 541 * number. 542 */ 543 sctp->sctp_state = SCTPS_BOUND; 544 sctp->sctp_lport = lport; 545 sctp->sctp_sctph->sh_sport = sctp->sctp_lport; 546 547 ASSERT(&sctp_bind_fanout[SCTP_BIND_HASH(port)] == tbf); 548 sctp_bind_hash_insert(tbf, sctp, 1); 549 550 mutex_exit(&tbf->tf_lock); 551 552 /* 553 * We don't want sctp_next_port_to_try to "inherit" 554 * a port number supplied by the user in a bind. 555 */ 556 if (user_specified != 0) 557 return (port); 558 559 /* 560 * This is the only place where sctp_next_port_to_try 561 * is updated. After the update, it may or may not 562 * be in the valid range. 563 */ 564 sctp_next_port_to_try = port + 1; 565 return (port); 566 } 567 568 if ((count == 0) && (user_specified)) { 569 /* 570 * We may have to return an anonymous port. So 571 * get one to start with. 572 */ 573 port = sctp_update_next_port(sctp_next_port_to_try); 574 user_specified = 0; 575 } else { 576 port = sctp_update_next_port(port + 1); 577 } 578 579 /* 580 * Don't let this loop run forever in the case where 581 * all of the anonymous ports are in use. 582 */ 583 } while (++count < loopmax); 584 return (0); 585 } 586 587 /* 588 * Don't let port fall into the privileged range. 589 * Since the extra privileged ports can be arbitrary we also 590 * ensure that we exclude those from consideration. 591 * sctp_g_epriv_ports is not sorted thus we loop over it until 592 * there are no changes. 593 * 594 * Note: No locks are held when inspecting sctp_g_*epriv_ports 595 * but instead the code relies on: 596 * - the fact that the address of the array and its size never changes 597 * - the atomic assignment of the elements of the array 598 */ 599 in_port_t 600 sctp_update_next_port(in_port_t port) 601 { 602 int i; 603 604 retry: 605 if (port < sctp_smallest_anon_port || port > sctp_largest_anon_port) 606 port = sctp_smallest_anon_port; 607 608 if (port < sctp_smallest_nonpriv_port) 609 port = sctp_smallest_nonpriv_port; 610 611 for (i = 0; i < sctp_g_num_epriv_ports; i++) { 612 if (port == sctp_g_epriv_ports[i]) { 613 port++; 614 /* 615 * Make sure whether the port is in the 616 * valid range. 617 * 618 * XXX Note that if sctp_g_epriv_ports contains 619 * all the anonymous ports this will be an 620 * infinite loop. 621 */ 622 goto retry; 623 } 624 } 625 return (port); 626 } 627