1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/strsubr.h> 38 #include <sys/socket.h> 39 #include <sys/tsol/tndb.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ipclassifier.h> 48 #include <inet/ipsec_impl.h> 49 50 #include "sctp_impl.h" 51 #include "sctp_addr.h" 52 53 /* 54 * Common accept code. Called by sctp_conn_request. 55 * cr_pkt is the INIT / INIT ACK packet. 56 */ 57 static int 58 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 59 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 60 { 61 62 sctp_hdr_t *sctph; 63 sctp_chunk_hdr_t *ich; 64 sctp_init_chunk_t *init; 65 int err; 66 uint_t sctp_options; 67 conn_t *aconnp; 68 conn_t *lconnp; 69 cred_t *cr; 70 sctp_stack_t *sctps = listener->sctp_sctps; 71 72 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 73 ASSERT(OK_32PTR(sctph)); 74 75 acceptor->sctp_lport = listener->sctp_lport; 76 acceptor->sctp_fport = sctph->sh_sport; 77 78 ich = (sctp_chunk_hdr_t *)(iack + 1); 79 init = (sctp_init_chunk_t *)(ich + 1); 80 81 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 82 ASSERT(acceptor->sctp_faddrs == NULL); 83 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 84 &sctp_options); 85 if (err != 0) 86 return (err); 87 88 aconnp = acceptor->sctp_connp; 89 lconnp = listener->sctp_connp; 90 if (lconnp->conn_mlp_type != mlptSingle) { 91 cr = aconnp->conn_peercred = DB_CRED(cr_pkt); 92 if (cr != NULL) 93 crhold(cr); 94 } 95 96 if ((err = sctp_set_hdraddrs(acceptor)) != 0) 97 return (err); 98 99 if ((sctp_options & SCTP_PRSCTP_OPTION) && 100 listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) { 101 acceptor->sctp_prsctp_aware = B_TRUE; 102 } else { 103 acceptor->sctp_prsctp_aware = B_FALSE; 104 } 105 /* The new sctp_t is fully bound now. */ 106 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 107 108 /* Get initial TSNs */ 109 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 110 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 111 acceptor->sctp_ltsn - 1; 112 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 113 /* Serial numbers are initialized to the same value as the TSNs */ 114 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 115 116 if (!sctp_initialize_params(acceptor, init, iack)) 117 return (ENOMEM); 118 119 /* 120 * Copy sctp_secret from the listener in case we need to validate 121 * a possibly delayed cookie. 122 */ 123 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 124 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 125 SCTP_SECRET_LEN); 126 acceptor->sctp_last_secret_update = lbolt64; 127 128 /* 129 * After acceptor is inserted in the hash list, it can be found. 130 * So we need to lock it here. 131 */ 132 RUN_SCTP(acceptor); 133 134 sctp_conn_hash_insert(&sctps->sctps_conn_fanout[ 135 SCTP_CONN_HASH(sctps, acceptor->sctp_ports)], acceptor, 0); 136 sctp_bind_hash_insert(&sctps->sctps_bind_fanout[ 137 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 138 139 /* 140 * No need to check for multicast destination since ip will only pass 141 * up multicasts to those that have expressed interest 142 * TODO: what about rejecting broadcasts? 143 * Also check that source is not a multicast or broadcast address. 144 */ 145 /* XXXSCTP */ 146 acceptor->sctp_state = SCTPS_ESTABLISHED; 147 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 148 /* 149 * listener->sctp_rwnd should be the default window size or a 150 * window size changed via SO_RCVBUF option. 151 */ 152 acceptor->sctp_rwnd = listener->sctp_rwnd; 153 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 154 acceptor->sctp_pd_point = acceptor->sctp_rwnd; 155 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 156 sizeof (sctp_upcalls_t)); 157 158 return (0); 159 } 160 161 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 162 sctp_t * 163 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 164 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 165 { 166 sctp_t *eager; 167 uint_t ipvers; 168 ip6_t *ip6h; 169 int err; 170 conn_t *connp, *econnp; 171 sctp_stack_t *sctps; 172 173 /* 174 * No need to check for duplicate as this is the listener 175 * and we are holding the lock. This means that no new 176 * connection can be created out of it. And since the 177 * fanout already done cannot find a match, it means that 178 * there is no duplicate. 179 */ 180 ipvers = IPH_HDR_VERSION(mp->b_rptr); 181 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 182 ASSERT(OK_32PTR(mp->b_rptr)); 183 184 if ((eager = sctp_create_eager(sctp)) == NULL) { 185 return (NULL); 186 } 187 188 if (ipvers != IPV4_VERSION) { 189 ip6h = (ip6_t *)mp->b_rptr; 190 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 191 eager->sctp_linklocal = 1; 192 /* 193 * Record ifindex (might be zero) to tie this connection to 194 * that interface if either the listener was bound or 195 * if the connection is using link-local addresses. 196 */ 197 if (sctp->sctp_bound_if == ifindex || 198 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 199 eager->sctp_bound_if = ifindex; 200 /* 201 * XXX broken. bound_if is always overwritten by statement 202 * below. What is the right thing to do here? 203 */ 204 eager->sctp_bound_if = sctp->sctp_bound_if; 205 } 206 207 connp = sctp->sctp_connp; 208 sctps = sctp->sctp_sctps; 209 econnp = eager->sctp_connp; 210 211 if (connp->conn_policy != NULL) { 212 ipsec_in_t *ii; 213 214 ASSERT(ipsec_mp != NULL); 215 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 216 ASSERT(ii->ipsec_in_policy == NULL); 217 IPPH_REFHOLD(connp->conn_policy); 218 ii->ipsec_in_policy = connp->conn_policy; 219 220 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 221 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 222 sctp_close_eager(eager); 223 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 224 return (NULL); 225 } 226 } 227 228 if (ipsec_mp != NULL) { 229 /* 230 * XXX need to fix the cached policy issue here. 231 * We temporarily set the conn_src/conn_rem here so 232 * that IPsec can use it for the latched policy 233 * selector. This is obvioursly wrong as SCTP can 234 * use different addresses... 235 */ 236 if (ipvers == IPV4_VERSION) { 237 ipha_t *ipha; 238 239 ipha = (ipha_t *)mp->b_rptr; 240 econnp->conn_src = ipha->ipha_dst; 241 econnp->conn_rem = ipha->ipha_src; 242 } else { 243 econnp->conn_srcv6 = ip6h->ip6_dst; 244 econnp->conn_remv6 = ip6h->ip6_src; 245 } 246 } 247 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 248 sctp_close_eager(eager); 249 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 250 return (NULL); 251 } 252 253 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 254 if (err) { 255 sctp_close_eager(eager); 256 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 257 return (NULL); 258 } 259 260 /* 261 * On a clustered note send this notification to the clustering 262 * subsystem. 263 */ 264 if (cl_sctp_connect != NULL) { 265 uchar_t *slist; 266 uchar_t *flist; 267 size_t fsize; 268 size_t ssize; 269 270 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 271 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 272 slist = kmem_alloc(ssize, KM_NOSLEEP); 273 flist = kmem_alloc(fsize, KM_NOSLEEP); 274 if (slist == NULL || flist == NULL) { 275 if (slist != NULL) 276 kmem_free(slist, ssize); 277 if (flist != NULL) 278 kmem_free(flist, fsize); 279 sctp_close_eager(eager); 280 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 281 SCTP_KSTAT(sctps, sctp_cl_connect); 282 return (NULL); 283 } 284 /* The clustering module frees these list */ 285 sctp_get_saddr_list(eager, slist, ssize); 286 sctp_get_faddr_list(eager, flist, fsize); 287 (*cl_sctp_connect)(eager->sctp_family, slist, 288 eager->sctp_nsaddrs, eager->sctp_lport, flist, 289 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 290 (cl_sctp_handle_t)eager); 291 } 292 293 /* Connection established, so send up the conn_ind */ 294 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 295 eager)) == NULL) { 296 sctp_close_eager(eager); 297 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 298 return (NULL); 299 } 300 ASSERT(SCTP_IS_DETACHED(eager)); 301 eager->sctp_detached = B_FALSE; 302 if (eager->sctp_family == AF_INET) { 303 eager->sctp_ulp_prop(eager->sctp_ulpd, 304 sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + 305 sctp->sctp_hdr_len, strmsgsz); 306 } else { 307 eager->sctp_ulp_prop(eager->sctp_ulpd, 308 sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + 309 sctp->sctp_hdr6_len, strmsgsz); 310 } 311 return (eager); 312 } 313 314 /* 315 * Connect to a peer - this function inserts the sctp in the 316 * bind and conn fanouts, sends the INIT, and replies to the client 317 * with an OK ack. 318 */ 319 int 320 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 321 { 322 sin_t *sin; 323 sin6_t *sin6; 324 in6_addr_t dstaddr; 325 in_port_t dstport; 326 mblk_t *initmp; 327 sctp_tf_t *tbf; 328 sctp_t *lsctp; 329 char buf[INET6_ADDRSTRLEN]; 330 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 331 int hdrlen; 332 ip6_rthdr_t *rth; 333 int err; 334 sctp_faddr_t *cur_fp; 335 sctp_stack_t *sctps = sctp->sctp_sctps; 336 337 /* 338 * Determine packet type based on type of address passed in 339 * the request should contain an IPv4 or IPv6 address. 340 * Make sure that address family matches the type of 341 * family of the the address passed down 342 */ 343 if (addrlen < sizeof (sin_t)) { 344 return (EINVAL); 345 } 346 switch (dst->sa_family) { 347 case AF_INET: 348 sin = (sin_t *)dst; 349 350 /* Check for attempt to connect to non-unicast */ 351 if (IN_MULTICAST(sin->sin_addr.s_addr) || 352 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 353 ip0dbg(("sctp_connect: non-unicast\n")); 354 return (EINVAL); 355 } 356 if (sctp->sctp_connp->conn_ipv6_v6only) 357 return (EAFNOSUPPORT); 358 359 /* convert to v6 mapped */ 360 /* Check for attempt to connect to INADDR_ANY */ 361 if (sin->sin_addr.s_addr == INADDR_ANY) { 362 struct in_addr v4_addr; 363 /* 364 * SunOS 4.x and 4.3 BSD allow an application 365 * to connect a TCP socket to INADDR_ANY. 366 * When they do this, the kernel picks the 367 * address of one interface and uses it 368 * instead. The kernel usually ends up 369 * picking the address of the loopback 370 * interface. This is an undocumented feature. 371 * However, we provide the same thing here 372 * in case any TCP apps that use this feature 373 * are being ported to SCTP... 374 */ 375 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 376 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 377 } else { 378 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 379 } 380 dstport = sin->sin_port; 381 if (sin->sin_family == AF_INET) { 382 hdrlen = sctp->sctp_hdr_len; 383 } else { 384 hdrlen = sctp->sctp_hdr6_len; 385 } 386 break; 387 case AF_INET6: 388 sin6 = (sin6_t *)dst; 389 /* Check for attempt to connect to non-unicast. */ 390 if ((addrlen < sizeof (sin6_t)) || 391 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 392 ip0dbg(("sctp_connect: non-unicast\n")); 393 return (EINVAL); 394 } 395 if (sctp->sctp_connp->conn_ipv6_v6only && 396 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 397 return (EAFNOSUPPORT); 398 } 399 /* check for attempt to connect to unspec */ 400 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 401 dstaddr = ipv6_loopback; 402 } else { 403 dstaddr = sin6->sin6_addr; 404 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 405 sctp->sctp_linklocal = 1; 406 } 407 dstport = sin6->sin6_port; 408 hdrlen = sctp->sctp_hdr6_len; 409 break; 410 default: 411 dprint(1, ("sctp_connect: unknown family %d\n", 412 dst->sa_family)); 413 return (EAFNOSUPPORT); 414 } 415 416 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 417 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 418 419 RUN_SCTP(sctp); 420 421 if (sctp->sctp_family != dst->sa_family || 422 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) { 423 WAKE_SCTP(sctp); 424 return (EINVAL); 425 } 426 427 switch (sctp->sctp_state) { 428 case SCTPS_IDLE: { 429 struct sockaddr_storage ss; 430 431 /* 432 * We support a quick connect capability here, allowing 433 * clients to transition directly from IDLE to COOKIE_WAIT. 434 * sctp_bindi will pick an unused port, insert the connection 435 * in the bind hash and transition to BOUND state. SCTP 436 * picks and uses what it considers the optimal local address 437 * set (just like specifiying INADDR_ANY to bind()). 438 */ 439 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 440 ASSERT(sctp->sctp_nsaddrs == 0); 441 442 bzero(&ss, sizeof (ss)); 443 ss.ss_family = sctp->sctp_family; 444 WAKE_SCTP(sctp); 445 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 446 sizeof (ss))) != 0) { 447 return (err); 448 } 449 RUN_SCTP(sctp); 450 sctp->sctp_bound_to_all = 1; 451 /* FALLTHRU */ 452 } 453 454 case SCTPS_BOUND: 455 ASSERT(sctp->sctp_nsaddrs > 0); 456 457 /* do the connect */ 458 /* XXX check for attempt to connect to self */ 459 sctp->sctp_fport = dstport; 460 461 ASSERT(sctp->sctp_iphc); 462 ASSERT(sctp->sctp_iphc6); 463 464 /* 465 * Don't allow this connection to completely duplicate 466 * an existing connection. 467 * 468 * Ensure that the duplicate check and insertion is atomic. 469 */ 470 sctp_conn_hash_remove(sctp); 471 tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, 472 sctp->sctp_ports)]; 473 mutex_enter(&tbf->tf_lock); 474 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 475 SCTPS_COOKIE_WAIT); 476 if (lsctp != NULL) { 477 /* found a duplicate connection */ 478 mutex_exit(&tbf->tf_lock); 479 SCTP_REFRELE(lsctp); 480 WAKE_SCTP(sctp); 481 return (EADDRINUSE); 482 } 483 /* 484 * OK; set up the peer addr (this may grow after we get 485 * the INIT ACK from the peer with additional addresses). 486 */ 487 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep, 488 B_FALSE)) != 0) { 489 mutex_exit(&tbf->tf_lock); 490 WAKE_SCTP(sctp); 491 return (err); 492 } 493 /* No valid src addr, return. */ 494 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 495 mutex_exit(&tbf->tf_lock); 496 WAKE_SCTP(sctp); 497 return (EADDRNOTAVAIL); 498 } 499 sctp->sctp_primary = sctp->sctp_faddrs; 500 sctp->sctp_current = sctp->sctp_faddrs; 501 cur_fp = sctp->sctp_current; 502 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 503 sctp_conn_hash_insert(tbf, sctp, 1); 504 mutex_exit(&tbf->tf_lock); 505 506 /* initialize composite headers */ 507 if ((err = sctp_set_hdraddrs(sctp)) != 0) { 508 sctp_conn_hash_remove(sctp); 509 WAKE_SCTP(sctp); 510 return (err); 511 } 512 513 /* 514 * Massage a routing header (if present) putting the first hop 515 * in ip6_dst. 516 */ 517 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 518 (uint8_t *)sctp->sctp_sctph6); 519 if (rth != NULL) { 520 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth, 521 sctps->sctps_netstack); 522 } 523 524 /* 525 * Turn off the don't fragment bit on the (only) faddr, 526 * so that if one of the messages exchanged during the 527 * initialization sequence exceeds the path mtu, it 528 * at least has a chance to get there. SCTP does no 529 * fragmentation of initialization messages. The DF bit 530 * will be turned on again in sctp_send_cookie_echo() 531 * (but the cookie echo will still be sent with the df bit 532 * off). 533 */ 534 cur_fp->df = B_FALSE; 535 536 /* Mark this address as alive */ 537 cur_fp->state = SCTP_FADDRS_ALIVE; 538 539 /* This sctp_t is fully bound now. */ 540 sctp->sctp_connp->conn_fully_bound = B_TRUE; 541 542 /* Send the INIT to the peer */ 543 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 544 /* 545 * sctp_init_mp() could result in modifying the source 546 * address list, so take the hash lock. 547 */ 548 mutex_enter(&tbf->tf_lock); 549 initmp = sctp_init_mp(sctp); 550 if (initmp == NULL) { 551 mutex_exit(&tbf->tf_lock); 552 WAKE_SCTP(sctp); 553 /* let timer retry */ 554 return (0); 555 } 556 mutex_exit(&tbf->tf_lock); 557 sctp->sctp_state = SCTPS_COOKIE_WAIT; 558 /* 559 * On a clustered note send this notification to the clustering 560 * subsystem. 561 */ 562 if (cl_sctp_connect != NULL) { 563 uchar_t *slist; 564 uchar_t *flist; 565 size_t ssize; 566 size_t fsize; 567 568 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 569 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 570 slist = kmem_alloc(ssize, KM_SLEEP); 571 flist = kmem_alloc(fsize, KM_SLEEP); 572 /* The clustering module frees the lists */ 573 sctp_get_saddr_list(sctp, slist, ssize); 574 sctp_get_faddr_list(sctp, flist, fsize); 575 (*cl_sctp_connect)(sctp->sctp_family, slist, 576 sctp->sctp_nsaddrs, sctp->sctp_lport, 577 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 578 B_TRUE, (cl_sctp_handle_t)sctp); 579 } 580 WAKE_SCTP(sctp); 581 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 582 CONN_INC_REF(sctp->sctp_connp); 583 initmp->b_flag |= MSGHASREF; 584 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 585 BUMP_LOCAL(sctp->sctp_opkts); 586 587 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 588 sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 589 0); 590 591 return (0); 592 default: 593 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 594 WAKE_SCTP(sctp); 595 return (EINVAL); 596 } 597 } 598