1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/strsubr.h> 38 #include <sys/socket.h> 39 40 #include <netinet/in.h> 41 #include <netinet/ip6.h> 42 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <inet/ipclassifier.h> 47 #include <inet/ipsec_impl.h> 48 49 #include "sctp_impl.h" 50 #include "sctp_addr.h" 51 52 /* 53 * Common accept code. Called by sctp_conn_request. 54 * cr_pkt is the INIT / INIT ACK packet. 55 */ 56 static int 57 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 58 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 59 { 60 61 sctp_hdr_t *sctph; 62 sctp_chunk_hdr_t *ich; 63 sctp_init_chunk_t *init; 64 int err; 65 uint_t sctp_options; 66 67 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 68 ASSERT(OK_32PTR(sctph)); 69 70 acceptor->sctp_lport = listener->sctp_lport; 71 acceptor->sctp_fport = sctph->sh_sport; 72 73 ich = (sctp_chunk_hdr_t *)(iack + 1); 74 init = (sctp_init_chunk_t *)(ich + 1); 75 76 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 77 ASSERT(acceptor->sctp_faddrs == NULL); 78 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 79 &sctp_options); 80 if (err != 0) 81 return (err); 82 83 if ((sctp_options & SCTP_PRSCTP_OPTION) && 84 listener->sctp_prsctp_aware && sctp_prsctp_enabled) { 85 acceptor->sctp_prsctp_aware = B_TRUE; 86 } else { 87 acceptor->sctp_prsctp_aware = B_FALSE; 88 } 89 /* The new sctp_t is fully bound now. */ 90 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 91 92 sctp_set_hdraddrs(acceptor); 93 94 /* Get initial TSNs */ 95 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 96 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 97 acceptor->sctp_ltsn - 1; 98 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 99 /* Serial numbers are initialized to the same value as the TSNs */ 100 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 101 102 if (!sctp_initialize_params(acceptor, init, iack)) 103 return (ENOMEM); 104 105 /* 106 * Copy sctp_secret from the listener in case we need to validate 107 * a possibly delayed cookie. 108 */ 109 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 110 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 111 SCTP_SECRET_LEN); 112 acceptor->sctp_last_secret_update = lbolt64; 113 114 /* 115 * After acceptor is inserted in the hash list, it can be found. 116 * So we need to lock it here. 117 */ 118 RUN_SCTP(acceptor); 119 120 sctp_conn_hash_insert(&sctp_conn_fanout[ 121 SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0); 122 sctp_bind_hash_insert(&sctp_bind_fanout[ 123 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 124 125 /* 126 * No need to check for multicast destination since ip will only pass 127 * up multicasts to those that have expressed interest 128 * TODO: what about rejecting broadcasts? 129 * Also check that source is not a multicast or broadcast address. 130 */ 131 /* XXXSCTP */ 132 acceptor->sctp_state = SCTPS_ESTABLISHED; 133 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 134 /* 135 * listener->sctp_rwnd should be the default window size or a 136 * window size changed via SO_RCVBUF option. 137 */ 138 acceptor->sctp_rwnd = listener->sctp_rwnd; 139 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 140 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 141 sizeof (sctp_upcalls_t)); 142 143 return (0); 144 } 145 146 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 147 sctp_t * 148 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 149 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 150 { 151 sctp_t *eager; 152 uint_t ipvers; 153 ip6_t *ip6h; 154 int err; 155 conn_t *connp, *econnp; 156 157 /* 158 * No need to check for duplicate as this is the listener 159 * and we are holding the lock. This means that no new 160 * connection can be created out of it. And since the 161 * fanout already done cannot find a match, it means that 162 * there is no duplicate. 163 */ 164 ipvers = IPH_HDR_VERSION(mp->b_rptr); 165 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 166 ASSERT(OK_32PTR(mp->b_rptr)); 167 168 if ((eager = sctp_create_eager(sctp)) == NULL) { 169 return (NULL); 170 } 171 172 if (ipvers != IPV4_VERSION) { 173 ip6h = (ip6_t *)mp->b_rptr; 174 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 175 eager->sctp_linklocal = 1; 176 /* 177 * Record ifindex (might be zero) to tie this connection to 178 * that interface if either the listener was bound or 179 * if the connection is using link-local addresses. 180 */ 181 if (sctp->sctp_bound_if == ifindex || 182 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 183 eager->sctp_bound_if = ifindex; 184 /* 185 * XXX broken. bound_if is always overwritten by statement 186 * below. What is the right thing to do here? 187 */ 188 eager->sctp_bound_if = sctp->sctp_bound_if; 189 } 190 191 connp = sctp->sctp_connp; 192 econnp = eager->sctp_connp; 193 194 if (connp->conn_policy != NULL) { 195 ipsec_in_t *ii; 196 197 ASSERT(ipsec_mp != NULL); 198 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 199 ASSERT(ii->ipsec_in_policy == NULL); 200 IPPH_REFHOLD(connp->conn_policy); 201 ii->ipsec_in_policy = connp->conn_policy; 202 203 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 204 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 205 sctp_close_eager(eager); 206 BUMP_MIB(&sctp_mib, sctpListenDrop); 207 return (NULL); 208 } 209 } 210 211 if (ipsec_mp != NULL) { 212 /* 213 * XXX need to fix the cached policy issue here. 214 * We temporarily set the conn_src/conn_rem here so 215 * that IPsec can use it for the latched policy 216 * selector. This is obvioursly wrong as SCTP can 217 * use different addresses... 218 */ 219 if (ipvers == IPV4_VERSION) { 220 ipha_t *ipha; 221 222 ipha = (ipha_t *)mp->b_rptr; 223 econnp->conn_src = ipha->ipha_dst; 224 econnp->conn_rem = ipha->ipha_src; 225 } else { 226 econnp->conn_srcv6 = ip6h->ip6_dst; 227 econnp->conn_remv6 = ip6h->ip6_src; 228 } 229 } 230 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 231 sctp_close_eager(eager); 232 BUMP_MIB(&sctp_mib, sctpListenDrop); 233 return (NULL); 234 } 235 236 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 237 if (err) { 238 sctp_close_eager(eager); 239 BUMP_MIB(&sctp_mib, sctpListenDrop); 240 return (NULL); 241 } 242 243 /* 244 * On a clustered note send this notification to the clustering 245 * subsystem. 246 */ 247 if (cl_sctp_connect != NULL) { 248 uchar_t *slist; 249 uchar_t *flist; 250 size_t fsize; 251 size_t ssize; 252 253 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 254 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 255 slist = kmem_alloc(ssize, KM_NOSLEEP); 256 flist = kmem_alloc(fsize, KM_NOSLEEP); 257 if (slist == NULL || flist == NULL) { 258 if (slist != NULL) 259 kmem_free(slist, ssize); 260 if (flist != NULL) 261 kmem_free(flist, fsize); 262 sctp_close_eager(eager); 263 BUMP_MIB(&sctp_mib, sctpListenDrop); 264 return (NULL); 265 } 266 /* The clustering module frees these list */ 267 sctp_get_saddr_list(eager, slist, ssize); 268 sctp_get_faddr_list(eager, flist, fsize); 269 (*cl_sctp_connect)(eager->sctp_family, slist, 270 eager->sctp_nsaddrs, eager->sctp_lport, flist, 271 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 272 (cl_sctp_handle_t)eager); 273 } 274 275 /* Connection established, so send up the conn_ind */ 276 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 277 eager)) == NULL) { 278 sctp_close_eager(eager); 279 BUMP_MIB(&sctp_mib, sctpListenDrop); 280 return (NULL); 281 } 282 ASSERT(SCTP_IS_DETACHED(eager)); 283 eager->sctp_detached = B_FALSE; 284 if (eager->sctp_family == AF_INET) { 285 eager->sctp_ulp_prop(eager->sctp_ulpd, 286 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 287 sctp->sctp_hdr_len, strmsgsz); 288 } else { 289 eager->sctp_ulp_prop(eager->sctp_ulpd, 290 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 291 sctp->sctp_hdr6_len, strmsgsz); 292 } 293 return (eager); 294 } 295 296 /* 297 * Connect to a peer - this function inserts the sctp in the 298 * bind and conn fanouts, sends the INIT, and replies to the client 299 * with an OK ack. 300 */ 301 /* ARGSUSED */ 302 int 303 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 304 { 305 sin_t *sin; 306 sin6_t *sin6; 307 in6_addr_t dstaddr; 308 in_port_t dstport; 309 mblk_t *initmp; 310 sctp_tf_t *tbf; 311 sctp_t *lsctp; 312 char buf[INET6_ADDRSTRLEN]; 313 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 314 int hdrlen; 315 ip6_rthdr_t *rth; 316 sctp_faddr_t *cur_fp; 317 318 /* 319 * Determine packet type based on type of address passed in 320 * the request should contain an IPv4 or IPv6 address. 321 * Make sure that address family matches the type of 322 * family of the the address passed down 323 */ 324 if (addrlen < sizeof (sin_t)) { 325 return (EINVAL); 326 } 327 switch (dst->sa_family) { 328 case AF_INET: 329 sin = (sin_t *)dst; 330 331 /* Check for attempt to connect to non-unicast */ 332 if (IN_MULTICAST(sin->sin_addr.s_addr) || 333 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 334 ip0dbg(("sctp_connect: non-unicast\n")); 335 return (EINVAL); 336 } 337 if (sctp->sctp_connp->conn_ipv6_v6only) 338 return (EAFNOSUPPORT); 339 340 /* convert to v6 mapped */ 341 /* Check for attempt to connect to INADDR_ANY */ 342 if (sin->sin_addr.s_addr == INADDR_ANY) { 343 struct in_addr v4_addr; 344 /* 345 * SunOS 4.x and 4.3 BSD allow an application 346 * to connect a TCP socket to INADDR_ANY. 347 * When they do this, the kernel picks the 348 * address of one interface and uses it 349 * instead. The kernel usually ends up 350 * picking the address of the loopback 351 * interface. This is an undocumented feature. 352 * However, we provide the same thing here 353 * in case any TCP apps that use this feature 354 * are being ported to SCTP... 355 */ 356 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 357 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 358 } else { 359 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 360 } 361 dstport = sin->sin_port; 362 if (sin->sin_family == AF_INET) { 363 hdrlen = sctp->sctp_hdr_len; 364 } else { 365 hdrlen = sctp->sctp_hdr6_len; 366 } 367 break; 368 case AF_INET6: 369 sin6 = (sin6_t *)dst; 370 /* Check for attempt to connect to non-unicast. */ 371 if ((addrlen < sizeof (sin6_t)) || 372 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 373 ip0dbg(("sctp_connect: non-unicast\n")); 374 return (EINVAL); 375 } 376 if (sctp->sctp_connp->conn_ipv6_v6only && 377 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 378 return (EAFNOSUPPORT); 379 } 380 /* check for attempt to connect to unspec */ 381 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 382 dstaddr = ipv6_loopback; 383 } else { 384 dstaddr = sin6->sin6_addr; 385 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 386 sctp->sctp_linklocal = 1; 387 } 388 dstport = sin6->sin6_port; 389 hdrlen = sctp->sctp_hdr6_len; 390 break; 391 default: 392 dprint(1, ("sctp_connect: unknown family %d\n", 393 dst->sa_family)); 394 return (EAFNOSUPPORT); 395 } 396 397 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 398 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 399 400 RUN_SCTP(sctp); 401 402 if (sctp->sctp_family != dst->sa_family) { 403 WAKE_SCTP(sctp); 404 return (EINVAL); 405 } 406 407 switch (sctp->sctp_state) { 408 case SCTPS_IDLE: { 409 int err; 410 struct sockaddr_storage ss; 411 412 /* 413 * We support a quick connect capability here, allowing 414 * clients to transition directly from IDLE to COOKIE_WAIT. 415 * sctp_bindi will pick an unused port, insert the connection 416 * in the bind hash and transition to BOUND state. SCTP 417 * picks and uses what it considers the optimal local address 418 * set (just like specifiying INADDR_ANY to bind()). 419 */ 420 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 421 ASSERT(sctp->sctp_nsaddrs == 0); 422 423 bzero(&ss, sizeof (ss)); 424 ss.ss_family = sctp->sctp_family; 425 WAKE_SCTP(sctp); 426 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 427 sizeof (ss))) != 0) { 428 return (err); 429 } 430 RUN_SCTP(sctp); 431 sctp->sctp_bound_to_all = 1; 432 /* FALLTHRU */ 433 } 434 435 case SCTPS_BOUND: 436 ASSERT(sctp->sctp_nsaddrs > 0); 437 438 /* do the connect */ 439 /* XXX check for attempt to connect to self */ 440 sctp->sctp_fport = dstport; 441 442 ASSERT(sctp->sctp_iphc); 443 ASSERT(sctp->sctp_iphc6); 444 445 /* 446 * Don't allow this connection to completely duplicate 447 * an existing connection. 448 * 449 * Ensure that the duplicate check and insertion is atomic. 450 */ 451 sctp_conn_hash_remove(sctp); 452 tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)]; 453 mutex_enter(&tbf->tf_lock); 454 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 455 SCTPS_COOKIE_WAIT); 456 if (lsctp != NULL) { 457 /* found a duplicate connection */ 458 mutex_exit(&tbf->tf_lock); 459 SCTP_REFRELE(lsctp); 460 WAKE_SCTP(sctp); 461 return (EADDRINUSE); 462 } 463 /* 464 * OK; set up the peer addr (this may grow after we get 465 * the INIT ACK from the peer with additional addresses). 466 */ 467 if (sctp_add_faddr(sctp, &dstaddr, sleep) < 0) { 468 mutex_exit(&tbf->tf_lock); 469 WAKE_SCTP(sctp); 470 return (ENOMEM); 471 } 472 /* No valid src addr, return. */ 473 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 474 mutex_exit(&tbf->tf_lock); 475 WAKE_SCTP(sctp); 476 return (EADDRNOTAVAIL); 477 } 478 sctp->sctp_primary = sctp->sctp_faddrs; 479 sctp->sctp_current = sctp->sctp_faddrs; 480 cur_fp = sctp->sctp_current; 481 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 482 sctp_conn_hash_insert(tbf, sctp, 1); 483 mutex_exit(&tbf->tf_lock); 484 485 /* initialize composite headers */ 486 sctp_set_hdraddrs(sctp); 487 488 /* 489 * Massage a routing header (if present) putting the first hop 490 * in ip6_dst. 491 */ 492 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 493 (uint8_t *)sctp->sctp_sctph6); 494 if (rth != NULL) 495 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 496 497 /* 498 * Turn off the don't fragment bit on the (only) faddr, 499 * so that if one of the messages exchanged during the 500 * initialization sequence exceeds the path mtu, it 501 * at least has a chance to get there. SCTP does no 502 * fragmentation of initialization messages. The DF bit 503 * will be turned on again in sctp_send_cookie_echo() 504 * (but the cookie echo will still be sent with the df bit 505 * off). 506 */ 507 cur_fp->df = B_FALSE; 508 509 /* Mark this address as alive */ 510 cur_fp->state = SCTP_FADDRS_ALIVE; 511 512 /* This sctp_t is fully bound now. */ 513 sctp->sctp_connp->conn_fully_bound = B_TRUE; 514 515 /* Send the INIT to the peer */ 516 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 517 /* 518 * sctp_init_mp() could result in modifying the source 519 * address list, so take the hash lock. 520 */ 521 mutex_enter(&tbf->tf_lock); 522 initmp = sctp_init_mp(sctp); 523 if (initmp == NULL) { 524 mutex_exit(&tbf->tf_lock); 525 WAKE_SCTP(sctp); 526 /* let timer retry */ 527 return (0); 528 } 529 mutex_exit(&tbf->tf_lock); 530 sctp->sctp_state = SCTPS_COOKIE_WAIT; 531 /* 532 * On a clustered note send this notification to the clustering 533 * subsystem. 534 */ 535 if (cl_sctp_connect != NULL) { 536 uchar_t *slist; 537 uchar_t *flist; 538 size_t ssize; 539 size_t fsize; 540 541 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 542 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 543 slist = kmem_alloc(ssize, KM_SLEEP); 544 flist = kmem_alloc(fsize, KM_SLEEP); 545 /* The clustering module frees the lists */ 546 sctp_get_saddr_list(sctp, slist, ssize); 547 sctp_get_faddr_list(sctp, flist, fsize); 548 (*cl_sctp_connect)(sctp->sctp_family, slist, 549 sctp->sctp_nsaddrs, sctp->sctp_lport, 550 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 551 B_TRUE, (cl_sctp_handle_t)sctp); 552 } 553 WAKE_SCTP(sctp); 554 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 555 CONN_INC_REF(sctp->sctp_connp); 556 initmp->b_flag |= MSGHASREF; 557 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 558 BUMP_LOCAL(sctp->sctp_opkts); 559 560 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 561 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0); 562 563 return (0); 564 default: 565 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 566 WAKE_SCTP(sctp); 567 return (EINVAL); 568 } 569 } 570