1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/systm.h> 30 #include <sys/stream.h> 31 #include <sys/cmn_err.h> 32 #include <sys/kmem.h> 33 #define _SUN_TPI_VERSION 2 34 #include <sys/tihdr.h> 35 #include <sys/stropts.h> 36 #include <sys/strsubr.h> 37 #include <sys/socket.h> 38 #include <sys/tsol/tndb.h> 39 40 #include <netinet/in.h> 41 #include <netinet/ip6.h> 42 43 #include <inet/common.h> 44 #include <inet/ip.h> 45 #include <inet/ip6.h> 46 #include <inet/ipclassifier.h> 47 #include <inet/ipsec_impl.h> 48 49 #include "sctp_impl.h" 50 #include "sctp_addr.h" 51 52 /* 53 * Common accept code. Called by sctp_conn_request. 54 * cr_pkt is the INIT / INIT ACK packet. 55 */ 56 static int 57 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 58 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 59 { 60 61 sctp_hdr_t *sctph; 62 sctp_chunk_hdr_t *ich; 63 sctp_init_chunk_t *init; 64 int err; 65 uint_t sctp_options; 66 conn_t *lconnp; 67 cred_t *cr; 68 69 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 70 ASSERT(OK_32PTR(sctph)); 71 72 acceptor->sctp_lport = listener->sctp_lport; 73 acceptor->sctp_fport = sctph->sh_sport; 74 75 ich = (sctp_chunk_hdr_t *)(iack + 1); 76 init = (sctp_init_chunk_t *)(ich + 1); 77 78 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 79 ASSERT(acceptor->sctp_faddrs == NULL); 80 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 81 &sctp_options); 82 if (err != 0) 83 return (err); 84 85 lconnp = listener->sctp_connp; 86 if (lconnp->conn_mlp_type != mlptSingle) { 87 cr = lconnp->conn_peercred = DB_CRED(cr_pkt); 88 if (cr != NULL) 89 crhold(cr); 90 else 91 cr = lconnp->conn_cred; 92 } else { 93 cr = lconnp->conn_cred; 94 } 95 96 if ((err = sctp_set_hdraddrs(acceptor, cr)) != 0) 97 return (err); 98 99 if ((sctp_options & SCTP_PRSCTP_OPTION) && 100 listener->sctp_prsctp_aware && sctp_prsctp_enabled) { 101 acceptor->sctp_prsctp_aware = B_TRUE; 102 } else { 103 acceptor->sctp_prsctp_aware = B_FALSE; 104 } 105 /* The new sctp_t is fully bound now. */ 106 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 107 108 /* Get initial TSNs */ 109 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 110 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 111 acceptor->sctp_ltsn - 1; 112 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 113 /* Serial numbers are initialized to the same value as the TSNs */ 114 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 115 116 if (!sctp_initialize_params(acceptor, init, iack)) 117 return (ENOMEM); 118 119 /* 120 * Copy sctp_secret from the listener in case we need to validate 121 * a possibly delayed cookie. 122 */ 123 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 124 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 125 SCTP_SECRET_LEN); 126 acceptor->sctp_last_secret_update = lbolt64; 127 128 /* 129 * After acceptor is inserted in the hash list, it can be found. 130 * So we need to lock it here. 131 */ 132 RUN_SCTP(acceptor); 133 134 sctp_conn_hash_insert(&sctp_conn_fanout[ 135 SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0); 136 sctp_bind_hash_insert(&sctp_bind_fanout[ 137 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 138 139 /* 140 * No need to check for multicast destination since ip will only pass 141 * up multicasts to those that have expressed interest 142 * TODO: what about rejecting broadcasts? 143 * Also check that source is not a multicast or broadcast address. 144 */ 145 /* XXXSCTP */ 146 acceptor->sctp_state = SCTPS_ESTABLISHED; 147 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 148 /* 149 * listener->sctp_rwnd should be the default window size or a 150 * window size changed via SO_RCVBUF option. 151 */ 152 acceptor->sctp_rwnd = listener->sctp_rwnd; 153 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 154 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 155 sizeof (sctp_upcalls_t)); 156 157 return (0); 158 } 159 160 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 161 sctp_t * 162 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 163 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 164 { 165 sctp_t *eager; 166 uint_t ipvers; 167 ip6_t *ip6h; 168 int err; 169 conn_t *connp, *econnp; 170 171 /* 172 * No need to check for duplicate as this is the listener 173 * and we are holding the lock. This means that no new 174 * connection can be created out of it. And since the 175 * fanout already done cannot find a match, it means that 176 * there is no duplicate. 177 */ 178 ipvers = IPH_HDR_VERSION(mp->b_rptr); 179 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 180 ASSERT(OK_32PTR(mp->b_rptr)); 181 182 if ((eager = sctp_create_eager(sctp)) == NULL) { 183 return (NULL); 184 } 185 186 if (ipvers != IPV4_VERSION) { 187 ip6h = (ip6_t *)mp->b_rptr; 188 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 189 eager->sctp_linklocal = 1; 190 /* 191 * Record ifindex (might be zero) to tie this connection to 192 * that interface if either the listener was bound or 193 * if the connection is using link-local addresses. 194 */ 195 if (sctp->sctp_bound_if == ifindex || 196 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 197 eager->sctp_bound_if = ifindex; 198 /* 199 * XXX broken. bound_if is always overwritten by statement 200 * below. What is the right thing to do here? 201 */ 202 eager->sctp_bound_if = sctp->sctp_bound_if; 203 } 204 205 connp = sctp->sctp_connp; 206 econnp = eager->sctp_connp; 207 208 if (connp->conn_policy != NULL) { 209 ipsec_in_t *ii; 210 211 ASSERT(ipsec_mp != NULL); 212 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 213 ASSERT(ii->ipsec_in_policy == NULL); 214 IPPH_REFHOLD(connp->conn_policy); 215 ii->ipsec_in_policy = connp->conn_policy; 216 217 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 218 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 219 sctp_close_eager(eager); 220 BUMP_MIB(&sctp_mib, sctpListenDrop); 221 return (NULL); 222 } 223 } 224 225 if (ipsec_mp != NULL) { 226 /* 227 * XXX need to fix the cached policy issue here. 228 * We temporarily set the conn_src/conn_rem here so 229 * that IPsec can use it for the latched policy 230 * selector. This is obvioursly wrong as SCTP can 231 * use different addresses... 232 */ 233 if (ipvers == IPV4_VERSION) { 234 ipha_t *ipha; 235 236 ipha = (ipha_t *)mp->b_rptr; 237 econnp->conn_src = ipha->ipha_dst; 238 econnp->conn_rem = ipha->ipha_src; 239 } else { 240 econnp->conn_srcv6 = ip6h->ip6_dst; 241 econnp->conn_remv6 = ip6h->ip6_src; 242 } 243 } 244 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 245 sctp_close_eager(eager); 246 BUMP_MIB(&sctp_mib, sctpListenDrop); 247 return (NULL); 248 } 249 250 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 251 if (err) { 252 sctp_close_eager(eager); 253 BUMP_MIB(&sctp_mib, sctpListenDrop); 254 return (NULL); 255 } 256 257 /* 258 * On a clustered note send this notification to the clustering 259 * subsystem. 260 */ 261 if (cl_sctp_connect != NULL) { 262 uchar_t *slist; 263 uchar_t *flist; 264 size_t fsize; 265 size_t ssize; 266 267 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 268 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 269 slist = kmem_alloc(ssize, KM_NOSLEEP); 270 flist = kmem_alloc(fsize, KM_NOSLEEP); 271 if (slist == NULL || flist == NULL) { 272 if (slist != NULL) 273 kmem_free(slist, ssize); 274 if (flist != NULL) 275 kmem_free(flist, fsize); 276 sctp_close_eager(eager); 277 BUMP_MIB(&sctp_mib, sctpListenDrop); 278 return (NULL); 279 } 280 /* The clustering module frees these list */ 281 sctp_get_saddr_list(eager, slist, ssize); 282 sctp_get_faddr_list(eager, flist, fsize); 283 (*cl_sctp_connect)(eager->sctp_family, slist, 284 eager->sctp_nsaddrs, eager->sctp_lport, flist, 285 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 286 (cl_sctp_handle_t)eager); 287 } 288 289 /* Connection established, so send up the conn_ind */ 290 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 291 eager)) == NULL) { 292 sctp_close_eager(eager); 293 BUMP_MIB(&sctp_mib, sctpListenDrop); 294 return (NULL); 295 } 296 ASSERT(SCTP_IS_DETACHED(eager)); 297 eager->sctp_detached = B_FALSE; 298 if (eager->sctp_family == AF_INET) { 299 eager->sctp_ulp_prop(eager->sctp_ulpd, 300 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 301 sctp->sctp_hdr_len, strmsgsz); 302 } else { 303 eager->sctp_ulp_prop(eager->sctp_ulpd, 304 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 305 sctp->sctp_hdr6_len, strmsgsz); 306 } 307 return (eager); 308 } 309 310 /* 311 * Connect to a peer - this function inserts the sctp in the 312 * bind and conn fanouts, sends the INIT, and replies to the client 313 * with an OK ack. 314 */ 315 int 316 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 317 { 318 sin_t *sin; 319 sin6_t *sin6; 320 in6_addr_t dstaddr; 321 in_port_t dstport; 322 mblk_t *initmp; 323 sctp_tf_t *tbf; 324 sctp_t *lsctp; 325 char buf[INET6_ADDRSTRLEN]; 326 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 327 int hdrlen; 328 ip6_rthdr_t *rth; 329 int err; 330 sctp_faddr_t *cur_fp; 331 332 /* 333 * Determine packet type based on type of address passed in 334 * the request should contain an IPv4 or IPv6 address. 335 * Make sure that address family matches the type of 336 * family of the the address passed down 337 */ 338 if (addrlen < sizeof (sin_t)) { 339 return (EINVAL); 340 } 341 switch (dst->sa_family) { 342 case AF_INET: 343 sin = (sin_t *)dst; 344 345 /* Check for attempt to connect to non-unicast */ 346 if (IN_MULTICAST(sin->sin_addr.s_addr) || 347 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 348 ip0dbg(("sctp_connect: non-unicast\n")); 349 return (EINVAL); 350 } 351 if (sctp->sctp_connp->conn_ipv6_v6only) 352 return (EAFNOSUPPORT); 353 354 /* convert to v6 mapped */ 355 /* Check for attempt to connect to INADDR_ANY */ 356 if (sin->sin_addr.s_addr == INADDR_ANY) { 357 struct in_addr v4_addr; 358 /* 359 * SunOS 4.x and 4.3 BSD allow an application 360 * to connect a TCP socket to INADDR_ANY. 361 * When they do this, the kernel picks the 362 * address of one interface and uses it 363 * instead. The kernel usually ends up 364 * picking the address of the loopback 365 * interface. This is an undocumented feature. 366 * However, we provide the same thing here 367 * in case any TCP apps that use this feature 368 * are being ported to SCTP... 369 */ 370 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 371 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 372 } else { 373 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 374 } 375 dstport = sin->sin_port; 376 if (sin->sin_family == AF_INET) { 377 hdrlen = sctp->sctp_hdr_len; 378 } else { 379 hdrlen = sctp->sctp_hdr6_len; 380 } 381 break; 382 case AF_INET6: 383 sin6 = (sin6_t *)dst; 384 /* Check for attempt to connect to non-unicast. */ 385 if ((addrlen < sizeof (sin6_t)) || 386 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 387 ip0dbg(("sctp_connect: non-unicast\n")); 388 return (EINVAL); 389 } 390 if (sctp->sctp_connp->conn_ipv6_v6only && 391 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 392 return (EAFNOSUPPORT); 393 } 394 /* check for attempt to connect to unspec */ 395 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 396 dstaddr = ipv6_loopback; 397 } else { 398 dstaddr = sin6->sin6_addr; 399 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 400 sctp->sctp_linklocal = 1; 401 } 402 dstport = sin6->sin6_port; 403 hdrlen = sctp->sctp_hdr6_len; 404 break; 405 default: 406 dprint(1, ("sctp_connect: unknown family %d\n", 407 dst->sa_family)); 408 return (EAFNOSUPPORT); 409 } 410 411 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 412 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 413 414 RUN_SCTP(sctp); 415 416 if (sctp->sctp_family != dst->sa_family) { 417 WAKE_SCTP(sctp); 418 return (EINVAL); 419 } 420 421 switch (sctp->sctp_state) { 422 case SCTPS_IDLE: { 423 struct sockaddr_storage ss; 424 425 /* 426 * We support a quick connect capability here, allowing 427 * clients to transition directly from IDLE to COOKIE_WAIT. 428 * sctp_bindi will pick an unused port, insert the connection 429 * in the bind hash and transition to BOUND state. SCTP 430 * picks and uses what it considers the optimal local address 431 * set (just like specifiying INADDR_ANY to bind()). 432 */ 433 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 434 ASSERT(sctp->sctp_nsaddrs == 0); 435 436 bzero(&ss, sizeof (ss)); 437 ss.ss_family = sctp->sctp_family; 438 WAKE_SCTP(sctp); 439 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 440 sizeof (ss))) != 0) { 441 return (err); 442 } 443 RUN_SCTP(sctp); 444 sctp->sctp_bound_to_all = 1; 445 /* FALLTHRU */ 446 } 447 448 case SCTPS_BOUND: 449 ASSERT(sctp->sctp_nsaddrs > 0); 450 451 /* do the connect */ 452 /* XXX check for attempt to connect to self */ 453 sctp->sctp_fport = dstport; 454 455 ASSERT(sctp->sctp_iphc); 456 ASSERT(sctp->sctp_iphc6); 457 458 /* 459 * Don't allow this connection to completely duplicate 460 * an existing connection. 461 * 462 * Ensure that the duplicate check and insertion is atomic. 463 */ 464 sctp_conn_hash_remove(sctp); 465 tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)]; 466 mutex_enter(&tbf->tf_lock); 467 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 468 SCTPS_COOKIE_WAIT); 469 if (lsctp != NULL) { 470 /* found a duplicate connection */ 471 mutex_exit(&tbf->tf_lock); 472 SCTP_REFRELE(lsctp); 473 WAKE_SCTP(sctp); 474 return (EADDRINUSE); 475 } 476 /* 477 * OK; set up the peer addr (this may grow after we get 478 * the INIT ACK from the peer with additional addresses). 479 */ 480 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep)) != 0) { 481 mutex_exit(&tbf->tf_lock); 482 WAKE_SCTP(sctp); 483 return (err); 484 } 485 /* No valid src addr, return. */ 486 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 487 mutex_exit(&tbf->tf_lock); 488 WAKE_SCTP(sctp); 489 return (EADDRNOTAVAIL); 490 } 491 sctp->sctp_primary = sctp->sctp_faddrs; 492 sctp->sctp_current = sctp->sctp_faddrs; 493 cur_fp = sctp->sctp_current; 494 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 495 sctp_conn_hash_insert(tbf, sctp, 1); 496 mutex_exit(&tbf->tf_lock); 497 498 /* initialize composite headers */ 499 if ((err = sctp_set_hdraddrs(sctp, NULL)) != 0) { 500 sctp_conn_hash_remove(sctp); 501 WAKE_SCTP(sctp); 502 return (err); 503 } 504 505 /* 506 * Massage a routing header (if present) putting the first hop 507 * in ip6_dst. 508 */ 509 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 510 (uint8_t *)sctp->sctp_sctph6); 511 if (rth != NULL) 512 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 513 514 /* 515 * Turn off the don't fragment bit on the (only) faddr, 516 * so that if one of the messages exchanged during the 517 * initialization sequence exceeds the path mtu, it 518 * at least has a chance to get there. SCTP does no 519 * fragmentation of initialization messages. The DF bit 520 * will be turned on again in sctp_send_cookie_echo() 521 * (but the cookie echo will still be sent with the df bit 522 * off). 523 */ 524 cur_fp->df = B_FALSE; 525 526 /* Mark this address as alive */ 527 cur_fp->state = SCTP_FADDRS_ALIVE; 528 529 /* This sctp_t is fully bound now. */ 530 sctp->sctp_connp->conn_fully_bound = B_TRUE; 531 532 /* Send the INIT to the peer */ 533 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 534 /* 535 * sctp_init_mp() could result in modifying the source 536 * address list, so take the hash lock. 537 */ 538 mutex_enter(&tbf->tf_lock); 539 initmp = sctp_init_mp(sctp); 540 if (initmp == NULL) { 541 mutex_exit(&tbf->tf_lock); 542 WAKE_SCTP(sctp); 543 /* let timer retry */ 544 return (0); 545 } 546 mutex_exit(&tbf->tf_lock); 547 sctp->sctp_state = SCTPS_COOKIE_WAIT; 548 /* 549 * On a clustered note send this notification to the clustering 550 * subsystem. 551 */ 552 if (cl_sctp_connect != NULL) { 553 uchar_t *slist; 554 uchar_t *flist; 555 size_t ssize; 556 size_t fsize; 557 558 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 559 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 560 slist = kmem_alloc(ssize, KM_SLEEP); 561 flist = kmem_alloc(fsize, KM_SLEEP); 562 /* The clustering module frees the lists */ 563 sctp_get_saddr_list(sctp, slist, ssize); 564 sctp_get_faddr_list(sctp, flist, fsize); 565 (*cl_sctp_connect)(sctp->sctp_family, slist, 566 sctp->sctp_nsaddrs, sctp->sctp_lport, 567 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 568 B_TRUE, (cl_sctp_handle_t)sctp); 569 } 570 WAKE_SCTP(sctp); 571 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 572 CONN_INC_REF(sctp->sctp_connp); 573 initmp->b_flag |= MSGHASREF; 574 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 575 BUMP_LOCAL(sctp->sctp_opkts); 576 577 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 578 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0); 579 580 return (0); 581 default: 582 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 583 WAKE_SCTP(sctp); 584 return (EINVAL); 585 } 586 } 587