1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/strsubr.h> 38 #include <sys/socket.h> 39 #include <sys/tsol/tndb.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ipclassifier.h> 48 #include <inet/ipsec_impl.h> 49 50 #include "sctp_impl.h" 51 #include "sctp_addr.h" 52 53 /* 54 * Common accept code. Called by sctp_conn_request. 55 * cr_pkt is the INIT / INIT ACK packet. 56 */ 57 static int 58 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 59 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 60 { 61 62 sctp_hdr_t *sctph; 63 sctp_chunk_hdr_t *ich; 64 sctp_init_chunk_t *init; 65 int err; 66 uint_t sctp_options; 67 conn_t *aconnp; 68 conn_t *lconnp; 69 cred_t *cr; 70 sctp_stack_t *sctps = listener->sctp_sctps; 71 72 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 73 ASSERT(OK_32PTR(sctph)); 74 75 acceptor->sctp_lport = listener->sctp_lport; 76 acceptor->sctp_fport = sctph->sh_sport; 77 78 ich = (sctp_chunk_hdr_t *)(iack + 1); 79 init = (sctp_init_chunk_t *)(ich + 1); 80 81 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 82 ASSERT(acceptor->sctp_faddrs == NULL); 83 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 84 &sctp_options); 85 if (err != 0) 86 return (err); 87 88 aconnp = acceptor->sctp_connp; 89 lconnp = listener->sctp_connp; 90 if (lconnp->conn_mlp_type != mlptSingle) { 91 cr = aconnp->conn_peercred = DB_CRED(cr_pkt); 92 if (cr != NULL) 93 crhold(cr); 94 } 95 96 if ((err = sctp_set_hdraddrs(acceptor)) != 0) 97 return (err); 98 99 if ((sctp_options & SCTP_PRSCTP_OPTION) && 100 listener->sctp_prsctp_aware && sctps->sctps_prsctp_enabled) { 101 acceptor->sctp_prsctp_aware = B_TRUE; 102 } else { 103 acceptor->sctp_prsctp_aware = B_FALSE; 104 } 105 /* The new sctp_t is fully bound now. */ 106 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 107 108 /* Get initial TSNs */ 109 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 110 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 111 acceptor->sctp_ltsn - 1; 112 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 113 /* Serial numbers are initialized to the same value as the TSNs */ 114 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 115 116 if (!sctp_initialize_params(acceptor, init, iack)) 117 return (ENOMEM); 118 119 /* 120 * Copy sctp_secret from the listener in case we need to validate 121 * a possibly delayed cookie. 122 */ 123 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 124 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 125 SCTP_SECRET_LEN); 126 acceptor->sctp_last_secret_update = lbolt64; 127 128 /* 129 * After acceptor is inserted in the hash list, it can be found. 130 * So we need to lock it here. 131 */ 132 RUN_SCTP(acceptor); 133 134 sctp_conn_hash_insert(&sctps->sctps_conn_fanout[ 135 SCTP_CONN_HASH(sctps, acceptor->sctp_ports)], acceptor, 0); 136 sctp_bind_hash_insert(&sctps->sctps_bind_fanout[ 137 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 138 139 /* 140 * No need to check for multicast destination since ip will only pass 141 * up multicasts to those that have expressed interest 142 * TODO: what about rejecting broadcasts? 143 * Also check that source is not a multicast or broadcast address. 144 */ 145 /* XXXSCTP */ 146 acceptor->sctp_state = SCTPS_ESTABLISHED; 147 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 148 /* 149 * listener->sctp_rwnd should be the default window size or a 150 * window size changed via SO_RCVBUF option. 151 */ 152 acceptor->sctp_rwnd = listener->sctp_rwnd; 153 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 154 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 155 sizeof (sctp_upcalls_t)); 156 157 return (0); 158 } 159 160 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 161 sctp_t * 162 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 163 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 164 { 165 sctp_t *eager; 166 uint_t ipvers; 167 ip6_t *ip6h; 168 int err; 169 conn_t *connp, *econnp; 170 sctp_stack_t *sctps; 171 172 /* 173 * No need to check for duplicate as this is the listener 174 * and we are holding the lock. This means that no new 175 * connection can be created out of it. And since the 176 * fanout already done cannot find a match, it means that 177 * there is no duplicate. 178 */ 179 ipvers = IPH_HDR_VERSION(mp->b_rptr); 180 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 181 ASSERT(OK_32PTR(mp->b_rptr)); 182 183 if ((eager = sctp_create_eager(sctp)) == NULL) { 184 return (NULL); 185 } 186 187 if (ipvers != IPV4_VERSION) { 188 ip6h = (ip6_t *)mp->b_rptr; 189 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 190 eager->sctp_linklocal = 1; 191 /* 192 * Record ifindex (might be zero) to tie this connection to 193 * that interface if either the listener was bound or 194 * if the connection is using link-local addresses. 195 */ 196 if (sctp->sctp_bound_if == ifindex || 197 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 198 eager->sctp_bound_if = ifindex; 199 /* 200 * XXX broken. bound_if is always overwritten by statement 201 * below. What is the right thing to do here? 202 */ 203 eager->sctp_bound_if = sctp->sctp_bound_if; 204 } 205 206 connp = sctp->sctp_connp; 207 sctps = sctp->sctp_sctps; 208 econnp = eager->sctp_connp; 209 210 if (connp->conn_policy != NULL) { 211 ipsec_in_t *ii; 212 213 ASSERT(ipsec_mp != NULL); 214 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 215 ASSERT(ii->ipsec_in_policy == NULL); 216 IPPH_REFHOLD(connp->conn_policy); 217 ii->ipsec_in_policy = connp->conn_policy; 218 219 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 220 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 221 sctp_close_eager(eager); 222 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 223 return (NULL); 224 } 225 } 226 227 if (ipsec_mp != NULL) { 228 /* 229 * XXX need to fix the cached policy issue here. 230 * We temporarily set the conn_src/conn_rem here so 231 * that IPsec can use it for the latched policy 232 * selector. This is obvioursly wrong as SCTP can 233 * use different addresses... 234 */ 235 if (ipvers == IPV4_VERSION) { 236 ipha_t *ipha; 237 238 ipha = (ipha_t *)mp->b_rptr; 239 econnp->conn_src = ipha->ipha_dst; 240 econnp->conn_rem = ipha->ipha_src; 241 } else { 242 econnp->conn_srcv6 = ip6h->ip6_dst; 243 econnp->conn_remv6 = ip6h->ip6_src; 244 } 245 } 246 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 247 sctp_close_eager(eager); 248 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 249 return (NULL); 250 } 251 252 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 253 if (err) { 254 sctp_close_eager(eager); 255 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 256 return (NULL); 257 } 258 259 /* 260 * On a clustered note send this notification to the clustering 261 * subsystem. 262 */ 263 if (cl_sctp_connect != NULL) { 264 uchar_t *slist; 265 uchar_t *flist; 266 size_t fsize; 267 size_t ssize; 268 269 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 270 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 271 slist = kmem_alloc(ssize, KM_NOSLEEP); 272 flist = kmem_alloc(fsize, KM_NOSLEEP); 273 if (slist == NULL || flist == NULL) { 274 if (slist != NULL) 275 kmem_free(slist, ssize); 276 if (flist != NULL) 277 kmem_free(flist, fsize); 278 sctp_close_eager(eager); 279 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 280 SCTP_KSTAT(sctps, sctp_cl_connect); 281 return (NULL); 282 } 283 /* The clustering module frees these list */ 284 sctp_get_saddr_list(eager, slist, ssize); 285 sctp_get_faddr_list(eager, flist, fsize); 286 (*cl_sctp_connect)(eager->sctp_family, slist, 287 eager->sctp_nsaddrs, eager->sctp_lport, flist, 288 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 289 (cl_sctp_handle_t)eager); 290 } 291 292 /* Connection established, so send up the conn_ind */ 293 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 294 eager)) == NULL) { 295 sctp_close_eager(eager); 296 BUMP_MIB(&sctps->sctps_mib, sctpListenDrop); 297 return (NULL); 298 } 299 ASSERT(SCTP_IS_DETACHED(eager)); 300 eager->sctp_detached = B_FALSE; 301 if (eager->sctp_family == AF_INET) { 302 eager->sctp_ulp_prop(eager->sctp_ulpd, 303 sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + 304 sctp->sctp_hdr_len, strmsgsz); 305 } else { 306 eager->sctp_ulp_prop(eager->sctp_ulpd, 307 sctps->sctps_wroff_xtra + sizeof (sctp_data_hdr_t) + 308 sctp->sctp_hdr6_len, strmsgsz); 309 } 310 return (eager); 311 } 312 313 /* 314 * Connect to a peer - this function inserts the sctp in the 315 * bind and conn fanouts, sends the INIT, and replies to the client 316 * with an OK ack. 317 */ 318 int 319 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 320 { 321 sin_t *sin; 322 sin6_t *sin6; 323 in6_addr_t dstaddr; 324 in_port_t dstport; 325 mblk_t *initmp; 326 sctp_tf_t *tbf; 327 sctp_t *lsctp; 328 char buf[INET6_ADDRSTRLEN]; 329 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 330 int hdrlen; 331 ip6_rthdr_t *rth; 332 int err; 333 sctp_faddr_t *cur_fp; 334 sctp_stack_t *sctps = sctp->sctp_sctps; 335 336 /* 337 * Determine packet type based on type of address passed in 338 * the request should contain an IPv4 or IPv6 address. 339 * Make sure that address family matches the type of 340 * family of the the address passed down 341 */ 342 if (addrlen < sizeof (sin_t)) { 343 return (EINVAL); 344 } 345 switch (dst->sa_family) { 346 case AF_INET: 347 sin = (sin_t *)dst; 348 349 /* Check for attempt to connect to non-unicast */ 350 if (IN_MULTICAST(sin->sin_addr.s_addr) || 351 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 352 ip0dbg(("sctp_connect: non-unicast\n")); 353 return (EINVAL); 354 } 355 if (sctp->sctp_connp->conn_ipv6_v6only) 356 return (EAFNOSUPPORT); 357 358 /* convert to v6 mapped */ 359 /* Check for attempt to connect to INADDR_ANY */ 360 if (sin->sin_addr.s_addr == INADDR_ANY) { 361 struct in_addr v4_addr; 362 /* 363 * SunOS 4.x and 4.3 BSD allow an application 364 * to connect a TCP socket to INADDR_ANY. 365 * When they do this, the kernel picks the 366 * address of one interface and uses it 367 * instead. The kernel usually ends up 368 * picking the address of the loopback 369 * interface. This is an undocumented feature. 370 * However, we provide the same thing here 371 * in case any TCP apps that use this feature 372 * are being ported to SCTP... 373 */ 374 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 375 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 376 } else { 377 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 378 } 379 dstport = sin->sin_port; 380 if (sin->sin_family == AF_INET) { 381 hdrlen = sctp->sctp_hdr_len; 382 } else { 383 hdrlen = sctp->sctp_hdr6_len; 384 } 385 break; 386 case AF_INET6: 387 sin6 = (sin6_t *)dst; 388 /* Check for attempt to connect to non-unicast. */ 389 if ((addrlen < sizeof (sin6_t)) || 390 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 391 ip0dbg(("sctp_connect: non-unicast\n")); 392 return (EINVAL); 393 } 394 if (sctp->sctp_connp->conn_ipv6_v6only && 395 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 396 return (EAFNOSUPPORT); 397 } 398 /* check for attempt to connect to unspec */ 399 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 400 dstaddr = ipv6_loopback; 401 } else { 402 dstaddr = sin6->sin6_addr; 403 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 404 sctp->sctp_linklocal = 1; 405 } 406 dstport = sin6->sin6_port; 407 hdrlen = sctp->sctp_hdr6_len; 408 break; 409 default: 410 dprint(1, ("sctp_connect: unknown family %d\n", 411 dst->sa_family)); 412 return (EAFNOSUPPORT); 413 } 414 415 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 416 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 417 418 RUN_SCTP(sctp); 419 420 if (sctp->sctp_family != dst->sa_family) { 421 WAKE_SCTP(sctp); 422 return (EINVAL); 423 } 424 425 switch (sctp->sctp_state) { 426 case SCTPS_IDLE: { 427 struct sockaddr_storage ss; 428 429 /* 430 * We support a quick connect capability here, allowing 431 * clients to transition directly from IDLE to COOKIE_WAIT. 432 * sctp_bindi will pick an unused port, insert the connection 433 * in the bind hash and transition to BOUND state. SCTP 434 * picks and uses what it considers the optimal local address 435 * set (just like specifiying INADDR_ANY to bind()). 436 */ 437 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 438 ASSERT(sctp->sctp_nsaddrs == 0); 439 440 bzero(&ss, sizeof (ss)); 441 ss.ss_family = sctp->sctp_family; 442 WAKE_SCTP(sctp); 443 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 444 sizeof (ss))) != 0) { 445 return (err); 446 } 447 RUN_SCTP(sctp); 448 sctp->sctp_bound_to_all = 1; 449 /* FALLTHRU */ 450 } 451 452 case SCTPS_BOUND: 453 ASSERT(sctp->sctp_nsaddrs > 0); 454 455 /* do the connect */ 456 /* XXX check for attempt to connect to self */ 457 sctp->sctp_fport = dstport; 458 459 ASSERT(sctp->sctp_iphc); 460 ASSERT(sctp->sctp_iphc6); 461 462 /* 463 * Don't allow this connection to completely duplicate 464 * an existing connection. 465 * 466 * Ensure that the duplicate check and insertion is atomic. 467 */ 468 sctp_conn_hash_remove(sctp); 469 tbf = &sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, 470 sctp->sctp_ports)]; 471 mutex_enter(&tbf->tf_lock); 472 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 473 SCTPS_COOKIE_WAIT); 474 if (lsctp != NULL) { 475 /* found a duplicate connection */ 476 mutex_exit(&tbf->tf_lock); 477 SCTP_REFRELE(lsctp); 478 WAKE_SCTP(sctp); 479 return (EADDRINUSE); 480 } 481 /* 482 * OK; set up the peer addr (this may grow after we get 483 * the INIT ACK from the peer with additional addresses). 484 */ 485 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep, 486 B_FALSE)) != 0) { 487 mutex_exit(&tbf->tf_lock); 488 WAKE_SCTP(sctp); 489 return (err); 490 } 491 /* No valid src addr, return. */ 492 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 493 mutex_exit(&tbf->tf_lock); 494 WAKE_SCTP(sctp); 495 return (EADDRNOTAVAIL); 496 } 497 sctp->sctp_primary = sctp->sctp_faddrs; 498 sctp->sctp_current = sctp->sctp_faddrs; 499 cur_fp = sctp->sctp_current; 500 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 501 sctp_conn_hash_insert(tbf, sctp, 1); 502 mutex_exit(&tbf->tf_lock); 503 504 /* initialize composite headers */ 505 if ((err = sctp_set_hdraddrs(sctp)) != 0) { 506 sctp_conn_hash_remove(sctp); 507 WAKE_SCTP(sctp); 508 return (err); 509 } 510 511 /* 512 * Massage a routing header (if present) putting the first hop 513 * in ip6_dst. 514 */ 515 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 516 (uint8_t *)sctp->sctp_sctph6); 517 if (rth != NULL) { 518 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth, 519 sctps->sctps_netstack); 520 } 521 522 /* 523 * Turn off the don't fragment bit on the (only) faddr, 524 * so that if one of the messages exchanged during the 525 * initialization sequence exceeds the path mtu, it 526 * at least has a chance to get there. SCTP does no 527 * fragmentation of initialization messages. The DF bit 528 * will be turned on again in sctp_send_cookie_echo() 529 * (but the cookie echo will still be sent with the df bit 530 * off). 531 */ 532 cur_fp->df = B_FALSE; 533 534 /* Mark this address as alive */ 535 cur_fp->state = SCTP_FADDRS_ALIVE; 536 537 /* This sctp_t is fully bound now. */ 538 sctp->sctp_connp->conn_fully_bound = B_TRUE; 539 540 /* Send the INIT to the peer */ 541 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 542 /* 543 * sctp_init_mp() could result in modifying the source 544 * address list, so take the hash lock. 545 */ 546 mutex_enter(&tbf->tf_lock); 547 initmp = sctp_init_mp(sctp); 548 if (initmp == NULL) { 549 mutex_exit(&tbf->tf_lock); 550 WAKE_SCTP(sctp); 551 /* let timer retry */ 552 return (0); 553 } 554 mutex_exit(&tbf->tf_lock); 555 sctp->sctp_state = SCTPS_COOKIE_WAIT; 556 /* 557 * On a clustered note send this notification to the clustering 558 * subsystem. 559 */ 560 if (cl_sctp_connect != NULL) { 561 uchar_t *slist; 562 uchar_t *flist; 563 size_t ssize; 564 size_t fsize; 565 566 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 567 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 568 slist = kmem_alloc(ssize, KM_SLEEP); 569 flist = kmem_alloc(fsize, KM_SLEEP); 570 /* The clustering module frees the lists */ 571 sctp_get_saddr_list(sctp, slist, ssize); 572 sctp_get_faddr_list(sctp, flist, fsize); 573 (*cl_sctp_connect)(sctp->sctp_family, slist, 574 sctp->sctp_nsaddrs, sctp->sctp_lport, 575 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 576 B_TRUE, (cl_sctp_handle_t)sctp); 577 } 578 WAKE_SCTP(sctp); 579 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 580 CONN_INC_REF(sctp->sctp_connp); 581 initmp->b_flag |= MSGHASREF; 582 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 583 BUMP_LOCAL(sctp->sctp_opkts); 584 585 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 586 sctps->sctps_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 587 0); 588 589 return (0); 590 default: 591 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 592 WAKE_SCTP(sctp); 593 return (EINVAL); 594 } 595 } 596