1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/strsubr.h> 38 #include <sys/socket.h> 39 #include <sys/tsol/tndb.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ipclassifier.h> 48 #include <inet/ipsec_impl.h> 49 50 #include "sctp_impl.h" 51 #include "sctp_addr.h" 52 53 /* 54 * Common accept code. Called by sctp_conn_request. 55 * cr_pkt is the INIT / INIT ACK packet. 56 */ 57 static int 58 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 59 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 60 { 61 62 sctp_hdr_t *sctph; 63 sctp_chunk_hdr_t *ich; 64 sctp_init_chunk_t *init; 65 int err; 66 uint_t sctp_options; 67 conn_t *lconnp; 68 cred_t *cr; 69 70 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 71 ASSERT(OK_32PTR(sctph)); 72 73 acceptor->sctp_lport = listener->sctp_lport; 74 acceptor->sctp_fport = sctph->sh_sport; 75 76 ich = (sctp_chunk_hdr_t *)(iack + 1); 77 init = (sctp_init_chunk_t *)(ich + 1); 78 79 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 80 ASSERT(acceptor->sctp_faddrs == NULL); 81 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 82 &sctp_options); 83 if (err != 0) 84 return (err); 85 86 lconnp = listener->sctp_connp; 87 if (lconnp->conn_mlp_type != mlptSingle) { 88 cr = lconnp->conn_peercred = DB_CRED(cr_pkt); 89 if (cr != NULL) 90 crhold(cr); 91 else 92 cr = lconnp->conn_cred; 93 } else { 94 cr = lconnp->conn_cred; 95 } 96 97 if ((err = sctp_set_hdraddrs(acceptor)) != 0) 98 return (err); 99 100 if ((sctp_options & SCTP_PRSCTP_OPTION) && 101 listener->sctp_prsctp_aware && sctp_prsctp_enabled) { 102 acceptor->sctp_prsctp_aware = B_TRUE; 103 } else { 104 acceptor->sctp_prsctp_aware = B_FALSE; 105 } 106 /* The new sctp_t is fully bound now. */ 107 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 108 109 /* Get initial TSNs */ 110 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 111 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 112 acceptor->sctp_ltsn - 1; 113 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 114 /* Serial numbers are initialized to the same value as the TSNs */ 115 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 116 117 if (!sctp_initialize_params(acceptor, init, iack)) 118 return (ENOMEM); 119 120 /* 121 * Copy sctp_secret from the listener in case we need to validate 122 * a possibly delayed cookie. 123 */ 124 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 125 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 126 SCTP_SECRET_LEN); 127 acceptor->sctp_last_secret_update = lbolt64; 128 129 /* 130 * After acceptor is inserted in the hash list, it can be found. 131 * So we need to lock it here. 132 */ 133 RUN_SCTP(acceptor); 134 135 sctp_conn_hash_insert(&sctp_conn_fanout[ 136 SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0); 137 sctp_bind_hash_insert(&sctp_bind_fanout[ 138 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 139 140 /* 141 * No need to check for multicast destination since ip will only pass 142 * up multicasts to those that have expressed interest 143 * TODO: what about rejecting broadcasts? 144 * Also check that source is not a multicast or broadcast address. 145 */ 146 /* XXXSCTP */ 147 acceptor->sctp_state = SCTPS_ESTABLISHED; 148 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 149 /* 150 * listener->sctp_rwnd should be the default window size or a 151 * window size changed via SO_RCVBUF option. 152 */ 153 acceptor->sctp_rwnd = listener->sctp_rwnd; 154 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 155 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 156 sizeof (sctp_upcalls_t)); 157 158 return (0); 159 } 160 161 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 162 sctp_t * 163 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 164 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 165 { 166 sctp_t *eager; 167 uint_t ipvers; 168 ip6_t *ip6h; 169 int err; 170 conn_t *connp, *econnp; 171 172 /* 173 * No need to check for duplicate as this is the listener 174 * and we are holding the lock. This means that no new 175 * connection can be created out of it. And since the 176 * fanout already done cannot find a match, it means that 177 * there is no duplicate. 178 */ 179 ipvers = IPH_HDR_VERSION(mp->b_rptr); 180 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 181 ASSERT(OK_32PTR(mp->b_rptr)); 182 183 if ((eager = sctp_create_eager(sctp)) == NULL) { 184 return (NULL); 185 } 186 187 if (ipvers != IPV4_VERSION) { 188 ip6h = (ip6_t *)mp->b_rptr; 189 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 190 eager->sctp_linklocal = 1; 191 /* 192 * Record ifindex (might be zero) to tie this connection to 193 * that interface if either the listener was bound or 194 * if the connection is using link-local addresses. 195 */ 196 if (sctp->sctp_bound_if == ifindex || 197 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 198 eager->sctp_bound_if = ifindex; 199 /* 200 * XXX broken. bound_if is always overwritten by statement 201 * below. What is the right thing to do here? 202 */ 203 eager->sctp_bound_if = sctp->sctp_bound_if; 204 } 205 206 connp = sctp->sctp_connp; 207 econnp = eager->sctp_connp; 208 209 if (connp->conn_policy != NULL) { 210 ipsec_in_t *ii; 211 212 ASSERT(ipsec_mp != NULL); 213 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 214 ASSERT(ii->ipsec_in_policy == NULL); 215 IPPH_REFHOLD(connp->conn_policy); 216 ii->ipsec_in_policy = connp->conn_policy; 217 218 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 219 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 220 sctp_close_eager(eager); 221 BUMP_MIB(&sctp_mib, sctpListenDrop); 222 return (NULL); 223 } 224 } 225 226 if (ipsec_mp != NULL) { 227 /* 228 * XXX need to fix the cached policy issue here. 229 * We temporarily set the conn_src/conn_rem here so 230 * that IPsec can use it for the latched policy 231 * selector. This is obvioursly wrong as SCTP can 232 * use different addresses... 233 */ 234 if (ipvers == IPV4_VERSION) { 235 ipha_t *ipha; 236 237 ipha = (ipha_t *)mp->b_rptr; 238 econnp->conn_src = ipha->ipha_dst; 239 econnp->conn_rem = ipha->ipha_src; 240 } else { 241 econnp->conn_srcv6 = ip6h->ip6_dst; 242 econnp->conn_remv6 = ip6h->ip6_src; 243 } 244 } 245 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 246 sctp_close_eager(eager); 247 BUMP_MIB(&sctp_mib, sctpListenDrop); 248 return (NULL); 249 } 250 251 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 252 if (err) { 253 sctp_close_eager(eager); 254 BUMP_MIB(&sctp_mib, sctpListenDrop); 255 return (NULL); 256 } 257 258 /* 259 * On a clustered note send this notification to the clustering 260 * subsystem. 261 */ 262 if (cl_sctp_connect != NULL) { 263 uchar_t *slist; 264 uchar_t *flist; 265 size_t fsize; 266 size_t ssize; 267 268 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 269 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 270 slist = kmem_alloc(ssize, KM_NOSLEEP); 271 flist = kmem_alloc(fsize, KM_NOSLEEP); 272 if (slist == NULL || flist == NULL) { 273 if (slist != NULL) 274 kmem_free(slist, ssize); 275 if (flist != NULL) 276 kmem_free(flist, fsize); 277 sctp_close_eager(eager); 278 BUMP_MIB(&sctp_mib, sctpListenDrop); 279 SCTP_KSTAT(sctp_cl_connect); 280 return (NULL); 281 } 282 /* The clustering module frees these list */ 283 sctp_get_saddr_list(eager, slist, ssize); 284 sctp_get_faddr_list(eager, flist, fsize); 285 (*cl_sctp_connect)(eager->sctp_family, slist, 286 eager->sctp_nsaddrs, eager->sctp_lport, flist, 287 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 288 (cl_sctp_handle_t)eager); 289 } 290 291 /* Connection established, so send up the conn_ind */ 292 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 293 eager)) == NULL) { 294 sctp_close_eager(eager); 295 BUMP_MIB(&sctp_mib, sctpListenDrop); 296 return (NULL); 297 } 298 ASSERT(SCTP_IS_DETACHED(eager)); 299 eager->sctp_detached = B_FALSE; 300 if (eager->sctp_family == AF_INET) { 301 eager->sctp_ulp_prop(eager->sctp_ulpd, 302 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 303 sctp->sctp_hdr_len, strmsgsz); 304 } else { 305 eager->sctp_ulp_prop(eager->sctp_ulpd, 306 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 307 sctp->sctp_hdr6_len, strmsgsz); 308 } 309 return (eager); 310 } 311 312 /* 313 * Connect to a peer - this function inserts the sctp in the 314 * bind and conn fanouts, sends the INIT, and replies to the client 315 * with an OK ack. 316 */ 317 int 318 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 319 { 320 sin_t *sin; 321 sin6_t *sin6; 322 in6_addr_t dstaddr; 323 in_port_t dstport; 324 mblk_t *initmp; 325 sctp_tf_t *tbf; 326 sctp_t *lsctp; 327 char buf[INET6_ADDRSTRLEN]; 328 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 329 int hdrlen; 330 ip6_rthdr_t *rth; 331 int err; 332 sctp_faddr_t *cur_fp; 333 334 /* 335 * Determine packet type based on type of address passed in 336 * the request should contain an IPv4 or IPv6 address. 337 * Make sure that address family matches the type of 338 * family of the the address passed down 339 */ 340 if (addrlen < sizeof (sin_t)) { 341 return (EINVAL); 342 } 343 switch (dst->sa_family) { 344 case AF_INET: 345 sin = (sin_t *)dst; 346 347 /* Check for attempt to connect to non-unicast */ 348 if (IN_MULTICAST(sin->sin_addr.s_addr) || 349 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 350 ip0dbg(("sctp_connect: non-unicast\n")); 351 return (EINVAL); 352 } 353 if (sctp->sctp_connp->conn_ipv6_v6only) 354 return (EAFNOSUPPORT); 355 356 /* convert to v6 mapped */ 357 /* Check for attempt to connect to INADDR_ANY */ 358 if (sin->sin_addr.s_addr == INADDR_ANY) { 359 struct in_addr v4_addr; 360 /* 361 * SunOS 4.x and 4.3 BSD allow an application 362 * to connect a TCP socket to INADDR_ANY. 363 * When they do this, the kernel picks the 364 * address of one interface and uses it 365 * instead. The kernel usually ends up 366 * picking the address of the loopback 367 * interface. This is an undocumented feature. 368 * However, we provide the same thing here 369 * in case any TCP apps that use this feature 370 * are being ported to SCTP... 371 */ 372 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 373 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 374 } else { 375 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 376 } 377 dstport = sin->sin_port; 378 if (sin->sin_family == AF_INET) { 379 hdrlen = sctp->sctp_hdr_len; 380 } else { 381 hdrlen = sctp->sctp_hdr6_len; 382 } 383 break; 384 case AF_INET6: 385 sin6 = (sin6_t *)dst; 386 /* Check for attempt to connect to non-unicast. */ 387 if ((addrlen < sizeof (sin6_t)) || 388 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 389 ip0dbg(("sctp_connect: non-unicast\n")); 390 return (EINVAL); 391 } 392 if (sctp->sctp_connp->conn_ipv6_v6only && 393 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 394 return (EAFNOSUPPORT); 395 } 396 /* check for attempt to connect to unspec */ 397 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 398 dstaddr = ipv6_loopback; 399 } else { 400 dstaddr = sin6->sin6_addr; 401 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 402 sctp->sctp_linklocal = 1; 403 } 404 dstport = sin6->sin6_port; 405 hdrlen = sctp->sctp_hdr6_len; 406 break; 407 default: 408 dprint(1, ("sctp_connect: unknown family %d\n", 409 dst->sa_family)); 410 return (EAFNOSUPPORT); 411 } 412 413 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 414 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 415 416 RUN_SCTP(sctp); 417 418 if (sctp->sctp_family != dst->sa_family) { 419 WAKE_SCTP(sctp); 420 return (EINVAL); 421 } 422 423 switch (sctp->sctp_state) { 424 case SCTPS_IDLE: { 425 struct sockaddr_storage ss; 426 427 /* 428 * We support a quick connect capability here, allowing 429 * clients to transition directly from IDLE to COOKIE_WAIT. 430 * sctp_bindi will pick an unused port, insert the connection 431 * in the bind hash and transition to BOUND state. SCTP 432 * picks and uses what it considers the optimal local address 433 * set (just like specifiying INADDR_ANY to bind()). 434 */ 435 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 436 ASSERT(sctp->sctp_nsaddrs == 0); 437 438 bzero(&ss, sizeof (ss)); 439 ss.ss_family = sctp->sctp_family; 440 WAKE_SCTP(sctp); 441 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 442 sizeof (ss))) != 0) { 443 return (err); 444 } 445 RUN_SCTP(sctp); 446 sctp->sctp_bound_to_all = 1; 447 /* FALLTHRU */ 448 } 449 450 case SCTPS_BOUND: 451 ASSERT(sctp->sctp_nsaddrs > 0); 452 453 /* do the connect */ 454 /* XXX check for attempt to connect to self */ 455 sctp->sctp_fport = dstport; 456 457 ASSERT(sctp->sctp_iphc); 458 ASSERT(sctp->sctp_iphc6); 459 460 /* 461 * Don't allow this connection to completely duplicate 462 * an existing connection. 463 * 464 * Ensure that the duplicate check and insertion is atomic. 465 */ 466 sctp_conn_hash_remove(sctp); 467 tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)]; 468 mutex_enter(&tbf->tf_lock); 469 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 470 SCTPS_COOKIE_WAIT); 471 if (lsctp != NULL) { 472 /* found a duplicate connection */ 473 mutex_exit(&tbf->tf_lock); 474 SCTP_REFRELE(lsctp); 475 WAKE_SCTP(sctp); 476 return (EADDRINUSE); 477 } 478 /* 479 * OK; set up the peer addr (this may grow after we get 480 * the INIT ACK from the peer with additional addresses). 481 */ 482 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep, 483 B_FALSE)) != 0) { 484 mutex_exit(&tbf->tf_lock); 485 WAKE_SCTP(sctp); 486 return (err); 487 } 488 /* No valid src addr, return. */ 489 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 490 mutex_exit(&tbf->tf_lock); 491 WAKE_SCTP(sctp); 492 return (EADDRNOTAVAIL); 493 } 494 sctp->sctp_primary = sctp->sctp_faddrs; 495 sctp->sctp_current = sctp->sctp_faddrs; 496 cur_fp = sctp->sctp_current; 497 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 498 sctp_conn_hash_insert(tbf, sctp, 1); 499 mutex_exit(&tbf->tf_lock); 500 501 /* initialize composite headers */ 502 if ((err = sctp_set_hdraddrs(sctp)) != 0) { 503 sctp_conn_hash_remove(sctp); 504 WAKE_SCTP(sctp); 505 return (err); 506 } 507 508 /* 509 * Massage a routing header (if present) putting the first hop 510 * in ip6_dst. 511 */ 512 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 513 (uint8_t *)sctp->sctp_sctph6); 514 if (rth != NULL) 515 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 516 517 /* 518 * Turn off the don't fragment bit on the (only) faddr, 519 * so that if one of the messages exchanged during the 520 * initialization sequence exceeds the path mtu, it 521 * at least has a chance to get there. SCTP does no 522 * fragmentation of initialization messages. The DF bit 523 * will be turned on again in sctp_send_cookie_echo() 524 * (but the cookie echo will still be sent with the df bit 525 * off). 526 */ 527 cur_fp->df = B_FALSE; 528 529 /* Mark this address as alive */ 530 cur_fp->state = SCTP_FADDRS_ALIVE; 531 532 /* This sctp_t is fully bound now. */ 533 sctp->sctp_connp->conn_fully_bound = B_TRUE; 534 535 /* Send the INIT to the peer */ 536 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 537 /* 538 * sctp_init_mp() could result in modifying the source 539 * address list, so take the hash lock. 540 */ 541 mutex_enter(&tbf->tf_lock); 542 initmp = sctp_init_mp(sctp); 543 if (initmp == NULL) { 544 mutex_exit(&tbf->tf_lock); 545 WAKE_SCTP(sctp); 546 /* let timer retry */ 547 return (0); 548 } 549 mutex_exit(&tbf->tf_lock); 550 sctp->sctp_state = SCTPS_COOKIE_WAIT; 551 /* 552 * On a clustered note send this notification to the clustering 553 * subsystem. 554 */ 555 if (cl_sctp_connect != NULL) { 556 uchar_t *slist; 557 uchar_t *flist; 558 size_t ssize; 559 size_t fsize; 560 561 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 562 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 563 slist = kmem_alloc(ssize, KM_SLEEP); 564 flist = kmem_alloc(fsize, KM_SLEEP); 565 /* The clustering module frees the lists */ 566 sctp_get_saddr_list(sctp, slist, ssize); 567 sctp_get_faddr_list(sctp, flist, fsize); 568 (*cl_sctp_connect)(sctp->sctp_family, slist, 569 sctp->sctp_nsaddrs, sctp->sctp_lport, 570 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 571 B_TRUE, (cl_sctp_handle_t)sctp); 572 } 573 WAKE_SCTP(sctp); 574 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 575 CONN_INC_REF(sctp->sctp_connp); 576 initmp->b_flag |= MSGHASREF; 577 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 578 BUMP_LOCAL(sctp->sctp_opkts); 579 580 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 581 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0); 582 583 return (0); 584 default: 585 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 586 WAKE_SCTP(sctp); 587 return (EINVAL); 588 } 589 } 590