1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/systm.h> 31 #include <sys/stream.h> 32 #include <sys/cmn_err.h> 33 #include <sys/kmem.h> 34 #define _SUN_TPI_VERSION 2 35 #include <sys/tihdr.h> 36 #include <sys/stropts.h> 37 #include <sys/strsubr.h> 38 #include <sys/socket.h> 39 #include <sys/tsol/tndb.h> 40 41 #include <netinet/in.h> 42 #include <netinet/ip6.h> 43 44 #include <inet/common.h> 45 #include <inet/ip.h> 46 #include <inet/ip6.h> 47 #include <inet/ipclassifier.h> 48 #include <inet/ipsec_impl.h> 49 50 #include "sctp_impl.h" 51 #include "sctp_addr.h" 52 53 /* 54 * Common accept code. Called by sctp_conn_request. 55 * cr_pkt is the INIT / INIT ACK packet. 56 */ 57 static int 58 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt, 59 uint_t ip_hdr_len, sctp_init_chunk_t *iack) 60 { 61 62 sctp_hdr_t *sctph; 63 sctp_chunk_hdr_t *ich; 64 sctp_init_chunk_t *init; 65 int err; 66 uint_t sctp_options; 67 conn_t *aconnp; 68 conn_t *lconnp; 69 cred_t *cr; 70 71 sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len); 72 ASSERT(OK_32PTR(sctph)); 73 74 acceptor->sctp_lport = listener->sctp_lport; 75 acceptor->sctp_fport = sctph->sh_sport; 76 77 ich = (sctp_chunk_hdr_t *)(iack + 1); 78 init = (sctp_init_chunk_t *)(ich + 1); 79 80 /* acceptor isn't in any fanouts yet, so don't need to hold locks */ 81 ASSERT(acceptor->sctp_faddrs == NULL); 82 err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich, 83 &sctp_options); 84 if (err != 0) 85 return (err); 86 87 aconnp = acceptor->sctp_connp; 88 lconnp = listener->sctp_connp; 89 if (lconnp->conn_mlp_type != mlptSingle) { 90 cr = aconnp->conn_peercred = DB_CRED(cr_pkt); 91 if (cr != NULL) 92 crhold(cr); 93 } 94 95 if ((err = sctp_set_hdraddrs(acceptor)) != 0) 96 return (err); 97 98 if ((sctp_options & SCTP_PRSCTP_OPTION) && 99 listener->sctp_prsctp_aware && sctp_prsctp_enabled) { 100 acceptor->sctp_prsctp_aware = B_TRUE; 101 } else { 102 acceptor->sctp_prsctp_aware = B_FALSE; 103 } 104 /* The new sctp_t is fully bound now. */ 105 acceptor->sctp_connp->conn_fully_bound = B_TRUE; 106 107 /* Get initial TSNs */ 108 acceptor->sctp_ltsn = ntohl(iack->sic_inittsn); 109 acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd = 110 acceptor->sctp_ltsn - 1; 111 acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd; 112 /* Serial numbers are initialized to the same value as the TSNs */ 113 acceptor->sctp_lcsn = acceptor->sctp_ltsn; 114 115 if (!sctp_initialize_params(acceptor, init, iack)) 116 return (ENOMEM); 117 118 /* 119 * Copy sctp_secret from the listener in case we need to validate 120 * a possibly delayed cookie. 121 */ 122 bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN); 123 bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret, 124 SCTP_SECRET_LEN); 125 acceptor->sctp_last_secret_update = lbolt64; 126 127 /* 128 * After acceptor is inserted in the hash list, it can be found. 129 * So we need to lock it here. 130 */ 131 RUN_SCTP(acceptor); 132 133 sctp_conn_hash_insert(&sctp_conn_fanout[ 134 SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0); 135 sctp_bind_hash_insert(&sctp_bind_fanout[ 136 SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0); 137 138 /* 139 * No need to check for multicast destination since ip will only pass 140 * up multicasts to those that have expressed interest 141 * TODO: what about rejecting broadcasts? 142 * Also check that source is not a multicast or broadcast address. 143 */ 144 /* XXXSCTP */ 145 acceptor->sctp_state = SCTPS_ESTABLISHED; 146 acceptor->sctp_assoc_start_time = (uint32_t)lbolt; 147 /* 148 * listener->sctp_rwnd should be the default window size or a 149 * window size changed via SO_RCVBUF option. 150 */ 151 acceptor->sctp_rwnd = listener->sctp_rwnd; 152 acceptor->sctp_irwnd = acceptor->sctp_rwnd; 153 bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls, 154 sizeof (sctp_upcalls_t)); 155 156 return (0); 157 } 158 159 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */ 160 sctp_t * 161 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len, 162 sctp_init_chunk_t *iack, mblk_t *ipsec_mp) 163 { 164 sctp_t *eager; 165 uint_t ipvers; 166 ip6_t *ip6h; 167 int err; 168 conn_t *connp, *econnp; 169 170 /* 171 * No need to check for duplicate as this is the listener 172 * and we are holding the lock. This means that no new 173 * connection can be created out of it. And since the 174 * fanout already done cannot find a match, it means that 175 * there is no duplicate. 176 */ 177 ipvers = IPH_HDR_VERSION(mp->b_rptr); 178 ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION); 179 ASSERT(OK_32PTR(mp->b_rptr)); 180 181 if ((eager = sctp_create_eager(sctp)) == NULL) { 182 return (NULL); 183 } 184 185 if (ipvers != IPV4_VERSION) { 186 ip6h = (ip6_t *)mp->b_rptr; 187 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 188 eager->sctp_linklocal = 1; 189 /* 190 * Record ifindex (might be zero) to tie this connection to 191 * that interface if either the listener was bound or 192 * if the connection is using link-local addresses. 193 */ 194 if (sctp->sctp_bound_if == ifindex || 195 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src)) 196 eager->sctp_bound_if = ifindex; 197 /* 198 * XXX broken. bound_if is always overwritten by statement 199 * below. What is the right thing to do here? 200 */ 201 eager->sctp_bound_if = sctp->sctp_bound_if; 202 } 203 204 connp = sctp->sctp_connp; 205 econnp = eager->sctp_connp; 206 207 if (connp->conn_policy != NULL) { 208 ipsec_in_t *ii; 209 210 ASSERT(ipsec_mp != NULL); 211 ii = (ipsec_in_t *)(ipsec_mp->b_rptr); 212 ASSERT(ii->ipsec_in_policy == NULL); 213 IPPH_REFHOLD(connp->conn_policy); 214 ii->ipsec_in_policy = connp->conn_policy; 215 216 ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET; 217 if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) { 218 sctp_close_eager(eager); 219 BUMP_MIB(&sctp_mib, sctpListenDrop); 220 return (NULL); 221 } 222 } 223 224 if (ipsec_mp != NULL) { 225 /* 226 * XXX need to fix the cached policy issue here. 227 * We temporarily set the conn_src/conn_rem here so 228 * that IPsec can use it for the latched policy 229 * selector. This is obvioursly wrong as SCTP can 230 * use different addresses... 231 */ 232 if (ipvers == IPV4_VERSION) { 233 ipha_t *ipha; 234 235 ipha = (ipha_t *)mp->b_rptr; 236 econnp->conn_src = ipha->ipha_dst; 237 econnp->conn_rem = ipha->ipha_src; 238 } else { 239 econnp->conn_srcv6 = ip6h->ip6_dst; 240 econnp->conn_remv6 = ip6h->ip6_src; 241 } 242 } 243 if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) { 244 sctp_close_eager(eager); 245 BUMP_MIB(&sctp_mib, sctpListenDrop); 246 return (NULL); 247 } 248 249 err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack); 250 if (err) { 251 sctp_close_eager(eager); 252 BUMP_MIB(&sctp_mib, sctpListenDrop); 253 return (NULL); 254 } 255 256 /* 257 * On a clustered note send this notification to the clustering 258 * subsystem. 259 */ 260 if (cl_sctp_connect != NULL) { 261 uchar_t *slist; 262 uchar_t *flist; 263 size_t fsize; 264 size_t ssize; 265 266 fsize = sizeof (in6_addr_t) * eager->sctp_nfaddrs; 267 ssize = sizeof (in6_addr_t) * eager->sctp_nsaddrs; 268 slist = kmem_alloc(ssize, KM_NOSLEEP); 269 flist = kmem_alloc(fsize, KM_NOSLEEP); 270 if (slist == NULL || flist == NULL) { 271 if (slist != NULL) 272 kmem_free(slist, ssize); 273 if (flist != NULL) 274 kmem_free(flist, fsize); 275 sctp_close_eager(eager); 276 BUMP_MIB(&sctp_mib, sctpListenDrop); 277 SCTP_KSTAT(sctp_cl_connect); 278 return (NULL); 279 } 280 /* The clustering module frees these list */ 281 sctp_get_saddr_list(eager, slist, ssize); 282 sctp_get_faddr_list(eager, flist, fsize); 283 (*cl_sctp_connect)(eager->sctp_family, slist, 284 eager->sctp_nsaddrs, eager->sctp_lport, flist, 285 eager->sctp_nfaddrs, eager->sctp_fport, B_FALSE, 286 (cl_sctp_handle_t)eager); 287 } 288 289 /* Connection established, so send up the conn_ind */ 290 if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd, 291 eager)) == NULL) { 292 sctp_close_eager(eager); 293 BUMP_MIB(&sctp_mib, sctpListenDrop); 294 return (NULL); 295 } 296 ASSERT(SCTP_IS_DETACHED(eager)); 297 eager->sctp_detached = B_FALSE; 298 if (eager->sctp_family == AF_INET) { 299 eager->sctp_ulp_prop(eager->sctp_ulpd, 300 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 301 sctp->sctp_hdr_len, strmsgsz); 302 } else { 303 eager->sctp_ulp_prop(eager->sctp_ulpd, 304 sctp_wroff_xtra + sizeof (sctp_data_hdr_t) + 305 sctp->sctp_hdr6_len, strmsgsz); 306 } 307 return (eager); 308 } 309 310 /* 311 * Connect to a peer - this function inserts the sctp in the 312 * bind and conn fanouts, sends the INIT, and replies to the client 313 * with an OK ack. 314 */ 315 int 316 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen) 317 { 318 sin_t *sin; 319 sin6_t *sin6; 320 in6_addr_t dstaddr; 321 in_port_t dstport; 322 mblk_t *initmp; 323 sctp_tf_t *tbf; 324 sctp_t *lsctp; 325 char buf[INET6_ADDRSTRLEN]; 326 int sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP; 327 int hdrlen; 328 ip6_rthdr_t *rth; 329 int err; 330 sctp_faddr_t *cur_fp; 331 332 /* 333 * Determine packet type based on type of address passed in 334 * the request should contain an IPv4 or IPv6 address. 335 * Make sure that address family matches the type of 336 * family of the the address passed down 337 */ 338 if (addrlen < sizeof (sin_t)) { 339 return (EINVAL); 340 } 341 switch (dst->sa_family) { 342 case AF_INET: 343 sin = (sin_t *)dst; 344 345 /* Check for attempt to connect to non-unicast */ 346 if (IN_MULTICAST(sin->sin_addr.s_addr) || 347 (sin->sin_addr.s_addr == INADDR_BROADCAST)) { 348 ip0dbg(("sctp_connect: non-unicast\n")); 349 return (EINVAL); 350 } 351 if (sctp->sctp_connp->conn_ipv6_v6only) 352 return (EAFNOSUPPORT); 353 354 /* convert to v6 mapped */ 355 /* Check for attempt to connect to INADDR_ANY */ 356 if (sin->sin_addr.s_addr == INADDR_ANY) { 357 struct in_addr v4_addr; 358 /* 359 * SunOS 4.x and 4.3 BSD allow an application 360 * to connect a TCP socket to INADDR_ANY. 361 * When they do this, the kernel picks the 362 * address of one interface and uses it 363 * instead. The kernel usually ends up 364 * picking the address of the loopback 365 * interface. This is an undocumented feature. 366 * However, we provide the same thing here 367 * in case any TCP apps that use this feature 368 * are being ported to SCTP... 369 */ 370 v4_addr.s_addr = htonl(INADDR_LOOPBACK); 371 IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr); 372 } else { 373 IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr); 374 } 375 dstport = sin->sin_port; 376 if (sin->sin_family == AF_INET) { 377 hdrlen = sctp->sctp_hdr_len; 378 } else { 379 hdrlen = sctp->sctp_hdr6_len; 380 } 381 break; 382 case AF_INET6: 383 sin6 = (sin6_t *)dst; 384 /* Check for attempt to connect to non-unicast. */ 385 if ((addrlen < sizeof (sin6_t)) || 386 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 387 ip0dbg(("sctp_connect: non-unicast\n")); 388 return (EINVAL); 389 } 390 if (sctp->sctp_connp->conn_ipv6_v6only && 391 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 392 return (EAFNOSUPPORT); 393 } 394 /* check for attempt to connect to unspec */ 395 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 396 dstaddr = ipv6_loopback; 397 } else { 398 dstaddr = sin6->sin6_addr; 399 if (IN6_IS_ADDR_LINKLOCAL(&dstaddr)) 400 sctp->sctp_linklocal = 1; 401 } 402 dstport = sin6->sin6_port; 403 hdrlen = sctp->sctp_hdr6_len; 404 break; 405 default: 406 dprint(1, ("sctp_connect: unknown family %d\n", 407 dst->sa_family)); 408 return (EAFNOSUPPORT); 409 } 410 411 (void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf)); 412 dprint(1, ("sctp_connect: attempting connect to %s...\n", buf)); 413 414 RUN_SCTP(sctp); 415 416 if (sctp->sctp_family != dst->sa_family) { 417 WAKE_SCTP(sctp); 418 return (EINVAL); 419 } 420 421 switch (sctp->sctp_state) { 422 case SCTPS_IDLE: { 423 struct sockaddr_storage ss; 424 425 /* 426 * We support a quick connect capability here, allowing 427 * clients to transition directly from IDLE to COOKIE_WAIT. 428 * sctp_bindi will pick an unused port, insert the connection 429 * in the bind hash and transition to BOUND state. SCTP 430 * picks and uses what it considers the optimal local address 431 * set (just like specifiying INADDR_ANY to bind()). 432 */ 433 dprint(1, ("sctp_connect: idle, attempting bind...\n")); 434 ASSERT(sctp->sctp_nsaddrs == 0); 435 436 bzero(&ss, sizeof (ss)); 437 ss.ss_family = sctp->sctp_family; 438 WAKE_SCTP(sctp); 439 if ((err = sctp_bind(sctp, (struct sockaddr *)&ss, 440 sizeof (ss))) != 0) { 441 return (err); 442 } 443 RUN_SCTP(sctp); 444 sctp->sctp_bound_to_all = 1; 445 /* FALLTHRU */ 446 } 447 448 case SCTPS_BOUND: 449 ASSERT(sctp->sctp_nsaddrs > 0); 450 451 /* do the connect */ 452 /* XXX check for attempt to connect to self */ 453 sctp->sctp_fport = dstport; 454 455 ASSERT(sctp->sctp_iphc); 456 ASSERT(sctp->sctp_iphc6); 457 458 /* 459 * Don't allow this connection to completely duplicate 460 * an existing connection. 461 * 462 * Ensure that the duplicate check and insertion is atomic. 463 */ 464 sctp_conn_hash_remove(sctp); 465 tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)]; 466 mutex_enter(&tbf->tf_lock); 467 lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports, 468 SCTPS_COOKIE_WAIT); 469 if (lsctp != NULL) { 470 /* found a duplicate connection */ 471 mutex_exit(&tbf->tf_lock); 472 SCTP_REFRELE(lsctp); 473 WAKE_SCTP(sctp); 474 return (EADDRINUSE); 475 } 476 /* 477 * OK; set up the peer addr (this may grow after we get 478 * the INIT ACK from the peer with additional addresses). 479 */ 480 if ((err = sctp_add_faddr(sctp, &dstaddr, sleep, 481 B_FALSE)) != 0) { 482 mutex_exit(&tbf->tf_lock); 483 WAKE_SCTP(sctp); 484 return (err); 485 } 486 /* No valid src addr, return. */ 487 if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) { 488 mutex_exit(&tbf->tf_lock); 489 WAKE_SCTP(sctp); 490 return (EADDRNOTAVAIL); 491 } 492 sctp->sctp_primary = sctp->sctp_faddrs; 493 sctp->sctp_current = sctp->sctp_faddrs; 494 cur_fp = sctp->sctp_current; 495 sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss; 496 sctp_conn_hash_insert(tbf, sctp, 1); 497 mutex_exit(&tbf->tf_lock); 498 499 /* initialize composite headers */ 500 if ((err = sctp_set_hdraddrs(sctp)) != 0) { 501 sctp_conn_hash_remove(sctp); 502 WAKE_SCTP(sctp); 503 return (err); 504 } 505 506 /* 507 * Massage a routing header (if present) putting the first hop 508 * in ip6_dst. 509 */ 510 rth = ip_find_rthdr_v6(sctp->sctp_ip6h, 511 (uint8_t *)sctp->sctp_sctph6); 512 if (rth != NULL) 513 (void) ip_massage_options_v6(sctp->sctp_ip6h, rth); 514 515 /* 516 * Turn off the don't fragment bit on the (only) faddr, 517 * so that if one of the messages exchanged during the 518 * initialization sequence exceeds the path mtu, it 519 * at least has a chance to get there. SCTP does no 520 * fragmentation of initialization messages. The DF bit 521 * will be turned on again in sctp_send_cookie_echo() 522 * (but the cookie echo will still be sent with the df bit 523 * off). 524 */ 525 cur_fp->df = B_FALSE; 526 527 /* Mark this address as alive */ 528 cur_fp->state = SCTP_FADDRS_ALIVE; 529 530 /* This sctp_t is fully bound now. */ 531 sctp->sctp_connp->conn_fully_bound = B_TRUE; 532 533 /* Send the INIT to the peer */ 534 SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto); 535 /* 536 * sctp_init_mp() could result in modifying the source 537 * address list, so take the hash lock. 538 */ 539 mutex_enter(&tbf->tf_lock); 540 initmp = sctp_init_mp(sctp); 541 if (initmp == NULL) { 542 mutex_exit(&tbf->tf_lock); 543 WAKE_SCTP(sctp); 544 /* let timer retry */ 545 return (0); 546 } 547 mutex_exit(&tbf->tf_lock); 548 sctp->sctp_state = SCTPS_COOKIE_WAIT; 549 /* 550 * On a clustered note send this notification to the clustering 551 * subsystem. 552 */ 553 if (cl_sctp_connect != NULL) { 554 uchar_t *slist; 555 uchar_t *flist; 556 size_t ssize; 557 size_t fsize; 558 559 fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs; 560 ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs; 561 slist = kmem_alloc(ssize, KM_SLEEP); 562 flist = kmem_alloc(fsize, KM_SLEEP); 563 /* The clustering module frees the lists */ 564 sctp_get_saddr_list(sctp, slist, ssize); 565 sctp_get_faddr_list(sctp, flist, fsize); 566 (*cl_sctp_connect)(sctp->sctp_family, slist, 567 sctp->sctp_nsaddrs, sctp->sctp_lport, 568 flist, sctp->sctp_nfaddrs, sctp->sctp_fport, 569 B_TRUE, (cl_sctp_handle_t)sctp); 570 } 571 WAKE_SCTP(sctp); 572 /* OK to call IP_PUT() here instead of sctp_add_sendq(). */ 573 CONN_INC_REF(sctp->sctp_connp); 574 initmp->b_flag |= MSGHASREF; 575 IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4); 576 BUMP_LOCAL(sctp->sctp_opkts); 577 578 sctp->sctp_ulp_prop(sctp->sctp_ulpd, 579 sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0); 580 581 return (0); 582 default: 583 ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state)); 584 WAKE_SCTP(sctp); 585 return (EINVAL); 586 } 587 } 588