1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/strsubr.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/xti_inet.h> 37 #include <sys/cmn_err.h> 38 #include <sys/debug.h> 39 #include <sys/vtrace.h> 40 #include <sys/kmem.h> 41 #include <sys/cpuvar.h> 42 #include <sys/random.h> 43 #include <sys/priv.h> 44 #include <sys/sunldi.h> 45 46 #include <sys/errno.h> 47 #include <sys/signal.h> 48 #include <sys/socket.h> 49 #include <sys/isa_defs.h> 50 #include <netinet/in.h> 51 #include <netinet/tcp.h> 52 #include <netinet/ip6.h> 53 #include <netinet/icmp6.h> 54 #include <netinet/sctp.h> 55 #include <net/if.h> 56 57 #include <inet/common.h> 58 #include <inet/ip.h> 59 #include <inet/ip_if.h> 60 #include <inet/ip_ire.h> 61 #include <inet/ip6.h> 62 #include <inet/mi.h> 63 #include <inet/mib2.h> 64 #include <inet/kstatcom.h> 65 #include <inet/optcom.h> 66 #include 
<inet/ipclassifier.h> 67 #include <inet/ipsec_impl.h> 68 #include <inet/sctp_ip.h> 69 #include <inet/sctp_crc32.h> 70 71 #include "sctp_impl.h" 72 #include "sctp_addr.h" 73 #include "sctp_asconf.h" 74 75 int sctpdebug; 76 sin6_t sctp_sin6_null; /* Zero address for quick clears */ 77 78 static void sctp_closei_local(sctp_t *sctp); 79 static int sctp_init_values(sctp_t *, sctp_t *, int); 80 static void sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp); 81 static void sctp_process_recvq(void *); 82 static void sctp_rq_tq_init(sctp_stack_t *); 83 static void sctp_rq_tq_fini(sctp_stack_t *); 84 static void sctp_conn_cache_init(); 85 static void sctp_conn_cache_fini(); 86 static int sctp_conn_cache_constructor(); 87 static void sctp_conn_cache_destructor(); 88 static void sctp_conn_clear(conn_t *); 89 static void sctp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 90 ixa_notify_arg_t); 91 92 static void *sctp_stack_init(netstackid_t stackid, netstack_t *ns); 93 static void sctp_stack_fini(netstackid_t stackid, void *arg); 94 95 /* 96 * SCTP receive queue taskq 97 * 98 * At SCTP initialization time, a default taskq is created for 99 * servicing packets received when the interrupt thread cannot 100 * get a hold on the sctp_t. The number of taskq can be increased in 101 * sctp_find_next_tq() when an existing taskq cannot be dispatched. 102 * The taskqs are never removed. But the max number of taskq which 103 * can be created is controlled by sctp_recvq_tq_list_max_sz. Note 104 * that SCTP recvq taskq is not tied to any specific CPU or ill. 105 * 106 * Those taskqs are stored in an array recvq_tq_list. And they are 107 * used in a round robin fashion. The current taskq being used is 108 * determined by recvq_tq_list_cur. 109 */ 110 111 /* /etc/system variables */ 112 /* The minimum number of threads for each taskq. */ 113 int sctp_recvq_tq_thr_min = 4; 114 /* The maximum number of threads for each taskq. 
*/ 115 int sctp_recvq_tq_thr_max = 16; 116 /* The minimum number of tasks for each taskq. */ 117 int sctp_recvq_tq_task_min = 5; 118 /* The maxiimum number of tasks for each taskq. */ 119 int sctp_recvq_tq_task_max = 50; 120 121 /* 122 * SCTP tunables related declarations. Definitions are in sctp_tunables.c 123 */ 124 extern mod_prop_info_t sctp_propinfo_tbl[]; 125 extern int sctp_propinfo_count; 126 127 /* sctp_t/conn_t kmem cache */ 128 struct kmem_cache *sctp_conn_cache; 129 130 #define SCTP_CONDEMNED(sctp) \ 131 mutex_enter(&(sctp)->sctp_reflock); \ 132 ((sctp)->sctp_condemned = B_TRUE); \ 133 mutex_exit(&(sctp)->sctp_reflock); 134 135 /* Link/unlink a sctp_t to/from the global list. */ 136 #define SCTP_LINK(sctp, sctps) \ 137 mutex_enter(&(sctps)->sctps_g_lock); \ 138 list_insert_tail(&sctps->sctps_g_list, (sctp)); \ 139 mutex_exit(&(sctps)->sctps_g_lock); 140 141 #define SCTP_UNLINK(sctp, sctps) \ 142 mutex_enter(&(sctps)->sctps_g_lock); \ 143 ASSERT((sctp)->sctp_condemned); \ 144 list_remove(&(sctps)->sctps_g_list, (sctp)); \ 145 mutex_exit(&(sctps)->sctps_g_lock); 146 147 /* 148 * Hooks for Sun Cluster. On non-clustered nodes these will remain NULL. 149 * PSARC/2005/602. 150 */ 151 void (*cl_sctp_listen)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL; 152 void (*cl_sctp_unlisten)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL; 153 void (*cl_sctp_connect)(sa_family_t, uchar_t *, uint_t, in_port_t, 154 uchar_t *, uint_t, in_port_t, boolean_t, cl_sctp_handle_t) = NULL; 155 void (*cl_sctp_disconnect)(sa_family_t, cl_sctp_handle_t) = NULL; 156 void (*cl_sctp_assoc_change)(sa_family_t, uchar_t *, size_t, uint_t, 157 uchar_t *, size_t, uint_t, int, cl_sctp_handle_t) = NULL; 158 void (*cl_sctp_check_addrs)(sa_family_t, in_port_t, uchar_t **, size_t, 159 uint_t *, boolean_t) = NULL; 160 /* 161 * Return the version number of the SCTP kernel interface. 
162 */ 163 int 164 sctp_itf_ver(int cl_ver) 165 { 166 if (cl_ver != SCTP_ITF_VER) 167 return (-1); 168 return (SCTP_ITF_VER); 169 } 170 171 /* 172 * Called when we need a new sctp instantiation but don't really have a 173 * new q to hang it off of. Copy the priv flag from the passed in structure. 174 */ 175 sctp_t * 176 sctp_create_eager(sctp_t *psctp) 177 { 178 sctp_t *sctp; 179 mblk_t *ack_mp, *hb_mp; 180 conn_t *connp; 181 cred_t *credp; 182 sctp_stack_t *sctps = psctp->sctp_sctps; 183 184 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, KM_NOSLEEP, 185 sctps->sctps_netstack)) == NULL) { 186 return (NULL); 187 } 188 189 sctp = CONN2SCTP(connp); 190 sctp->sctp_sctps = sctps; 191 192 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, 193 KM_NOSLEEP)) == NULL || 194 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 195 KM_NOSLEEP)) == NULL) { 196 if (ack_mp != NULL) 197 freeb(ack_mp); 198 sctp_conn_clear(connp); 199 sctp->sctp_sctps = NULL; 200 kmem_cache_free(sctp_conn_cache, connp); 201 return (NULL); 202 } 203 204 sctp->sctp_ack_mp = ack_mp; 205 sctp->sctp_heartbeat_mp = hb_mp; 206 207 if (sctp_init_values(sctp, psctp, KM_NOSLEEP) != 0) { 208 freeb(ack_mp); 209 freeb(hb_mp); 210 sctp_conn_clear(connp); 211 sctp->sctp_sctps = NULL; 212 kmem_cache_free(sctp_conn_cache, connp); 213 return (NULL); 214 } 215 216 if ((credp = psctp->sctp_connp->conn_cred) != NULL) { 217 connp->conn_cred = credp; 218 crhold(credp); 219 } 220 221 sctp->sctp_mss = psctp->sctp_mss; 222 sctp->sctp_detached = B_TRUE; 223 /* 224 * Link to the global as soon as possible so that this sctp_t 225 * can be found. 226 */ 227 SCTP_LINK(sctp, sctps); 228 229 return (sctp); 230 } 231 232 /* 233 * We are dying for some reason. Try to do it gracefully. 
234 */ 235 void 236 sctp_clean_death(sctp_t *sctp, int err) 237 { 238 ASSERT(sctp != NULL); 239 240 dprint(3, ("sctp_clean_death %p, state %d\n", (void *)sctp, 241 sctp->sctp_state)); 242 243 sctp->sctp_client_errno = err; 244 /* 245 * Check to see if we need to notify upper layer. 246 */ 247 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 248 !SCTP_IS_DETACHED(sctp)) { 249 if (sctp->sctp_xmit_head || sctp->sctp_xmit_unsent) { 250 sctp_regift_xmitlist(sctp); 251 } 252 if (sctp->sctp_ulp_disconnected(sctp->sctp_ulpd, 0, err)) { 253 /* 254 * Socket is gone, detach. 255 */ 256 sctp->sctp_detached = B_TRUE; 257 sctp->sctp_ulpd = NULL; 258 sctp->sctp_upcalls = NULL; 259 } 260 } 261 262 /* Remove this sctp from all hashes. */ 263 sctp_closei_local(sctp); 264 265 /* 266 * If the sctp_t is detached, we need to finish freeing up 267 * the resources. At this point, ip_fanout_sctp() should have 268 * a hold on this sctp_t. Some thread doing snmp stuff can 269 * have a hold. And a taskq can also have a hold waiting to 270 * work. sctp_unlink() the sctp_t from the global list so 271 * that no new thread can find it. Then do a SCTP_REFRELE(). 272 * The sctp_t will be freed after all those threads are done. 273 */ 274 if (SCTP_IS_DETACHED(sctp)) { 275 SCTP_CONDEMNED(sctp); 276 SCTP_REFRELE(sctp); 277 } 278 } 279 280 /* 281 * Called by upper layer when it wants to close this association. 282 * Depending on the state of this assoication, we need to do 283 * different things. 284 * 285 * If the state is below COOKIE_ECHOED or it is COOKIE_ECHOED but with 286 * no sent data, just remove this sctp from all the hashes. This 287 * makes sure that all packets from the other end will go to the default 288 * sctp handling. The upper layer will then do a sctp_close() to clean 289 * up. 290 * 291 * Otherwise, check and see if SO_LINGER is set. If it is set, check 292 * the value. If the value is 0, consider this an abortive close. Send 293 * an ABORT message and kill the associatiion. 
294 * 295 */ 296 int 297 sctp_disconnect(sctp_t *sctp) 298 { 299 int error = 0; 300 conn_t *connp = sctp->sctp_connp; 301 302 dprint(3, ("sctp_disconnect %p, state %d\n", (void *)sctp, 303 sctp->sctp_state)); 304 305 RUN_SCTP(sctp); 306 307 switch (sctp->sctp_state) { 308 case SCTPS_IDLE: 309 case SCTPS_BOUND: 310 case SCTPS_LISTEN: 311 break; 312 case SCTPS_COOKIE_WAIT: 313 case SCTPS_COOKIE_ECHOED: 314 /* 315 * Close during the connect 3-way handshake 316 * but here there may or may not be pending data 317 * already on queue. Process almost same as in 318 * the ESTABLISHED state. 319 */ 320 if (sctp->sctp_xmit_head == NULL && 321 sctp->sctp_xmit_unsent == NULL) { 322 break; 323 } 324 /* FALLTHRU */ 325 default: 326 /* 327 * If SO_LINGER has set a zero linger time, terminate the 328 * association and send an ABORT. 329 */ 330 if (connp->conn_linger && connp->conn_lingertime == 0) { 331 sctp_user_abort(sctp, NULL); 332 WAKE_SCTP(sctp); 333 return (error); 334 } 335 336 /* 337 * In there is unread data, send an ABORT and terminate the 338 * association. 339 */ 340 if (sctp->sctp_rxqueued > 0 || sctp->sctp_irwnd > 341 sctp->sctp_rwnd) { 342 sctp_user_abort(sctp, NULL); 343 WAKE_SCTP(sctp); 344 return (error); 345 } 346 /* 347 * Transmit the shutdown before detaching the sctp_t. 348 * After sctp_detach returns this queue/perimeter 349 * no longer owns the sctp_t thus others can modify it. 350 */ 351 sctp_send_shutdown(sctp, 0); 352 353 /* Pass gathered wisdom to IP for keeping */ 354 sctp_update_dce(sctp); 355 356 /* 357 * If lingering on close then wait until the shutdown 358 * is complete, or the SO_LINGER time passes, or an 359 * ABORT is sent/received. Note that sctp_disconnect() 360 * can be called more than once. Make sure that only 361 * one thread waits. 
362 */ 363 if (connp->conn_linger && connp->conn_lingertime > 0 && 364 sctp->sctp_state >= SCTPS_ESTABLISHED && 365 !sctp->sctp_lingering) { 366 clock_t stoptime; /* in ticks */ 367 clock_t ret; 368 369 sctp->sctp_lingering = 1; 370 sctp->sctp_client_errno = 0; 371 stoptime = ddi_get_lbolt() + 372 connp->conn_lingertime * hz; 373 374 mutex_enter(&sctp->sctp_lock); 375 sctp->sctp_running = B_FALSE; 376 while (sctp->sctp_state >= SCTPS_ESTABLISHED && 377 sctp->sctp_client_errno == 0) { 378 cv_broadcast(&sctp->sctp_cv); 379 ret = cv_timedwait_sig(&sctp->sctp_cv, 380 &sctp->sctp_lock, stoptime); 381 if (ret < 0) { 382 /* Stoptime has reached. */ 383 sctp->sctp_client_errno = EWOULDBLOCK; 384 break; 385 } else if (ret == 0) { 386 /* Got a signal. */ 387 break; 388 } 389 } 390 error = sctp->sctp_client_errno; 391 sctp->sctp_client_errno = 0; 392 mutex_exit(&sctp->sctp_lock); 393 } 394 395 WAKE_SCTP(sctp); 396 return (error); 397 } 398 399 400 /* Remove this sctp from all hashes so nobody can find it. */ 401 sctp_closei_local(sctp); 402 WAKE_SCTP(sctp); 403 return (error); 404 } 405 406 void 407 sctp_close(sctp_t *sctp) 408 { 409 dprint(3, ("sctp_close %p, state %d\n", (void *)sctp, 410 sctp->sctp_state)); 411 412 RUN_SCTP(sctp); 413 sctp->sctp_detached = 1; 414 sctp->sctp_ulpd = NULL; 415 sctp->sctp_upcalls = NULL; 416 bzero(&sctp->sctp_events, sizeof (sctp->sctp_events)); 417 418 /* If the graceful shutdown has not been completed, just return. */ 419 if (sctp->sctp_state != SCTPS_IDLE) { 420 WAKE_SCTP(sctp); 421 return; 422 } 423 424 /* 425 * Since sctp_t is in SCTPS_IDLE state, so the only thread which 426 * can have a hold on the sctp_t is doing snmp stuff. Just do 427 * a SCTP_REFRELE() here after the SCTP_UNLINK(). It will 428 * be freed when the other thread is done. 429 */ 430 SCTP_CONDEMNED(sctp); 431 WAKE_SCTP(sctp); 432 SCTP_REFRELE(sctp); 433 } 434 435 /* 436 * Unlink from global list and do the eager close. 
437 * Remove the refhold implicit in being on the global list. 438 */ 439 void 440 sctp_close_eager(sctp_t *sctp) 441 { 442 SCTP_CONDEMNED(sctp); 443 sctp_closei_local(sctp); 444 SCTP_REFRELE(sctp); 445 } 446 447 /* 448 * The sctp_t is going away. Remove it from all lists and set it 449 * to SCTPS_IDLE. The caller has to remove it from the 450 * global list. The freeing up of memory is deferred until 451 * sctp_free(). This is needed since a thread in sctp_input() might have 452 * done a SCTP_REFHOLD on this structure before it was removed from the 453 * hashes. 454 */ 455 static void 456 sctp_closei_local(sctp_t *sctp) 457 { 458 mblk_t *mp; 459 conn_t *connp = sctp->sctp_connp; 460 461 /* Sanity check, don't do the same thing twice. */ 462 if (connp->conn_state_flags & CONN_CLOSING) { 463 ASSERT(sctp->sctp_state == SCTPS_IDLE); 464 return; 465 } 466 467 /* Stop and free the timers */ 468 sctp_free_faddr_timers(sctp); 469 if ((mp = sctp->sctp_heartbeat_mp) != NULL) { 470 sctp_timer_free(mp); 471 sctp->sctp_heartbeat_mp = NULL; 472 } 473 if ((mp = sctp->sctp_ack_mp) != NULL) { 474 sctp_timer_free(mp); 475 sctp->sctp_ack_mp = NULL; 476 } 477 478 /* Set the CONN_CLOSING flag so that IP will not cache IRE again. */ 479 mutex_enter(&connp->conn_lock); 480 connp->conn_state_flags |= CONN_CLOSING; 481 mutex_exit(&connp->conn_lock); 482 483 /* Remove from all hashes. */ 484 sctp_bind_hash_remove(sctp); 485 sctp_conn_hash_remove(sctp); 486 sctp_listen_hash_remove(sctp); 487 sctp->sctp_state = SCTPS_IDLE; 488 489 /* 490 * Clean up the recvq as much as possible. All those packets 491 * will be silently dropped as this sctp_t is now in idle state. 
492 */ 493 mutex_enter(&sctp->sctp_recvq_lock); 494 while ((mp = sctp->sctp_recvq) != NULL) { 495 sctp->sctp_recvq = mp->b_next; 496 mp->b_next = NULL; 497 498 if (ip_recv_attr_is_mblk(mp)) 499 mp = ip_recv_attr_free_mblk(mp); 500 501 freemsg(mp); 502 } 503 mutex_exit(&sctp->sctp_recvq_lock); 504 } 505 506 /* 507 * Free memory associated with the sctp/ip header template. 508 */ 509 static void 510 sctp_headers_free(sctp_t *sctp) 511 { 512 if (sctp->sctp_iphc != NULL) { 513 kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len); 514 sctp->sctp_iphc = NULL; 515 sctp->sctp_ipha = NULL; 516 sctp->sctp_hdr_len = 0; 517 sctp->sctp_ip_hdr_len = 0; 518 sctp->sctp_iphc_len = 0; 519 sctp->sctp_sctph = NULL; 520 sctp->sctp_hdr_len = 0; 521 } 522 if (sctp->sctp_iphc6 != NULL) { 523 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 524 sctp->sctp_iphc6 = NULL; 525 sctp->sctp_ip6h = NULL; 526 sctp->sctp_hdr6_len = 0; 527 sctp->sctp_ip_hdr6_len = 0; 528 sctp->sctp_iphc6_len = 0; 529 sctp->sctp_sctph6 = NULL; 530 sctp->sctp_hdr6_len = 0; 531 } 532 } 533 534 static void 535 sctp_free_xmit_data(sctp_t *sctp) 536 { 537 mblk_t *ump = NULL; 538 mblk_t *nump; 539 mblk_t *mp; 540 mblk_t *nmp; 541 542 sctp->sctp_xmit_unacked = NULL; 543 ump = sctp->sctp_xmit_head; 544 sctp->sctp_xmit_tail = sctp->sctp_xmit_head = NULL; 545 free_unsent: 546 for (; ump != NULL; ump = nump) { 547 for (mp = ump->b_cont; mp != NULL; mp = nmp) { 548 nmp = mp->b_next; 549 mp->b_next = NULL; 550 mp->b_prev = NULL; 551 freemsg(mp); 552 } 553 ASSERT(DB_REF(ump) == 1); 554 nump = ump->b_next; 555 ump->b_next = NULL; 556 ump->b_prev = NULL; 557 ump->b_cont = NULL; 558 freeb(ump); 559 } 560 if ((ump = sctp->sctp_xmit_unsent) == NULL) { 561 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 562 return; 563 } 564 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = NULL; 565 goto free_unsent; 566 } 567 568 /* 569 * Cleanup all the messages in the stream queue and the reassembly lists. 
570 * If 'free' is true, then delete the streams as well. 571 */ 572 void 573 sctp_instream_cleanup(sctp_t *sctp, boolean_t free) 574 { 575 int i; 576 mblk_t *mp; 577 mblk_t *mp1; 578 579 if (sctp->sctp_instr != NULL) { 580 /* walk thru and flush out anything remaining in the Q */ 581 for (i = 0; i < sctp->sctp_num_istr; i++) { 582 mp = sctp->sctp_instr[i].istr_msgs; 583 while (mp != NULL) { 584 mp1 = mp->b_next; 585 mp->b_next = mp->b_prev = NULL; 586 freemsg(mp); 587 mp = mp1; 588 } 589 sctp->sctp_instr[i].istr_msgs = NULL; 590 sctp->sctp_instr[i].istr_nmsgs = 0; 591 sctp_free_reass((sctp->sctp_instr) + i); 592 sctp->sctp_instr[i].nextseq = 0; 593 } 594 if (free) { 595 kmem_free(sctp->sctp_instr, 596 sizeof (*sctp->sctp_instr) * sctp->sctp_num_istr); 597 sctp->sctp_instr = NULL; 598 sctp->sctp_num_istr = 0; 599 } 600 } 601 /* un-ordered fragments */ 602 if (sctp->sctp_uo_frags != NULL) { 603 for (mp = sctp->sctp_uo_frags; mp != NULL; mp = mp1) { 604 mp1 = mp->b_next; 605 mp->b_next = mp->b_prev = NULL; 606 freemsg(mp); 607 } 608 sctp->sctp_uo_frags = NULL; 609 } 610 } 611 612 /* 613 * Last reference to the sctp_t is gone. Free all memory associated with it. 614 * Called from SCTP_REFRELE. Called inline in sctp_close() 615 */ 616 void 617 sctp_free(conn_t *connp) 618 { 619 sctp_t *sctp = CONN2SCTP(connp); 620 int cnt; 621 sctp_stack_t *sctps = sctp->sctp_sctps; 622 623 ASSERT(sctps != NULL); 624 /* Unlink it from the global list */ 625 SCTP_UNLINK(sctp, sctps); 626 627 ASSERT(connp->conn_ref == 0); 628 ASSERT(connp->conn_proto == IPPROTO_SCTP); 629 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 630 ASSERT(sctp->sctp_refcnt == 0); 631 632 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 633 ASSERT(sctp->sctp_conn_hash_next == NULL && 634 sctp->sctp_conn_hash_prev == NULL); 635 636 637 /* Free up all the resources. 
*/ 638 639 /* blow away sctp stream management */ 640 if (sctp->sctp_ostrcntrs != NULL) { 641 kmem_free(sctp->sctp_ostrcntrs, 642 sizeof (uint16_t) * sctp->sctp_num_ostr); 643 sctp->sctp_ostrcntrs = NULL; 644 } 645 sctp_instream_cleanup(sctp, B_TRUE); 646 647 /* Remove all data transfer resources. */ 648 sctp->sctp_istr_nmsgs = 0; 649 sctp->sctp_rxqueued = 0; 650 sctp_free_xmit_data(sctp); 651 sctp->sctp_unacked = 0; 652 sctp->sctp_unsent = 0; 653 if (sctp->sctp_cxmit_list != NULL) 654 sctp_asconf_free_cxmit(sctp, NULL); 655 656 sctp->sctp_lastdata = NULL; 657 658 /* Clear out default xmit settings */ 659 sctp->sctp_def_stream = 0; 660 sctp->sctp_def_flags = 0; 661 sctp->sctp_def_ppid = 0; 662 sctp->sctp_def_context = 0; 663 sctp->sctp_def_timetolive = 0; 664 665 if (sctp->sctp_sack_info != NULL) { 666 sctp_free_set(sctp->sctp_sack_info); 667 sctp->sctp_sack_info = NULL; 668 } 669 sctp->sctp_sack_gaps = 0; 670 671 if (sctp->sctp_cookie_mp != NULL) { 672 freemsg(sctp->sctp_cookie_mp); 673 sctp->sctp_cookie_mp = NULL; 674 } 675 676 /* Remove all the address resources. 
*/ 677 sctp_zap_addrs(sctp); 678 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 679 ASSERT(sctp->sctp_saddrs[cnt].ipif_count == 0); 680 list_destroy(&sctp->sctp_saddrs[cnt].sctp_ipif_list); 681 } 682 683 if (sctp->sctp_hopopts != NULL) { 684 mi_free(sctp->sctp_hopopts); 685 sctp->sctp_hopopts = NULL; 686 sctp->sctp_hopoptslen = 0; 687 } 688 ASSERT(sctp->sctp_hopoptslen == 0); 689 if (sctp->sctp_dstopts != NULL) { 690 mi_free(sctp->sctp_dstopts); 691 sctp->sctp_dstopts = NULL; 692 sctp->sctp_dstoptslen = 0; 693 } 694 ASSERT(sctp->sctp_dstoptslen == 0); 695 if (sctp->sctp_rthdrdstopts != NULL) { 696 mi_free(sctp->sctp_rthdrdstopts); 697 sctp->sctp_rthdrdstopts = NULL; 698 sctp->sctp_rthdrdstoptslen = 0; 699 } 700 ASSERT(sctp->sctp_rthdrdstoptslen == 0); 701 if (sctp->sctp_rthdr != NULL) { 702 mi_free(sctp->sctp_rthdr); 703 sctp->sctp_rthdr = NULL; 704 sctp->sctp_rthdrlen = 0; 705 } 706 ASSERT(sctp->sctp_rthdrlen == 0); 707 sctp_headers_free(sctp); 708 709 sctp->sctp_shutdown_faddr = NULL; 710 711 if (sctp->sctp_err_chunks != NULL) { 712 freemsg(sctp->sctp_err_chunks); 713 sctp->sctp_err_chunks = NULL; 714 sctp->sctp_err_len = 0; 715 } 716 717 /* Clear all the bitfields. */ 718 bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits)); 719 720 /* It is time to update the global statistics. 
*/ 721 UPDATE_MIB(&sctps->sctps_mib, sctpOutSCTPPkts, sctp->sctp_opkts); 722 UPDATE_MIB(&sctps->sctps_mib, sctpOutCtrlChunks, sctp->sctp_obchunks); 723 UPDATE_MIB(&sctps->sctps_mib, sctpOutOrderChunks, sctp->sctp_odchunks); 724 UPDATE_MIB(&sctps->sctps_mib, 725 sctpOutUnorderChunks, sctp->sctp_oudchunks); 726 UPDATE_MIB(&sctps->sctps_mib, sctpRetransChunks, sctp->sctp_rxtchunks); 727 UPDATE_MIB(&sctps->sctps_mib, sctpInSCTPPkts, sctp->sctp_ipkts); 728 UPDATE_MIB(&sctps->sctps_mib, sctpInCtrlChunks, sctp->sctp_ibchunks); 729 UPDATE_MIB(&sctps->sctps_mib, sctpInOrderChunks, sctp->sctp_idchunks); 730 UPDATE_MIB(&sctps->sctps_mib, 731 sctpInUnorderChunks, sctp->sctp_iudchunks); 732 UPDATE_MIB(&sctps->sctps_mib, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); 733 UPDATE_MIB(&sctps->sctps_mib, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); 734 sctp->sctp_opkts = 0; 735 sctp->sctp_obchunks = 0; 736 sctp->sctp_odchunks = 0; 737 sctp->sctp_oudchunks = 0; 738 sctp->sctp_rxtchunks = 0; 739 sctp->sctp_ipkts = 0; 740 sctp->sctp_ibchunks = 0; 741 sctp->sctp_idchunks = 0; 742 sctp->sctp_iudchunks = 0; 743 sctp->sctp_fragdmsgs = 0; 744 sctp->sctp_reassmsgs = 0; 745 sctp->sctp_outseqtsns = 0; 746 sctp->sctp_osacks = 0; 747 sctp->sctp_isacks = 0; 748 sctp->sctp_idupchunks = 0; 749 sctp->sctp_gapcnt = 0; 750 sctp->sctp_cum_obchunks = 0; 751 sctp->sctp_cum_odchunks = 0; 752 sctp->sctp_cum_oudchunks = 0; 753 sctp->sctp_cum_rxtchunks = 0; 754 sctp->sctp_cum_ibchunks = 0; 755 sctp->sctp_cum_idchunks = 0; 756 sctp->sctp_cum_iudchunks = 0; 757 758 sctp->sctp_autoclose = 0; 759 sctp->sctp_tx_adaptation_code = 0; 760 761 sctp->sctp_v6label_len = 0; 762 sctp->sctp_v4label_len = 0; 763 764 sctp->sctp_sctps = NULL; 765 766 sctp_conn_clear(connp); 767 kmem_cache_free(sctp_conn_cache, connp); 768 } 769 770 /* Diagnostic routine used to return a string associated with the sctp state. 
*/
char *
sctp_display(sctp_t *sctp, char *sup_buf)
{
	static char	priv_buf[INET6_ADDRSTRLEN * 2 + 80];
	char		unknown[30];
	char		*out;
	char		*state_name;
	conn_t		*connp;

	if (sctp == NULL)
		return ("NULL_SCTP");

	connp = sctp->sctp_connp;

	/*
	 * Format into the caller's buffer when one is given; otherwise
	 * fall back to the function-static buffer, which is shared by
	 * every caller that passes NULL.
	 */
	out = (sup_buf == NULL) ? priv_buf : sup_buf;

	switch (sctp->sctp_state) {
	case SCTPS_IDLE:
		state_name = "SCTP_IDLE";
		break;
	case SCTPS_BOUND:
		state_name = "SCTP_BOUND";
		break;
	case SCTPS_LISTEN:
		state_name = "SCTP_LISTEN";
		break;
	case SCTPS_COOKIE_WAIT:
		state_name = "SCTP_COOKIE_WAIT";
		break;
	case SCTPS_COOKIE_ECHOED:
		state_name = "SCTP_COOKIE_ECHOED";
		break;
	case SCTPS_ESTABLISHED:
		state_name = "SCTP_ESTABLISHED";
		break;
	case SCTPS_SHUTDOWN_PENDING:
		state_name = "SCTP_SHUTDOWN_PENDING";
		break;
	case SCTPS_SHUTDOWN_SENT:
		state_name = "SCTPS_SHUTDOWN_SENT";
		break;
	case SCTPS_SHUTDOWN_RECEIVED:
		state_name = "SCTPS_SHUTDOWN_RECEIVED";
		break;
	case SCTPS_SHUTDOWN_ACK_SENT:
		state_name = "SCTPS_SHUTDOWN_ACK_SENT";
		break;
	default:
		/* Not a state we know: show the raw number instead. */
		(void) mi_sprintf(unknown, "SCTPUnkState(%d)",
		    sctp->sctp_state);
		state_name = unknown;
		break;
	}

	/* "[local port, foreign port] STATE", ports in host byte order. */
	(void) mi_sprintf(out, "[%u, %u] %s",
	    ntohs(connp->conn_lport), ntohs(connp->conn_fport), state_name);

	return (out);
}

/*
 * Initialize protocol control block. If a parent exists, inherit
 * all values set through setsockopt().
831 */ 832 static int 833 sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) 834 { 835 int err; 836 int cnt; 837 sctp_stack_t *sctps = sctp->sctp_sctps; 838 conn_t *connp; 839 840 connp = sctp->sctp_connp; 841 842 sctp->sctp_nsaddrs = 0; 843 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 844 sctp->sctp_saddrs[cnt].ipif_count = 0; 845 list_create(&sctp->sctp_saddrs[cnt].sctp_ipif_list, 846 sizeof (sctp_saddr_ipif_t), offsetof(sctp_saddr_ipif_t, 847 saddr_ipif)); 848 } 849 connp->conn_ports = 0; 850 sctp->sctp_running = B_FALSE; 851 sctp->sctp_state = SCTPS_IDLE; 852 853 sctp->sctp_refcnt = 1; 854 855 sctp->sctp_strikes = 0; 856 857 sctp->sctp_last_mtu_probe = ddi_get_lbolt64(); 858 sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; 859 860 sctp->sctp_sack_gaps = 0; 861 sctp->sctp_sack_toggle = 2; 862 863 /* Only need to do the allocation if there is no "cached" one. */ 864 if (sctp->sctp_pad_mp == NULL) { 865 if (sleep == KM_SLEEP) { 866 sctp->sctp_pad_mp = allocb_wait(SCTP_ALIGN, BPRI_MED, 867 STR_NOSIG, NULL); 868 } else { 869 sctp->sctp_pad_mp = allocb(SCTP_ALIGN, BPRI_MED); 870 if (sctp->sctp_pad_mp == NULL) 871 return (ENOMEM); 872 } 873 bzero(sctp->sctp_pad_mp->b_rptr, SCTP_ALIGN); 874 } 875 876 if (psctp != NULL) { 877 /* 878 * Inherit from parent 879 * 880 * Start by inheriting from the conn_t, including conn_ixa and 881 * conn_xmit_ipp. 
882 */ 883 err = conn_inherit_parent(psctp->sctp_connp, connp); 884 if (err != 0) 885 goto failure; 886 887 sctp->sctp_cookie_lifetime = psctp->sctp_cookie_lifetime; 888 889 sctp->sctp_cwnd_max = psctp->sctp_cwnd_max; 890 sctp->sctp_rwnd = psctp->sctp_rwnd; 891 sctp->sctp_irwnd = psctp->sctp_rwnd; 892 sctp->sctp_pd_point = psctp->sctp_pd_point; 893 sctp->sctp_rto_max = psctp->sctp_rto_max; 894 sctp->sctp_init_rto_max = psctp->sctp_init_rto_max; 895 sctp->sctp_rto_min = psctp->sctp_rto_min; 896 sctp->sctp_rto_initial = psctp->sctp_rto_initial; 897 sctp->sctp_pa_max_rxt = psctp->sctp_pa_max_rxt; 898 sctp->sctp_pp_max_rxt = psctp->sctp_pp_max_rxt; 899 sctp->sctp_max_init_rxt = psctp->sctp_max_init_rxt; 900 901 sctp->sctp_def_stream = psctp->sctp_def_stream; 902 sctp->sctp_def_flags = psctp->sctp_def_flags; 903 sctp->sctp_def_ppid = psctp->sctp_def_ppid; 904 sctp->sctp_def_context = psctp->sctp_def_context; 905 sctp->sctp_def_timetolive = psctp->sctp_def_timetolive; 906 907 sctp->sctp_num_istr = psctp->sctp_num_istr; 908 sctp->sctp_num_ostr = psctp->sctp_num_ostr; 909 910 sctp->sctp_hb_interval = psctp->sctp_hb_interval; 911 sctp->sctp_autoclose = psctp->sctp_autoclose; 912 sctp->sctp_tx_adaptation_code = psctp->sctp_tx_adaptation_code; 913 914 /* xxx should be a better way to copy these flags xxx */ 915 sctp->sctp_bound_to_all = psctp->sctp_bound_to_all; 916 sctp->sctp_cansleep = psctp->sctp_cansleep; 917 sctp->sctp_send_adaptation = psctp->sctp_send_adaptation; 918 sctp->sctp_ndelay = psctp->sctp_ndelay; 919 sctp->sctp_events = psctp->sctp_events; 920 } else { 921 /* 922 * Set to system defaults 923 */ 924 sctp->sctp_cookie_lifetime = 925 MSEC_TO_TICK(sctps->sctps_cookie_life); 926 connp->conn_sndlowat = sctps->sctps_xmit_lowat; 927 connp->conn_sndbuf = sctps->sctps_xmit_hiwat; 928 connp->conn_rcvbuf = sctps->sctps_recv_hiwat; 929 930 sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; 931 sctp->sctp_rwnd = connp->conn_rcvbuf; 932 sctp->sctp_irwnd = sctp->sctp_rwnd; 933 
sctp->sctp_pd_point = sctp->sctp_rwnd; 934 sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); 935 sctp->sctp_init_rto_max = sctp->sctp_rto_max; 936 sctp->sctp_rto_min = MSEC_TO_TICK(sctps->sctps_rto_ming); 937 sctp->sctp_rto_initial = MSEC_TO_TICK( 938 sctps->sctps_rto_initialg); 939 sctp->sctp_pa_max_rxt = sctps->sctps_pa_max_retr; 940 sctp->sctp_pp_max_rxt = sctps->sctps_pp_max_retr; 941 sctp->sctp_max_init_rxt = sctps->sctps_max_init_retr; 942 943 sctp->sctp_num_istr = sctps->sctps_max_in_streams; 944 sctp->sctp_num_ostr = sctps->sctps_initial_out_streams; 945 946 sctp->sctp_hb_interval = 947 MSEC_TO_TICK(sctps->sctps_heartbeat_interval); 948 949 if (connp->conn_family == AF_INET) 950 connp->conn_default_ttl = sctps->sctps_ipv4_ttl; 951 else 952 connp->conn_default_ttl = sctps->sctps_ipv6_hoplimit; 953 954 connp->conn_xmit_ipp.ipp_unicast_hops = 955 connp->conn_default_ttl; 956 957 /* 958 * Initialize the header template 959 */ 960 if ((err = sctp_build_hdrs(sctp, sleep)) != 0) { 961 goto failure; 962 } 963 } 964 965 sctp->sctp_understands_asconf = B_TRUE; 966 sctp->sctp_understands_addip = B_TRUE; 967 sctp->sctp_prsctp_aware = B_FALSE; 968 969 sctp->sctp_connp->conn_ref = 1; 970 971 sctp->sctp_prsctpdrop = 0; 972 sctp->sctp_msgcount = 0; 973 974 return (0); 975 976 failure: 977 sctp_headers_free(sctp); 978 return (err); 979 } 980 981 /* 982 * Extracts the init tag from an INIT chunk and checks if it matches 983 * the sctp's verification tag. Returns 0 if it doesn't match, 1 if 984 * it does. 
985 */ 986 static boolean_t 987 sctp_icmp_verf(sctp_t *sctp, sctp_hdr_t *sh, mblk_t *mp) 988 { 989 sctp_chunk_hdr_t *sch; 990 uint32_t verf, *vp; 991 992 sch = (sctp_chunk_hdr_t *)(sh + 1); 993 vp = (uint32_t *)(sch + 1); 994 995 /* Need at least the data chunk hdr and the first 4 bytes of INIT */ 996 if ((unsigned char *)(vp + 1) > mp->b_wptr) { 997 return (B_FALSE); 998 } 999 1000 bcopy(vp, &verf, sizeof (verf)); 1001 1002 if (verf == sctp->sctp_lvtag) { 1003 return (B_TRUE); 1004 } 1005 return (B_FALSE); 1006 } 1007 1008 /* 1009 * Update the SCTP state according to change of PMTU. 1010 * 1011 * Path MTU might have changed by either increase or decrease, so need to 1012 * adjust the MSS based on the value of ixa_pmtu. 1013 */ 1014 static void 1015 sctp_update_pmtu(sctp_t *sctp, sctp_faddr_t *fp, boolean_t decrease_only) 1016 { 1017 uint32_t pmtu; 1018 int32_t mss; 1019 ip_xmit_attr_t *ixa = fp->ixa; 1020 1021 if (sctp->sctp_state < SCTPS_ESTABLISHED) 1022 return; 1023 1024 /* 1025 * Always call ip_get_pmtu() to make sure that IP has updated 1026 * ixa_flags properly. 1027 */ 1028 pmtu = ip_get_pmtu(ixa); 1029 1030 /* 1031 * Calculate the MSS by decreasing the PMTU by sctp_hdr_len and 1032 * IPsec overhead if applied. Make sure to use the most recent 1033 * IPsec information. 1034 */ 1035 mss = pmtu - conn_ipsec_length(sctp->sctp_connp); 1036 if (ixa->ixa_flags & IXAF_IS_IPV4) 1037 mss -= sctp->sctp_hdr_len; 1038 else 1039 mss -= sctp->sctp_hdr6_len; 1040 1041 /* 1042 * Nothing to change, so just return. 1043 */ 1044 if (mss == fp->sfa_pmss) 1045 return; 1046 1047 /* 1048 * Currently, for ICMP errors, only PMTU decrease is handled. 1049 */ 1050 if (mss > fp->sfa_pmss && decrease_only) 1051 return; 1052 1053 #ifdef DEBUG 1054 (void) printf("sctp_update_pmtu mss from %d to %d\n", 1055 fp->sfa_pmss, mss); 1056 #endif 1057 DTRACE_PROBE2(sctp_update_pmtu, int32_t, fp->sfa_pmss, uint32_t, mss); 1058 1059 /* 1060 * Update ixa_fragsize and ixa_pmtu. 
1061 */ 1062 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1063 1064 /* 1065 * Make sure that sfa_pmss is a multiple of 1066 * SCTP_ALIGN. 1067 */ 1068 fp->sfa_pmss = mss & ~(SCTP_ALIGN - 1); 1069 fp->pmtu_discovered = 1; 1070 1071 #ifdef notyet 1072 if (mss < sctp->sctp_sctps->sctps_mss_min) 1073 ixa->ixa_flags |= IXAF_PMTU_TOO_SMALL; 1074 #endif 1075 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) 1076 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF); 1077 1078 /* 1079 * If below the min size then ip_get_pmtu cleared IXAF_PMTU_IPV4_DF. 1080 * Make sure to clear IXAF_DONTFRAG, which is used by IP to decide 1081 * whether to fragment the packet. 1082 */ 1083 if (ixa->ixa_flags & IXAF_IS_IPV4) { 1084 if (!(ixa->ixa_flags & IXAF_PMTU_IPV4_DF)) { 1085 fp->df = B_FALSE; 1086 if (fp == sctp->sctp_current) { 1087 sctp->sctp_ipha-> 1088 ipha_fragment_offset_and_flags = 0; 1089 } 1090 } 1091 } 1092 } 1093 1094 /* 1095 * Notify function registered with ip_xmit_attr_t. It's called in the context 1096 * of conn_ip_output so it's safe to update the SCTP state. 1097 * Currently only used for pmtu changes. 1098 */ 1099 /* ARGSUSED1 */ 1100 static void 1101 sctp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1102 ixa_notify_arg_t narg) 1103 { 1104 sctp_t *sctp = (sctp_t *)arg; 1105 sctp_faddr_t *fp; 1106 1107 switch (ntype) { 1108 case IXAN_PMTU: 1109 /* Find the faddr based on the ip_xmit_attr_t pointer */ 1110 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->next) { 1111 if (fp->ixa == ixa) 1112 break; 1113 } 1114 if (fp != NULL) 1115 sctp_update_pmtu(sctp, fp, B_FALSE); 1116 break; 1117 default: 1118 break; 1119 } 1120 } 1121 1122 /* 1123 * sctp_icmp_error is called by sctp_input() to process ICMP error messages 1124 * passed up by IP. We need to find a sctp_t 1125 * that corresponds to the returned datagram. Passes the message back in on 1126 * the correct queue once it has located the connection. 
1127 * Assumes that IP has pulled up everything up to and including 1128 * the ICMP header. 1129 */ 1130 void 1131 sctp_icmp_error(sctp_t *sctp, mblk_t *mp) 1132 { 1133 icmph_t *icmph; 1134 ipha_t *ipha; 1135 int iph_hdr_length; 1136 sctp_hdr_t *sctph; 1137 in6_addr_t dst; 1138 sctp_faddr_t *fp; 1139 sctp_stack_t *sctps = sctp->sctp_sctps; 1140 1141 dprint(1, ("sctp_icmp_error: sctp=%p, mp=%p\n", (void *)sctp, 1142 (void *)mp)); 1143 1144 ipha = (ipha_t *)mp->b_rptr; 1145 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1146 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1147 sctp_icmp_error_ipv6(sctp, mp); 1148 return; 1149 } 1150 1151 /* account for the ip hdr from the icmp message */ 1152 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1153 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1154 /* now the ip hdr of message resulting in this icmp */ 1155 ipha = (ipha_t *)&icmph[1]; 1156 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1157 sctph = (sctp_hdr_t *)((char *)ipha + iph_hdr_length); 1158 /* first_mp must expose the full sctp header. */ 1159 if ((uchar_t *)(sctph + 1) >= mp->b_wptr) { 1160 /* not enough data for SCTP header */ 1161 freemsg(mp); 1162 return; 1163 } 1164 1165 switch (icmph->icmph_type) { 1166 case ICMP_DEST_UNREACHABLE: 1167 switch (icmph->icmph_code) { 1168 case ICMP_FRAGMENTATION_NEEDED: 1169 /* 1170 * Reduce the MSS based on the new MTU. This will 1171 * eliminate any fragmentation locally. 1172 * N.B. There may well be some funny side-effects on 1173 * the local send policy and the remote receive policy. 1174 * Pending further research, we provide 1175 * sctp_ignore_path_mtu just in case this proves 1176 * disastrous somewhere. 1177 * 1178 * After updating the MSS, retransmit part of the 1179 * dropped segment using the new mss by calling 1180 * sctp_wput_slow(). Need to adjust all those 1181 * params to make sure sctp_wput_slow() work properly. 
1182 */ 1183 if (sctps->sctps_ignore_path_mtu) 1184 break; 1185 1186 /* find the offending faddr */ 1187 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst); 1188 fp = sctp_lookup_faddr(sctp, &dst); 1189 if (fp == NULL) { 1190 break; 1191 } 1192 sctp_update_pmtu(sctp, fp, B_TRUE); 1193 /* 1194 * It is possible, even likely that a fast retransmit 1195 * attempt has been dropped by ip as a result of this 1196 * error, retransmission bundles as much as possible. 1197 * A retransmit here prevents significant delays waiting 1198 * on the timer. Analogous to behaviour of TCP after 1199 * ICMP too big. 1200 */ 1201 sctp_rexmit(sctp, fp); 1202 break; 1203 case ICMP_PORT_UNREACHABLE: 1204 case ICMP_PROTOCOL_UNREACHABLE: 1205 switch (sctp->sctp_state) { 1206 case SCTPS_COOKIE_WAIT: 1207 case SCTPS_COOKIE_ECHOED: 1208 /* make sure the verification tag matches */ 1209 if (!sctp_icmp_verf(sctp, sctph, mp)) { 1210 break; 1211 } 1212 BUMP_MIB(&sctps->sctps_mib, sctpAborted); 1213 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1214 NULL); 1215 sctp_clean_death(sctp, ECONNREFUSED); 1216 break; 1217 } 1218 break; 1219 case ICMP_HOST_UNREACHABLE: 1220 case ICMP_NET_UNREACHABLE: 1221 /* Record the error in case we finally time out. */ 1222 sctp->sctp_client_errno = (icmph->icmph_code == 1223 ICMP_HOST_UNREACHABLE) ? EHOSTUNREACH : ENETUNREACH; 1224 break; 1225 default: 1226 break; 1227 } 1228 break; 1229 case ICMP_SOURCE_QUENCH: { 1230 /* Reduce the sending rate as if we got a retransmit timeout */ 1231 break; 1232 } 1233 } 1234 freemsg(mp); 1235 } 1236 1237 /* 1238 * sctp_icmp_error_ipv6() is called by sctp_icmp_error() to process ICMPv6 1239 * error messages passed up by IP. 1240 * Assumes that IP has pulled up all the extension headers as well 1241 * as the ICMPv6 header. 
1242 */ 1243 static void 1244 sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) 1245 { 1246 icmp6_t *icmp6; 1247 ip6_t *ip6h; 1248 uint16_t iph_hdr_length; 1249 sctp_hdr_t *sctpha; 1250 uint8_t *nexthdrp; 1251 sctp_faddr_t *fp; 1252 sctp_stack_t *sctps = sctp->sctp_sctps; 1253 1254 ip6h = (ip6_t *)mp->b_rptr; 1255 iph_hdr_length = (ip6h->ip6_nxt != IPPROTO_SCTP) ? 1256 ip_hdr_length_v6(mp, ip6h) : IPV6_HDR_LEN; 1257 1258 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1259 ip6h = (ip6_t *)&icmp6[1]; 1260 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1261 freemsg(mp); 1262 return; 1263 } 1264 ASSERT(*nexthdrp == IPPROTO_SCTP); 1265 1266 /* XXX need ifindex to find connection */ 1267 sctpha = (sctp_hdr_t *)((char *)ip6h + iph_hdr_length); 1268 if ((uchar_t *)sctpha >= mp->b_wptr) { 1269 /* not enough data for SCTP header */ 1270 freemsg(mp); 1271 return; 1272 } 1273 switch (icmp6->icmp6_type) { 1274 case ICMP6_PACKET_TOO_BIG: 1275 /* 1276 * Reduce the MSS based on the new MTU. This will 1277 * eliminate any fragmentation locally. 1278 * N.B. There may well be some funny side-effects on 1279 * the local send policy and the remote receive policy. 1280 * Pending further research, we provide 1281 * sctp_ignore_path_mtu just in case this proves 1282 * disastrous somewhere. 1283 * 1284 * After updating the MSS, retransmit part of the 1285 * dropped segment using the new mss by calling 1286 * sctp_wput_slow(). Need to adjust all those 1287 * params to make sure sctp_wput_slow() work properly. 1288 */ 1289 if (sctps->sctps_ignore_path_mtu) 1290 break; 1291 1292 /* find the offending faddr */ 1293 fp = sctp_lookup_faddr(sctp, &ip6h->ip6_dst); 1294 if (fp == NULL) { 1295 break; 1296 } 1297 1298 sctp_update_pmtu(sctp, fp, B_TRUE); 1299 /* 1300 * It is possible, even likely that a fast retransmit 1301 * attempt has been dropped by ip as a result of this 1302 * error, retransmission bundles as much as possible. 
1303 * A retransmit here prevents significant delays waiting 1304 * on the timer. Analogous to behaviour of TCP after 1305 * ICMP too big. 1306 */ 1307 sctp_rexmit(sctp, fp); 1308 break; 1309 1310 case ICMP6_DST_UNREACH: 1311 switch (icmp6->icmp6_code) { 1312 case ICMP6_DST_UNREACH_NOPORT: 1313 /* make sure the verification tag matches */ 1314 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1315 break; 1316 } 1317 if (sctp->sctp_state == SCTPS_COOKIE_WAIT || 1318 sctp->sctp_state == SCTPS_COOKIE_ECHOED) { 1319 BUMP_MIB(&sctps->sctps_mib, sctpAborted); 1320 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1321 NULL); 1322 sctp_clean_death(sctp, ECONNREFUSED); 1323 } 1324 break; 1325 1326 case ICMP6_DST_UNREACH_ADMIN: 1327 case ICMP6_DST_UNREACH_NOROUTE: 1328 case ICMP6_DST_UNREACH_NOTNEIGHBOR: 1329 case ICMP6_DST_UNREACH_ADDR: 1330 /* Record the error in case we finally time out. */ 1331 sctp->sctp_client_errno = EHOSTUNREACH; 1332 break; 1333 default: 1334 break; 1335 } 1336 break; 1337 1338 case ICMP6_PARAM_PROB: 1339 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1340 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1341 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1342 (uchar_t *)nexthdrp) { 1343 /* make sure the verification tag matches */ 1344 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1345 break; 1346 } 1347 if (sctp->sctp_state == SCTPS_COOKIE_WAIT) { 1348 BUMP_MIB(&sctps->sctps_mib, sctpAborted); 1349 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1350 NULL); 1351 sctp_clean_death(sctp, ECONNREFUSED); 1352 } 1353 break; 1354 } 1355 break; 1356 1357 case ICMP6_TIME_EXCEEDED: 1358 default: 1359 break; 1360 } 1361 freemsg(mp); 1362 } 1363 1364 /* 1365 * Called by sockfs to create a new sctp instance. 1366 * 1367 * If parent pointer is passed in, inherit settings from it. 
1368 */ 1369 sctp_t * 1370 sctp_create(void *ulpd, sctp_t *parent, int family, int type, int flags, 1371 sock_upcalls_t *upcalls, sctp_sockbuf_limits_t *sbl, 1372 cred_t *credp) 1373 { 1374 sctp_t *sctp, *psctp; 1375 conn_t *connp; 1376 mblk_t *ack_mp, *hb_mp; 1377 int sleep = flags & SCTP_CAN_BLOCK ? KM_SLEEP : KM_NOSLEEP; 1378 zoneid_t zoneid; 1379 sctp_stack_t *sctps; 1380 1381 /* User must supply a credential. */ 1382 if (credp == NULL) 1383 return (NULL); 1384 1385 psctp = (sctp_t *)parent; 1386 if (psctp != NULL) { 1387 sctps = psctp->sctp_sctps; 1388 /* Increase here to have common decrease at end */ 1389 netstack_hold(sctps->sctps_netstack); 1390 } else { 1391 netstack_t *ns; 1392 1393 ns = netstack_find_by_cred(credp); 1394 ASSERT(ns != NULL); 1395 sctps = ns->netstack_sctp; 1396 ASSERT(sctps != NULL); 1397 1398 /* 1399 * For exclusive stacks we set the zoneid to zero 1400 * to make SCTP operate as if in the global zone. 1401 */ 1402 if (sctps->sctps_netstack->netstack_stackid != 1403 GLOBAL_NETSTACKID) 1404 zoneid = GLOBAL_ZONEID; 1405 else 1406 zoneid = crgetzoneid(credp); 1407 } 1408 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, sleep, 1409 sctps->sctps_netstack)) == NULL) { 1410 netstack_rele(sctps->sctps_netstack); 1411 SCTP_KSTAT(sctps, sctp_conn_create); 1412 return (NULL); 1413 } 1414 /* 1415 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1416 * done at top of sctp_create. 
1417 */ 1418 netstack_rele(sctps->sctps_netstack); 1419 sctp = CONN2SCTP(connp); 1420 sctp->sctp_sctps = sctps; 1421 1422 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, sleep)) == NULL || 1423 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 1424 sleep)) == NULL) { 1425 if (ack_mp != NULL) 1426 freeb(ack_mp); 1427 sctp_conn_clear(connp); 1428 sctp->sctp_sctps = NULL; 1429 kmem_cache_free(sctp_conn_cache, connp); 1430 return (NULL); 1431 } 1432 1433 sctp->sctp_ack_mp = ack_mp; 1434 sctp->sctp_heartbeat_mp = hb_mp; 1435 1436 /* 1437 * Have conn_ip_output drop packets should our outer source 1438 * go invalid, and tell us about mtu changes. 1439 */ 1440 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 1441 IXAF_VERIFY_PMTU; 1442 connp->conn_family = family; 1443 connp->conn_so_type = type; 1444 1445 if (sctp_init_values(sctp, psctp, sleep) != 0) { 1446 freeb(ack_mp); 1447 freeb(hb_mp); 1448 sctp_conn_clear(connp); 1449 sctp->sctp_sctps = NULL; 1450 kmem_cache_free(sctp_conn_cache, connp); 1451 return (NULL); 1452 } 1453 sctp->sctp_cansleep = ((flags & SCTP_CAN_BLOCK) == SCTP_CAN_BLOCK); 1454 1455 sctp->sctp_mss = sctps->sctps_initial_mtu - ((family == AF_INET6) ? 1456 sctp->sctp_hdr6_len : sctp->sctp_hdr_len); 1457 1458 if (psctp != NULL) { 1459 conn_t *pconnp = psctp->sctp_connp; 1460 1461 RUN_SCTP(psctp); 1462 /* 1463 * Inherit local address list, local port. Parent is either 1464 * in SCTPS_BOUND, or SCTPS_LISTEN state. 1465 */ 1466 ASSERT((psctp->sctp_state == SCTPS_BOUND) || 1467 (psctp->sctp_state == SCTPS_LISTEN)); 1468 if (sctp_dup_saddrs(psctp, sctp, sleep)) { 1469 WAKE_SCTP(psctp); 1470 freeb(ack_mp); 1471 freeb(hb_mp); 1472 sctp_headers_free(sctp); 1473 sctp_conn_clear(connp); 1474 sctp->sctp_sctps = NULL; 1475 kmem_cache_free(sctp_conn_cache, connp); 1476 return (NULL); 1477 } 1478 1479 /* 1480 * If the parent is specified, it'll be immediatelly 1481 * followed by sctp_connect(). So don't add this guy to 1482 * bind hash. 
1483 */ 1484 connp->conn_lport = pconnp->conn_lport; 1485 sctp->sctp_state = SCTPS_BOUND; 1486 WAKE_SCTP(psctp); 1487 } else { 1488 ASSERT(connp->conn_cred == NULL); 1489 connp->conn_zoneid = zoneid; 1490 /* 1491 * conn_allzones can not be set this early, hence 1492 * no IPCL_ZONEID 1493 */ 1494 connp->conn_ixa->ixa_zoneid = zoneid; 1495 connp->conn_open_time = ddi_get_lbolt64(); 1496 connp->conn_cred = credp; 1497 crhold(credp); 1498 connp->conn_cpid = curproc->p_pid; 1499 1500 /* 1501 * If the caller has the process-wide flag set, then default to 1502 * MAC exempt mode. This allows read-down to unlabeled hosts. 1503 */ 1504 if (getpflags(NET_MAC_AWARE, credp) != 0) 1505 connp->conn_mac_mode = CONN_MAC_AWARE; 1506 1507 connp->conn_zone_is_global = 1508 (crgetzoneid(credp) == GLOBAL_ZONEID); 1509 } 1510 1511 /* Initialize SCTP instance values, our verf tag must never be 0 */ 1512 (void) random_get_pseudo_bytes((uint8_t *)&sctp->sctp_lvtag, 1513 sizeof (sctp->sctp_lvtag)); 1514 if (sctp->sctp_lvtag == 0) 1515 sctp->sctp_lvtag = (uint32_t)gethrtime(); 1516 ASSERT(sctp->sctp_lvtag != 0); 1517 1518 sctp->sctp_ltsn = sctp->sctp_lvtag + 1; 1519 sctp->sctp_lcsn = sctp->sctp_ltsn; 1520 sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd = sctp->sctp_ltsn - 1; 1521 sctp->sctp_adv_pap = sctp->sctp_lastack_rxd; 1522 1523 /* Information required by upper layer */ 1524 ASSERT(ulpd != NULL); 1525 sctp->sctp_ulpd = ulpd; 1526 1527 ASSERT(upcalls != NULL); 1528 sctp->sctp_upcalls = upcalls; 1529 ASSERT(sbl != NULL); 1530 /* Fill in the socket buffer limits for sctpsockfs */ 1531 sbl->sbl_txlowat = connp->conn_sndlowat; 1532 sbl->sbl_txbuf = connp->conn_sndbuf; 1533 sbl->sbl_rxbuf = sctp->sctp_rwnd; 1534 sbl->sbl_rxlowat = SCTP_RECV_LOWATER; 1535 1536 /* Insert this in the global list. 
*/ 1537 SCTP_LINK(sctp, sctps); 1538 1539 return (sctp); 1540 } 1541 1542 /* Run at module load time */ 1543 void 1544 sctp_ddi_g_init(void) 1545 { 1546 /* Create sctp_t/conn_t cache */ 1547 sctp_conn_cache_init(); 1548 1549 /* Create the faddr cache */ 1550 sctp_faddr_init(); 1551 1552 /* Create the sets cache */ 1553 sctp_sets_init(); 1554 1555 /* Create the PR-SCTP sets cache */ 1556 sctp_ftsn_sets_init(); 1557 1558 /* Initialize tables used for CRC calculation */ 1559 sctp_crc32_init(); 1560 1561 /* 1562 * We want to be informed each time a stack is created or 1563 * destroyed in the kernel, so we can maintain the 1564 * set of sctp_stack_t's. 1565 */ 1566 netstack_register(NS_SCTP, sctp_stack_init, NULL, sctp_stack_fini); 1567 } 1568 1569 static void * 1570 sctp_stack_init(netstackid_t stackid, netstack_t *ns) 1571 { 1572 sctp_stack_t *sctps; 1573 size_t arrsz; 1574 1575 sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP); 1576 sctps->sctps_netstack = ns; 1577 1578 /* Initialize locks */ 1579 mutex_init(&sctps->sctps_g_lock, NULL, MUTEX_DEFAULT, NULL); 1580 mutex_init(&sctps->sctps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 1581 sctps->sctps_g_num_epriv_ports = SCTP_NUM_EPRIV_PORTS; 1582 sctps->sctps_g_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 1583 sctps->sctps_g_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 1584 1585 /* Initialize SCTP hash arrays. */ 1586 sctp_hash_init(sctps); 1587 1588 arrsz = sctp_propinfo_count * sizeof (mod_prop_info_t); 1589 sctps->sctps_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 1590 KM_SLEEP); 1591 bcopy(sctp_propinfo_tbl, sctps->sctps_propinfo_tbl, arrsz); 1592 1593 /* Initialize the recvq taskq. */ 1594 sctp_rq_tq_init(sctps); 1595 1596 /* saddr init */ 1597 sctp_saddr_init(sctps); 1598 1599 /* Global SCTP PCB list. */ 1600 list_create(&sctps->sctps_g_list, sizeof (sctp_t), 1601 offsetof(sctp_t, sctp_list)); 1602 1603 /* Initialize sctp kernel stats. 
*/ 1604 sctps->sctps_mibkp = sctp_kstat_init(stackid); 1605 sctps->sctps_kstat = 1606 sctp_kstat2_init(stackid, &sctps->sctps_statistics); 1607 1608 return (sctps); 1609 } 1610 1611 /* 1612 * Called when the module is about to be unloaded. 1613 */ 1614 void 1615 sctp_ddi_g_destroy(void) 1616 { 1617 /* Destroy sctp_t/conn_t caches */ 1618 sctp_conn_cache_fini(); 1619 1620 /* Destroy the faddr cache */ 1621 sctp_faddr_fini(); 1622 1623 /* Destroy the sets cache */ 1624 sctp_sets_fini(); 1625 1626 /* Destroy the PR-SCTP sets cache */ 1627 sctp_ftsn_sets_fini(); 1628 1629 netstack_unregister(NS_SCTP); 1630 } 1631 1632 /* 1633 * Free the SCTP stack instance. 1634 */ 1635 static void 1636 sctp_stack_fini(netstackid_t stackid, void *arg) 1637 { 1638 sctp_stack_t *sctps = (sctp_stack_t *)arg; 1639 1640 kmem_free(sctps->sctps_propinfo_tbl, 1641 sctp_propinfo_count * sizeof (mod_prop_info_t)); 1642 sctps->sctps_propinfo_tbl = NULL; 1643 1644 /* Destroy the recvq taskqs. */ 1645 sctp_rq_tq_fini(sctps); 1646 1647 /* Destroy saddr */ 1648 sctp_saddr_fini(sctps); 1649 1650 /* Global SCTP PCB list. */ 1651 list_destroy(&sctps->sctps_g_list); 1652 1653 /* Destroy SCTP hash arrays. */ 1654 sctp_hash_destroy(sctps); 1655 1656 /* Destroy SCTP kernel stats. 
*/ 1657 sctp_kstat2_fini(stackid, sctps->sctps_kstat); 1658 sctps->sctps_kstat = NULL; 1659 bzero(&sctps->sctps_statistics, sizeof (sctps->sctps_statistics)); 1660 1661 sctp_kstat_fini(stackid, sctps->sctps_mibkp); 1662 sctps->sctps_mibkp = NULL; 1663 1664 mutex_destroy(&sctps->sctps_g_lock); 1665 mutex_destroy(&sctps->sctps_epriv_port_lock); 1666 1667 kmem_free(sctps, sizeof (*sctps)); 1668 } 1669 1670 void 1671 sctp_display_all(sctp_stack_t *sctps) 1672 { 1673 sctp_t *sctp_walker; 1674 1675 mutex_enter(&sctps->sctps_g_lock); 1676 for (sctp_walker = list_head(&sctps->sctps_g_list); 1677 sctp_walker != NULL; 1678 sctp_walker = (sctp_t *)list_next(&sctps->sctps_g_list, 1679 sctp_walker)) { 1680 (void) sctp_display(sctp_walker, NULL); 1681 } 1682 mutex_exit(&sctps->sctps_g_lock); 1683 } 1684 1685 static void 1686 sctp_rq_tq_init(sctp_stack_t *sctps) 1687 { 1688 sctps->sctps_recvq_tq_list_max_sz = 16; 1689 sctps->sctps_recvq_tq_list_cur_sz = 1; 1690 /* 1691 * Initialize the recvq_tq_list and create the first recvq taskq. 1692 * What to do if it fails? 1693 */ 1694 sctps->sctps_recvq_tq_list = 1695 kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *), 1696 KM_SLEEP); 1697 sctps->sctps_recvq_tq_list[0] = taskq_create("sctp_def_recvq_taskq", 1698 MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), 1699 minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, 1700 TASKQ_PREPOPULATE); 1701 mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); 1702 } 1703 1704 static void 1705 sctp_rq_tq_fini(sctp_stack_t *sctps) 1706 { 1707 int i; 1708 1709 for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) { 1710 ASSERT(sctps->sctps_recvq_tq_list[i] != NULL); 1711 taskq_destroy(sctps->sctps_recvq_tq_list[i]); 1712 } 1713 kmem_free(sctps->sctps_recvq_tq_list, 1714 sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *)); 1715 sctps->sctps_recvq_tq_list = NULL; 1716 } 1717 1718 /* Add another taskq for a new ill. 
*/ 1719 void 1720 sctp_inc_taskq(sctp_stack_t *sctps) 1721 { 1722 taskq_t *tq; 1723 char tq_name[TASKQ_NAMELEN]; 1724 1725 mutex_enter(&sctps->sctps_rq_tq_lock); 1726 if (sctps->sctps_recvq_tq_list_cur_sz + 1 > 1727 sctps->sctps_recvq_tq_list_max_sz) { 1728 mutex_exit(&sctps->sctps_rq_tq_lock); 1729 cmn_err(CE_NOTE, "Cannot create more SCTP recvq taskq"); 1730 return; 1731 } 1732 1733 (void) snprintf(tq_name, sizeof (tq_name), "sctp_recvq_taskq_%u", 1734 sctps->sctps_recvq_tq_list_cur_sz); 1735 tq = taskq_create(tq_name, 1736 MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, ncpus)), 1737 minclsyspri, sctp_recvq_tq_task_min, sctp_recvq_tq_task_max, 1738 TASKQ_PREPOPULATE); 1739 if (tq == NULL) { 1740 mutex_exit(&sctps->sctps_rq_tq_lock); 1741 cmn_err(CE_NOTE, "SCTP recvq taskq creation failed"); 1742 return; 1743 } 1744 ASSERT(sctps->sctps_recvq_tq_list[ 1745 sctps->sctps_recvq_tq_list_cur_sz] == NULL); 1746 sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz] = tq; 1747 atomic_add_32(&sctps->sctps_recvq_tq_list_cur_sz, 1); 1748 mutex_exit(&sctps->sctps_rq_tq_lock); 1749 } 1750 1751 #ifdef DEBUG 1752 uint32_t recvq_loop_cnt = 0; 1753 uint32_t recvq_call = 0; 1754 #endif 1755 1756 /* 1757 * Find the next recvq_tq to use. This routine will go thru all the 1758 * taskqs until it can dispatch a job for the sctp. If this fails, 1759 * it will create a new taskq and try it. 1760 */ 1761 static boolean_t 1762 sctp_find_next_tq(sctp_t *sctp) 1763 { 1764 int next_tq, try; 1765 taskq_t *tq; 1766 sctp_stack_t *sctps = sctp->sctp_sctps; 1767 1768 /* 1769 * Note that since we don't hold a lock on sctp_rq_tq_lock for 1770 * performance reason, recvq_ta_list_cur_sz can be changed during 1771 * this loop. The problem this will create is that the loop may 1772 * not have tried all the recvq_tq. This should be OK. 
1773 */ 1774 next_tq = atomic_add_32_nv(&sctps->sctps_recvq_tq_list_cur, 1) % 1775 sctps->sctps_recvq_tq_list_cur_sz; 1776 for (try = 0; try < sctps->sctps_recvq_tq_list_cur_sz; try++) { 1777 tq = sctps->sctps_recvq_tq_list[next_tq]; 1778 if (taskq_dispatch(tq, sctp_process_recvq, sctp, 1779 TQ_NOSLEEP) != NULL) { 1780 sctp->sctp_recvq_tq = tq; 1781 return (B_TRUE); 1782 } 1783 next_tq = (next_tq + 1) % sctps->sctps_recvq_tq_list_cur_sz; 1784 } 1785 1786 /* 1787 * Create one more taskq and try it. Note that sctp_inc_taskq() 1788 * may not have created another taskq if the number of recvq 1789 * taskqs is at the maximum. We are probably in a pretty bad 1790 * shape if this actually happens... 1791 */ 1792 sctp_inc_taskq(sctps); 1793 tq = sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz - 1]; 1794 if (taskq_dispatch(tq, sctp_process_recvq, sctp, TQ_NOSLEEP) != NULL) { 1795 sctp->sctp_recvq_tq = tq; 1796 return (B_TRUE); 1797 } 1798 SCTP_KSTAT(sctps, sctp_find_next_tq); 1799 return (B_FALSE); 1800 } 1801 1802 /* 1803 * To add a message to the recvq. Note that the sctp_timer_fire() 1804 * routine also uses this function to add the timer message to the 1805 * receive queue for later processing. And it should be the only 1806 * caller of sctp_add_recvq() which sets the try_harder argument 1807 * to B_TRUE. 1808 * 1809 * If the try_harder argument is B_TRUE, this routine sctp_find_next_tq() 1810 * will try very hard to dispatch the task. Refer to the comment 1811 * for that routine on how it does that. 1812 * 1813 * On failure the message has been freed i.e., this routine always consumes the 1814 * message. It bumps ipIfStatsInDiscards and and uses ip_drop_input to drop. 
1815 */ 1816 void 1817 sctp_add_recvq(sctp_t *sctp, mblk_t *mp, boolean_t caller_hold_lock, 1818 ip_recv_attr_t *ira) 1819 { 1820 mblk_t *attrmp; 1821 ip_stack_t *ipst = sctp->sctp_sctps->sctps_netstack->netstack_ip; 1822 1823 ASSERT(ira->ira_ill == NULL); 1824 1825 if (!caller_hold_lock) 1826 mutex_enter(&sctp->sctp_recvq_lock); 1827 1828 /* If the taskq dispatch has not been scheduled, do it now. */ 1829 if (sctp->sctp_recvq_tq == NULL) { 1830 ASSERT(sctp->sctp_recvq == NULL); 1831 if (!sctp_find_next_tq(sctp)) { 1832 if (!caller_hold_lock) 1833 mutex_exit(&sctp->sctp_recvq_lock); 1834 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1835 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1836 freemsg(mp); 1837 return; 1838 } 1839 /* Make sure the sctp_t will not go away. */ 1840 SCTP_REFHOLD(sctp); 1841 } 1842 1843 attrmp = ip_recv_attr_to_mblk(ira); 1844 if (attrmp == NULL) { 1845 if (!caller_hold_lock) 1846 mutex_exit(&sctp->sctp_recvq_lock); 1847 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1848 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1849 freemsg(mp); 1850 return; 1851 } 1852 ASSERT(attrmp->b_cont == NULL); 1853 attrmp->b_cont = mp; 1854 mp = attrmp; 1855 1856 if (sctp->sctp_recvq == NULL) { 1857 sctp->sctp_recvq = mp; 1858 sctp->sctp_recvq_tail = mp; 1859 } else { 1860 sctp->sctp_recvq_tail->b_next = mp; 1861 sctp->sctp_recvq_tail = mp; 1862 } 1863 1864 if (!caller_hold_lock) 1865 mutex_exit(&sctp->sctp_recvq_lock); 1866 } 1867 1868 static void 1869 sctp_process_recvq(void *arg) 1870 { 1871 sctp_t *sctp = (sctp_t *)arg; 1872 mblk_t *mp; 1873 #ifdef DEBUG 1874 uint32_t loop_cnt = 0; 1875 #endif 1876 ip_recv_attr_t iras; 1877 1878 #ifdef _BIG_ENDIAN 1879 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 1880 #else 1881 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 1882 #endif 1883 1884 RUN_SCTP(sctp); 1885 mutex_enter(&sctp->sctp_recvq_lock); 1886 1887 #ifdef DEBUG 1888 recvq_call++; 1889 #endif 1890 /* 1891 * Note that while we 
are in this loop, other thread can put 1892 * new packets in the receive queue. We may be looping for 1893 * quite a while. 1894 */ 1895 while ((mp = sctp->sctp_recvq) != NULL) { 1896 mblk_t *data_mp; 1897 1898 sctp->sctp_recvq = mp->b_next; 1899 mutex_exit(&sctp->sctp_recvq_lock); 1900 mp->b_next = NULL; 1901 #ifdef DEBUG 1902 loop_cnt++; 1903 #endif 1904 mp->b_prev = NULL; 1905 1906 data_mp = mp->b_cont; 1907 mp->b_cont = NULL; 1908 if (!ip_recv_attr_from_mblk(mp, &iras)) { 1909 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 1910 freemsg(mp); 1911 ira_cleanup(&iras, B_TRUE); 1912 continue; 1913 } 1914 1915 if (iras.ira_flags & IRAF_ICMP_ERROR) 1916 sctp_icmp_error(sctp, data_mp); 1917 else 1918 sctp_input_data(sctp, data_mp, &iras); 1919 1920 ira_cleanup(&iras, B_TRUE); 1921 mutex_enter(&sctp->sctp_recvq_lock); 1922 } 1923 1924 sctp->sctp_recvq_tail = NULL; 1925 sctp->sctp_recvq_tq = NULL; 1926 1927 mutex_exit(&sctp->sctp_recvq_lock); 1928 1929 WAKE_SCTP(sctp); 1930 1931 #ifdef DEBUG 1932 if (loop_cnt > recvq_loop_cnt) 1933 recvq_loop_cnt = loop_cnt; 1934 #endif 1935 /* Now it can go away. 
*/ 1936 SCTP_REFRELE(sctp); 1937 } 1938 1939 /* ARGSUSED */ 1940 static int 1941 sctp_conn_cache_constructor(void *buf, void *cdrarg, int kmflags) 1942 { 1943 conn_t *connp = (conn_t *)buf; 1944 sctp_t *sctp = (sctp_t *)&connp[1]; 1945 int cnt; 1946 1947 bzero(connp, sizeof (conn_t)); 1948 bzero(buf, (char *)&sctp[1] - (char *)buf); 1949 1950 mutex_init(&sctp->sctp_reflock, NULL, MUTEX_DEFAULT, NULL); 1951 mutex_init(&sctp->sctp_lock, NULL, MUTEX_DEFAULT, NULL); 1952 mutex_init(&sctp->sctp_recvq_lock, NULL, MUTEX_DEFAULT, NULL); 1953 cv_init(&sctp->sctp_cv, NULL, CV_DEFAULT, NULL); 1954 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 1955 rw_init(&sctp->sctp_saddrs[cnt].ipif_hash_lock, NULL, 1956 RW_DEFAULT, NULL); 1957 } 1958 1959 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1960 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1961 connp->conn_flags = IPCL_SCTPCONN; 1962 connp->conn_proto = IPPROTO_SCTP; 1963 connp->conn_sctp = sctp; 1964 sctp->sctp_connp = connp; 1965 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 1966 1967 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 1968 if (connp->conn_ixa == NULL) { 1969 return (ENOMEM); 1970 } 1971 connp->conn_ixa->ixa_refcnt = 1; 1972 connp->conn_ixa->ixa_protocol = connp->conn_proto; 1973 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 1974 return (0); 1975 } 1976 1977 /* ARGSUSED */ 1978 static void 1979 sctp_conn_cache_destructor(void *buf, void *cdrarg) 1980 { 1981 conn_t *connp = (conn_t *)buf; 1982 sctp_t *sctp = (sctp_t *)&connp[1]; 1983 int cnt; 1984 1985 ASSERT(sctp->sctp_connp == connp); 1986 ASSERT(!MUTEX_HELD(&sctp->sctp_lock)); 1987 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 1988 ASSERT(!MUTEX_HELD(&sctp->sctp_recvq_lock)); 1989 1990 ASSERT(sctp->sctp_conn_hash_next == NULL); 1991 ASSERT(sctp->sctp_conn_hash_prev == NULL); 1992 ASSERT(sctp->sctp_listen_hash_next == NULL); 1993 ASSERT(sctp->sctp_listen_hash_prev == NULL); 1994 ASSERT(sctp->sctp_listen_tfp == 
NULL); 1995 ASSERT(sctp->sctp_conn_tfp == NULL); 1996 1997 ASSERT(sctp->sctp_faddrs == NULL); 1998 ASSERT(sctp->sctp_nsaddrs == 0); 1999 2000 ASSERT(sctp->sctp_ulpd == NULL); 2001 2002 ASSERT(sctp->sctp_lastfaddr == NULL); 2003 ASSERT(sctp->sctp_primary == NULL); 2004 ASSERT(sctp->sctp_current == NULL); 2005 ASSERT(sctp->sctp_lastdata == NULL); 2006 2007 ASSERT(sctp->sctp_xmit_head == NULL); 2008 ASSERT(sctp->sctp_xmit_tail == NULL); 2009 ASSERT(sctp->sctp_xmit_unsent == NULL); 2010 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 2011 2012 ASSERT(sctp->sctp_ostrcntrs == NULL); 2013 2014 ASSERT(sctp->sctp_sack_info == NULL); 2015 ASSERT(sctp->sctp_ack_mp == NULL); 2016 ASSERT(sctp->sctp_instr == NULL); 2017 2018 ASSERT(sctp->sctp_iphc == NULL); 2019 ASSERT(sctp->sctp_iphc6 == NULL); 2020 ASSERT(sctp->sctp_ipha == NULL); 2021 ASSERT(sctp->sctp_ip6h == NULL); 2022 ASSERT(sctp->sctp_sctph == NULL); 2023 ASSERT(sctp->sctp_sctph6 == NULL); 2024 2025 ASSERT(sctp->sctp_cookie_mp == NULL); 2026 2027 ASSERT(sctp->sctp_refcnt == 0); 2028 ASSERT(sctp->sctp_timer_mp == NULL); 2029 ASSERT(sctp->sctp_connp->conn_ref == 0); 2030 ASSERT(sctp->sctp_heartbeat_mp == NULL); 2031 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 2032 2033 ASSERT(sctp->sctp_shutdown_faddr == NULL); 2034 2035 ASSERT(sctp->sctp_cxmit_list == NULL); 2036 2037 ASSERT(sctp->sctp_recvq == NULL); 2038 ASSERT(sctp->sctp_recvq_tail == NULL); 2039 ASSERT(sctp->sctp_recvq_tq == NULL); 2040 2041 /* 2042 * sctp_pad_mp can be NULL if the memory allocation fails 2043 * in sctp_init_values() and the conn_t is freed. 
2044 */ 2045 if (sctp->sctp_pad_mp != NULL) { 2046 freeb(sctp->sctp_pad_mp); 2047 sctp->sctp_pad_mp = NULL; 2048 } 2049 2050 mutex_destroy(&sctp->sctp_reflock); 2051 mutex_destroy(&sctp->sctp_lock); 2052 mutex_destroy(&sctp->sctp_recvq_lock); 2053 cv_destroy(&sctp->sctp_cv); 2054 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 2055 rw_destroy(&sctp->sctp_saddrs[cnt].ipif_hash_lock); 2056 } 2057 2058 mutex_destroy(&connp->conn_lock); 2059 cv_destroy(&connp->conn_cv); 2060 rw_destroy(&connp->conn_ilg_lock); 2061 2062 /* Can be NULL if constructor failed */ 2063 if (connp->conn_ixa != NULL) { 2064 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2065 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2066 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2067 ixa_refrele(connp->conn_ixa); 2068 } 2069 } 2070 2071 static void 2072 sctp_conn_cache_init() 2073 { 2074 sctp_conn_cache = kmem_cache_create("sctp_conn_cache", 2075 sizeof (sctp_t) + sizeof (conn_t), 0, sctp_conn_cache_constructor, 2076 sctp_conn_cache_destructor, NULL, NULL, NULL, 0); 2077 } 2078 2079 static void 2080 sctp_conn_cache_fini() 2081 { 2082 kmem_cache_destroy(sctp_conn_cache); 2083 } 2084 2085 void 2086 sctp_conn_init(conn_t *connp) 2087 { 2088 ASSERT(connp->conn_flags == IPCL_SCTPCONN); 2089 connp->conn_rq = connp->conn_wq = NULL; 2090 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 2091 IXAF_VERIFY_PMTU; 2092 2093 ASSERT(connp->conn_proto == IPPROTO_SCTP); 2094 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 2095 connp->conn_state_flags |= CONN_INCIPIENT; 2096 2097 ASSERT(connp->conn_sctp != NULL); 2098 2099 /* 2100 * Register sctp_notify to listen to capability changes detected by IP. 2101 * This upcall is made in the context of the call to conn_ip_output 2102 * thus it holds whatever locks sctp holds across conn_ip_output. 
2103 */ 2104 connp->conn_ixa->ixa_notify = sctp_notify; 2105 connp->conn_ixa->ixa_notify_cookie = connp->conn_sctp; 2106 } 2107 2108 static void 2109 sctp_conn_clear(conn_t *connp) 2110 { 2111 /* Clean up conn_t stuff */ 2112 if (connp->conn_latch != NULL) { 2113 IPLATCH_REFRELE(connp->conn_latch); 2114 connp->conn_latch = NULL; 2115 } 2116 if (connp->conn_latch_in_policy != NULL) { 2117 IPPOL_REFRELE(connp->conn_latch_in_policy); 2118 connp->conn_latch_in_policy = NULL; 2119 } 2120 if (connp->conn_latch_in_action != NULL) { 2121 IPACT_REFRELE(connp->conn_latch_in_action); 2122 connp->conn_latch_in_action = NULL; 2123 } 2124 if (connp->conn_policy != NULL) { 2125 IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 2126 connp->conn_policy = NULL; 2127 } 2128 if (connp->conn_ipsec_opt_mp != NULL) { 2129 freemsg(connp->conn_ipsec_opt_mp); 2130 connp->conn_ipsec_opt_mp = NULL; 2131 } 2132 netstack_rele(connp->conn_netstack); 2133 connp->conn_netstack = NULL; 2134 2135 /* Leave conn_ixa and other constructed fields in place */ 2136 ipcl_conn_cleanup(connp); 2137 } 2138