1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/strsubr.h> 29 #include <sys/stropts.h> 30 #include <sys/strsun.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/xti_inet.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/cpuvar.h> 41 #include <sys/random.h> 42 #include <sys/priv.h> 43 #include <sys/sunldi.h> 44 45 #include <sys/errno.h> 46 #include <sys/signal.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <netinet/in.h> 50 #include <netinet/tcp.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 #include <netinet/sctp.h> 54 #include <net/if.h> 55 56 #include <inet/common.h> 57 #include <inet/ip.h> 58 #include <inet/ip_if.h> 59 #include <inet/ip_ire.h> 60 #include <inet/ip6.h> 61 #include <inet/mi.h> 62 #include <inet/mib2.h> 63 #include <inet/kstatcom.h> 64 #include <inet/optcom.h> 65 #include <inet/ipclassifier.h> 66 
#include <inet/ipsec_impl.h>
#include <inet/sctp_ip.h>
#include <inet/sctp_crc32.h>

#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"

int sctpdebug;
sin6_t	sctp_sin6_null;	/* Zero address for quick clears */

static void	sctp_closei_local(sctp_t *sctp);
static int	sctp_init_values(sctp_t *, sctp_t *, int);
static void	sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp);
static void	sctp_process_recvq(void *);
static void	sctp_rq_tq_init(sctp_stack_t *);
static void	sctp_rq_tq_fini(sctp_stack_t *);
static void	sctp_conn_cache_init();
static void	sctp_conn_cache_fini();
static int	sctp_conn_cache_constructor();
static void	sctp_conn_cache_destructor();
static void	sctp_conn_clear(conn_t *);
static void	sctp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

static void	*sctp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	sctp_stack_fini(netstackid_t stackid, void *arg);

/*
 * SCTP receive queue taskq
 *
 * At SCTP initialization time, a default taskq is created for
 * servicing packets received when the interrupt thread cannot
 * get a hold on the sctp_t.  The number of taskq can be increased in
 * sctp_find_next_tq() when an existing taskq cannot be dispatched.
 * The taskqs are never removed.  But the max number of taskq which
 * can be created is controlled by sctp_recvq_tq_list_max_sz.  Note
 * that SCTP recvq taskq is not tied to any specific CPU or ill.
 *
 * Those taskqs are stored in an array recvq_tq_list.  And they are
 * used in a round robin fashion.  The current taskq being used is
 * determined by recvq_tq_list_cur.
 */

/* /etc/system variables */
/* The minimum number of threads for each taskq. */
int sctp_recvq_tq_thr_min = 4;
/* The maximum number of threads for each taskq. */
int sctp_recvq_tq_thr_max = 48;
/* The minimum number of tasks for each taskq. */
int sctp_recvq_tq_task_min = 8;
/* Default value of sctp_recvq_tq_list_max_sz. */
int sctp_recvq_tq_list_max = 16;

/*
 * SCTP tunables related declarations. Definitions are in sctp_tunables.c
 */
extern mod_prop_info_t sctp_propinfo_tbl[];
extern int sctp_propinfo_count;

/* sctp_t/conn_t kmem cache */
struct kmem_cache	*sctp_conn_cache;

/*
 * The helper macros below each expand to several statements.  They are
 * wrapped in do { } while (0) so that an invocation followed by ';' is
 * exactly one statement and stays correct under an unbraced if/else.
 * Every macro argument is parenthesized at each use.
 */

/* Mark the sctp_t as condemned, under sctp_reflock. */
#define	SCTP_CONDEMNED(sctp)						\
	do {								\
		mutex_enter(&(sctp)->sctp_reflock);			\
		(sctp)->sctp_condemned = B_TRUE;			\
		mutex_exit(&(sctp)->sctp_reflock);			\
	} while (0)

/* Link/unlink a sctp_t to/from the global list. */
#define	SCTP_LINK(sctp, sctps)						\
	do {								\
		mutex_enter(&(sctps)->sctps_g_lock);			\
		list_insert_tail(&(sctps)->sctps_g_list, (sctp));	\
		mutex_exit(&(sctps)->sctps_g_lock);			\
	} while (0)

/* The sctp_t must already have been marked condemned. */
#define	SCTP_UNLINK(sctp, sctps)					\
	do {								\
		mutex_enter(&(sctps)->sctps_g_lock);			\
		ASSERT((sctp)->sctp_condemned);				\
		list_remove(&(sctps)->sctps_g_list, (sctp));		\
		mutex_exit(&(sctps)->sctps_g_lock);			\
	} while (0)

/*
 * Hooks for Sun Cluster. On non-clustered nodes these will remain NULL.
 * PSARC/2005/602.
 */
void (*cl_sctp_listen)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL;
void (*cl_sctp_unlisten)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL;
void (*cl_sctp_connect)(sa_family_t, uchar_t *, uint_t, in_port_t,
    uchar_t *, uint_t, in_port_t, boolean_t, cl_sctp_handle_t) = NULL;
void (*cl_sctp_disconnect)(sa_family_t, cl_sctp_handle_t) = NULL;
void (*cl_sctp_assoc_change)(sa_family_t, uchar_t *, size_t, uint_t,
    uchar_t *, size_t, uint_t, int, cl_sctp_handle_t) = NULL;
void (*cl_sctp_check_addrs)(sa_family_t, in_port_t, uchar_t **, size_t,
    uint_t *, boolean_t) = NULL;
/*
 * Return the version number of the SCTP kernel interface.
161 */ 162 int 163 sctp_itf_ver(int cl_ver) 164 { 165 if (cl_ver != SCTP_ITF_VER) 166 return (-1); 167 return (SCTP_ITF_VER); 168 } 169 170 /* 171 * Called when we need a new sctp instantiation but don't really have a 172 * new q to hang it off of. Copy the priv flag from the passed in structure. 173 */ 174 sctp_t * 175 sctp_create_eager(sctp_t *psctp) 176 { 177 sctp_t *sctp; 178 mblk_t *ack_mp, *hb_mp; 179 conn_t *connp; 180 cred_t *credp; 181 sctp_stack_t *sctps = psctp->sctp_sctps; 182 183 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, KM_NOSLEEP, 184 sctps->sctps_netstack)) == NULL) { 185 return (NULL); 186 } 187 188 sctp = CONN2SCTP(connp); 189 sctp->sctp_sctps = sctps; 190 191 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, 192 KM_NOSLEEP)) == NULL || 193 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 194 KM_NOSLEEP)) == NULL) { 195 if (ack_mp != NULL) 196 freeb(ack_mp); 197 sctp_conn_clear(connp); 198 sctp->sctp_sctps = NULL; 199 kmem_cache_free(sctp_conn_cache, connp); 200 return (NULL); 201 } 202 203 sctp->sctp_ack_mp = ack_mp; 204 sctp->sctp_heartbeat_mp = hb_mp; 205 206 if (sctp_init_values(sctp, psctp, KM_NOSLEEP) != 0) { 207 freeb(ack_mp); 208 freeb(hb_mp); 209 sctp_conn_clear(connp); 210 sctp->sctp_sctps = NULL; 211 kmem_cache_free(sctp_conn_cache, connp); 212 return (NULL); 213 } 214 215 if ((credp = psctp->sctp_connp->conn_cred) != NULL) { 216 connp->conn_cred = credp; 217 crhold(credp); 218 } 219 220 sctp->sctp_mss = psctp->sctp_mss; 221 sctp->sctp_detached = B_TRUE; 222 /* 223 * Link to the global as soon as possible so that this sctp_t 224 * can be found. 225 */ 226 SCTP_LINK(sctp, sctps); 227 228 /* If the listener has a limit, inherit the counter info. */ 229 sctp->sctp_listen_cnt = psctp->sctp_listen_cnt; 230 231 return (sctp); 232 } 233 234 /* 235 * We are dying for some reason. Try to do it gracefully. 
236 */ 237 void 238 sctp_clean_death(sctp_t *sctp, int err) 239 { 240 ASSERT(sctp != NULL); 241 242 dprint(3, ("sctp_clean_death %p, state %d\n", (void *)sctp, 243 sctp->sctp_state)); 244 245 sctp->sctp_client_errno = err; 246 /* 247 * Check to see if we need to notify upper layer. 248 */ 249 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 250 !SCTP_IS_DETACHED(sctp)) { 251 if (sctp->sctp_xmit_head || sctp->sctp_xmit_unsent) { 252 sctp_regift_xmitlist(sctp); 253 } 254 if (sctp->sctp_ulp_disconnected(sctp->sctp_ulpd, 0, err)) { 255 /* 256 * Socket is gone, detach. 257 */ 258 sctp->sctp_detached = B_TRUE; 259 sctp->sctp_ulpd = NULL; 260 sctp->sctp_upcalls = NULL; 261 } 262 } 263 264 /* Remove this sctp from all hashes. */ 265 sctp_closei_local(sctp); 266 267 /* 268 * If the sctp_t is detached, we need to finish freeing up 269 * the resources. At this point, ip_fanout_sctp() should have 270 * a hold on this sctp_t. Some thread doing snmp stuff can 271 * have a hold. And a taskq can also have a hold waiting to 272 * work. sctp_unlink() the sctp_t from the global list so 273 * that no new thread can find it. Then do a SCTP_REFRELE(). 274 * The sctp_t will be freed after all those threads are done. 275 */ 276 if (SCTP_IS_DETACHED(sctp)) { 277 SCTP_CONDEMNED(sctp); 278 SCTP_REFRELE(sctp); 279 } 280 } 281 282 /* 283 * Called by upper layer when it wants to close this association. 284 * Depending on the state of this assoication, we need to do 285 * different things. 286 * 287 * If the state is below COOKIE_ECHOED or it is COOKIE_ECHOED but with 288 * no sent data, just remove this sctp from all the hashes. This 289 * makes sure that all packets from the other end will go to the default 290 * sctp handling. The upper layer will then do a sctp_close() to clean 291 * up. 292 * 293 * Otherwise, check and see if SO_LINGER is set. If it is set, check 294 * the value. If the value is 0, consider this an abortive close. Send 295 * an ABORT message and kill the associatiion. 
296 * 297 */ 298 int 299 sctp_disconnect(sctp_t *sctp) 300 { 301 int error = 0; 302 conn_t *connp = sctp->sctp_connp; 303 304 dprint(3, ("sctp_disconnect %p, state %d\n", (void *)sctp, 305 sctp->sctp_state)); 306 307 RUN_SCTP(sctp); 308 309 switch (sctp->sctp_state) { 310 case SCTPS_IDLE: 311 case SCTPS_BOUND: 312 case SCTPS_LISTEN: 313 break; 314 case SCTPS_COOKIE_WAIT: 315 case SCTPS_COOKIE_ECHOED: 316 /* 317 * Close during the connect 3-way handshake 318 * but here there may or may not be pending data 319 * already on queue. Process almost same as in 320 * the ESTABLISHED state. 321 */ 322 if (sctp->sctp_xmit_head == NULL && 323 sctp->sctp_xmit_unsent == NULL) { 324 break; 325 } 326 /* FALLTHRU */ 327 default: 328 /* 329 * If SO_LINGER has set a zero linger time, terminate the 330 * association and send an ABORT. 331 */ 332 if (connp->conn_linger && connp->conn_lingertime == 0) { 333 sctp_user_abort(sctp, NULL); 334 WAKE_SCTP(sctp); 335 return (error); 336 } 337 338 /* 339 * In there is unread data, send an ABORT and terminate the 340 * association. 341 */ 342 if (sctp->sctp_rxqueued > 0 || sctp->sctp_irwnd > 343 sctp->sctp_rwnd) { 344 sctp_user_abort(sctp, NULL); 345 WAKE_SCTP(sctp); 346 return (error); 347 } 348 /* 349 * Transmit the shutdown before detaching the sctp_t. 350 * After sctp_detach returns this queue/perimeter 351 * no longer owns the sctp_t thus others can modify it. 352 */ 353 sctp_send_shutdown(sctp, 0); 354 355 /* Pass gathered wisdom to IP for keeping */ 356 sctp_update_dce(sctp); 357 358 /* 359 * If lingering on close then wait until the shutdown 360 * is complete, or the SO_LINGER time passes, or an 361 * ABORT is sent/received. Note that sctp_disconnect() 362 * can be called more than once. Make sure that only 363 * one thread waits. 
364 */ 365 if (connp->conn_linger && connp->conn_lingertime > 0 && 366 sctp->sctp_state >= SCTPS_ESTABLISHED && 367 !sctp->sctp_lingering) { 368 clock_t stoptime; /* in ticks */ 369 clock_t ret; 370 371 sctp->sctp_lingering = 1; 372 sctp->sctp_client_errno = 0; 373 stoptime = ddi_get_lbolt() + 374 connp->conn_lingertime * hz; 375 376 mutex_enter(&sctp->sctp_lock); 377 sctp->sctp_running = B_FALSE; 378 while (sctp->sctp_state >= SCTPS_ESTABLISHED && 379 sctp->sctp_client_errno == 0) { 380 cv_signal(&sctp->sctp_cv); 381 ret = cv_timedwait_sig(&sctp->sctp_cv, 382 &sctp->sctp_lock, stoptime); 383 if (ret < 0) { 384 /* Stoptime has reached. */ 385 sctp->sctp_client_errno = EWOULDBLOCK; 386 break; 387 } else if (ret == 0) { 388 /* Got a signal. */ 389 break; 390 } 391 } 392 error = sctp->sctp_client_errno; 393 sctp->sctp_client_errno = 0; 394 mutex_exit(&sctp->sctp_lock); 395 } 396 397 WAKE_SCTP(sctp); 398 return (error); 399 } 400 401 402 /* Remove this sctp from all hashes so nobody can find it. */ 403 sctp_closei_local(sctp); 404 WAKE_SCTP(sctp); 405 return (error); 406 } 407 408 void 409 sctp_close(sctp_t *sctp) 410 { 411 dprint(3, ("sctp_close %p, state %d\n", (void *)sctp, 412 sctp->sctp_state)); 413 414 RUN_SCTP(sctp); 415 sctp->sctp_detached = 1; 416 sctp->sctp_ulpd = NULL; 417 sctp->sctp_upcalls = NULL; 418 bzero(&sctp->sctp_events, sizeof (sctp->sctp_events)); 419 420 /* If the graceful shutdown has not been completed, just return. */ 421 if (sctp->sctp_state != SCTPS_IDLE) { 422 WAKE_SCTP(sctp); 423 return; 424 } 425 426 /* 427 * Since sctp_t is in SCTPS_IDLE state, so the only thread which 428 * can have a hold on the sctp_t is doing snmp stuff. Just do 429 * a SCTP_REFRELE() here after the SCTP_UNLINK(). It will 430 * be freed when the other thread is done. 431 */ 432 SCTP_CONDEMNED(sctp); 433 WAKE_SCTP(sctp); 434 SCTP_REFRELE(sctp); 435 } 436 437 /* 438 * Unlink from global list and do the eager close. 
439 * Remove the refhold implicit in being on the global list. 440 */ 441 void 442 sctp_close_eager(sctp_t *sctp) 443 { 444 SCTP_CONDEMNED(sctp); 445 sctp_closei_local(sctp); 446 SCTP_REFRELE(sctp); 447 } 448 449 /* 450 * The sctp_t is going away. Remove it from all lists and set it 451 * to SCTPS_IDLE. The caller has to remove it from the 452 * global list. The freeing up of memory is deferred until 453 * sctp_free(). This is needed since a thread in sctp_input() might have 454 * done a SCTP_REFHOLD on this structure before it was removed from the 455 * hashes. 456 */ 457 static void 458 sctp_closei_local(sctp_t *sctp) 459 { 460 mblk_t *mp; 461 conn_t *connp = sctp->sctp_connp; 462 463 /* The counter is incremented only for established associations. */ 464 if (sctp->sctp_state >= SCTPS_ESTABLISHED) 465 SCTPS_ASSOC_DEC(sctp->sctp_sctps); 466 467 if (sctp->sctp_listen_cnt != NULL) 468 SCTP_DECR_LISTEN_CNT(sctp); 469 470 /* Sanity check, don't do the same thing twice. */ 471 if (connp->conn_state_flags & CONN_CLOSING) { 472 ASSERT(sctp->sctp_state == SCTPS_IDLE); 473 return; 474 } 475 476 /* Stop and free the timers */ 477 sctp_free_faddr_timers(sctp); 478 if ((mp = sctp->sctp_heartbeat_mp) != NULL) { 479 sctp_timer_free(mp); 480 sctp->sctp_heartbeat_mp = NULL; 481 } 482 if ((mp = sctp->sctp_ack_mp) != NULL) { 483 sctp_timer_free(mp); 484 sctp->sctp_ack_mp = NULL; 485 } 486 487 /* Set the CONN_CLOSING flag so that IP will not cache IRE again. */ 488 mutex_enter(&connp->conn_lock); 489 connp->conn_state_flags |= CONN_CLOSING; 490 mutex_exit(&connp->conn_lock); 491 492 /* Remove from all hashes. */ 493 sctp_bind_hash_remove(sctp); 494 sctp_conn_hash_remove(sctp); 495 sctp_listen_hash_remove(sctp); 496 sctp->sctp_state = SCTPS_IDLE; 497 498 /* 499 * Clean up the recvq as much as possible. All those packets 500 * will be silently dropped as this sctp_t is now in idle state. 
501 */ 502 mutex_enter(&sctp->sctp_recvq_lock); 503 while ((mp = sctp->sctp_recvq) != NULL) { 504 sctp->sctp_recvq = mp->b_next; 505 mp->b_next = NULL; 506 507 if (ip_recv_attr_is_mblk(mp)) 508 mp = ip_recv_attr_free_mblk(mp); 509 510 freemsg(mp); 511 } 512 mutex_exit(&sctp->sctp_recvq_lock); 513 } 514 515 /* 516 * Free memory associated with the sctp/ip header template. 517 */ 518 static void 519 sctp_headers_free(sctp_t *sctp) 520 { 521 if (sctp->sctp_iphc != NULL) { 522 kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len); 523 sctp->sctp_iphc = NULL; 524 sctp->sctp_ipha = NULL; 525 sctp->sctp_hdr_len = 0; 526 sctp->sctp_ip_hdr_len = 0; 527 sctp->sctp_iphc_len = 0; 528 sctp->sctp_sctph = NULL; 529 sctp->sctp_hdr_len = 0; 530 } 531 if (sctp->sctp_iphc6 != NULL) { 532 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 533 sctp->sctp_iphc6 = NULL; 534 sctp->sctp_ip6h = NULL; 535 sctp->sctp_hdr6_len = 0; 536 sctp->sctp_ip_hdr6_len = 0; 537 sctp->sctp_iphc6_len = 0; 538 sctp->sctp_sctph6 = NULL; 539 sctp->sctp_hdr6_len = 0; 540 } 541 } 542 543 static void 544 sctp_free_xmit_data(sctp_t *sctp) 545 { 546 mblk_t *ump = NULL; 547 mblk_t *nump; 548 mblk_t *mp; 549 mblk_t *nmp; 550 551 sctp->sctp_xmit_unacked = NULL; 552 ump = sctp->sctp_xmit_head; 553 sctp->sctp_xmit_tail = sctp->sctp_xmit_head = NULL; 554 free_unsent: 555 for (; ump != NULL; ump = nump) { 556 for (mp = ump->b_cont; mp != NULL; mp = nmp) { 557 nmp = mp->b_next; 558 mp->b_next = NULL; 559 mp->b_prev = NULL; 560 freemsg(mp); 561 } 562 ASSERT(DB_REF(ump) == 1); 563 nump = ump->b_next; 564 ump->b_next = NULL; 565 ump->b_prev = NULL; 566 ump->b_cont = NULL; 567 freeb(ump); 568 } 569 if ((ump = sctp->sctp_xmit_unsent) == NULL) { 570 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 571 return; 572 } 573 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = NULL; 574 goto free_unsent; 575 } 576 577 /* 578 * Cleanup all the messages in the stream queue and the reassembly lists. 
579 * If 'free' is true, then delete the streams as well. 580 */ 581 void 582 sctp_instream_cleanup(sctp_t *sctp, boolean_t free) 583 { 584 int i; 585 mblk_t *mp; 586 mblk_t *mp1; 587 588 if (sctp->sctp_instr != NULL) { 589 /* walk thru and flush out anything remaining in the Q */ 590 for (i = 0; i < sctp->sctp_num_istr; i++) { 591 mp = sctp->sctp_instr[i].istr_msgs; 592 while (mp != NULL) { 593 mp1 = mp->b_next; 594 mp->b_next = mp->b_prev = NULL; 595 freemsg(mp); 596 mp = mp1; 597 } 598 sctp->sctp_instr[i].istr_msgs = NULL; 599 sctp->sctp_instr[i].istr_nmsgs = 0; 600 sctp_free_reass((sctp->sctp_instr) + i); 601 sctp->sctp_instr[i].nextseq = 0; 602 } 603 if (free) { 604 kmem_free(sctp->sctp_instr, 605 sizeof (*sctp->sctp_instr) * sctp->sctp_num_istr); 606 sctp->sctp_instr = NULL; 607 sctp->sctp_num_istr = 0; 608 } 609 } 610 /* un-ordered fragments */ 611 if (sctp->sctp_uo_frags != NULL) { 612 for (mp = sctp->sctp_uo_frags; mp != NULL; mp = mp1) { 613 mp1 = mp->b_next; 614 mp->b_next = mp->b_prev = NULL; 615 freemsg(mp); 616 } 617 sctp->sctp_uo_frags = NULL; 618 } 619 } 620 621 /* 622 * Last reference to the sctp_t is gone. Free all memory associated with it. 623 * Called from SCTP_REFRELE. Called inline in sctp_close() 624 */ 625 void 626 sctp_free(conn_t *connp) 627 { 628 sctp_t *sctp = CONN2SCTP(connp); 629 int cnt; 630 sctp_stack_t *sctps = sctp->sctp_sctps; 631 632 ASSERT(sctps != NULL); 633 /* Unlink it from the global list */ 634 SCTP_UNLINK(sctp, sctps); 635 636 ASSERT(connp->conn_ref == 0); 637 ASSERT(connp->conn_proto == IPPROTO_SCTP); 638 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 639 ASSERT(sctp->sctp_refcnt == 0); 640 641 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 642 ASSERT(sctp->sctp_conn_hash_next == NULL && 643 sctp->sctp_conn_hash_prev == NULL); 644 645 646 /* Free up all the resources. 
*/ 647 648 /* blow away sctp stream management */ 649 if (sctp->sctp_ostrcntrs != NULL) { 650 kmem_free(sctp->sctp_ostrcntrs, 651 sizeof (uint16_t) * sctp->sctp_num_ostr); 652 sctp->sctp_ostrcntrs = NULL; 653 } 654 sctp_instream_cleanup(sctp, B_TRUE); 655 656 /* Remove all data transfer resources. */ 657 sctp->sctp_istr_nmsgs = 0; 658 sctp->sctp_rxqueued = 0; 659 sctp_free_xmit_data(sctp); 660 sctp->sctp_unacked = 0; 661 sctp->sctp_unsent = 0; 662 if (sctp->sctp_cxmit_list != NULL) 663 sctp_asconf_free_cxmit(sctp, NULL); 664 665 sctp->sctp_lastdata = NULL; 666 667 /* Clear out default xmit settings */ 668 sctp->sctp_def_stream = 0; 669 sctp->sctp_def_flags = 0; 670 sctp->sctp_def_ppid = 0; 671 sctp->sctp_def_context = 0; 672 sctp->sctp_def_timetolive = 0; 673 674 if (sctp->sctp_sack_info != NULL) { 675 sctp_free_set(sctp->sctp_sack_info); 676 sctp->sctp_sack_info = NULL; 677 } 678 sctp->sctp_sack_gaps = 0; 679 680 if (sctp->sctp_cookie_mp != NULL) { 681 freemsg(sctp->sctp_cookie_mp); 682 sctp->sctp_cookie_mp = NULL; 683 } 684 685 /* Remove all the address resources. 
*/ 686 sctp_zap_addrs(sctp); 687 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 688 ASSERT(sctp->sctp_saddrs[cnt].ipif_count == 0); 689 list_destroy(&sctp->sctp_saddrs[cnt].sctp_ipif_list); 690 } 691 692 if (sctp->sctp_hopopts != NULL) { 693 mi_free(sctp->sctp_hopopts); 694 sctp->sctp_hopopts = NULL; 695 sctp->sctp_hopoptslen = 0; 696 } 697 ASSERT(sctp->sctp_hopoptslen == 0); 698 if (sctp->sctp_dstopts != NULL) { 699 mi_free(sctp->sctp_dstopts); 700 sctp->sctp_dstopts = NULL; 701 sctp->sctp_dstoptslen = 0; 702 } 703 ASSERT(sctp->sctp_dstoptslen == 0); 704 if (sctp->sctp_rthdrdstopts != NULL) { 705 mi_free(sctp->sctp_rthdrdstopts); 706 sctp->sctp_rthdrdstopts = NULL; 707 sctp->sctp_rthdrdstoptslen = 0; 708 } 709 ASSERT(sctp->sctp_rthdrdstoptslen == 0); 710 if (sctp->sctp_rthdr != NULL) { 711 mi_free(sctp->sctp_rthdr); 712 sctp->sctp_rthdr = NULL; 713 sctp->sctp_rthdrlen = 0; 714 } 715 ASSERT(sctp->sctp_rthdrlen == 0); 716 sctp_headers_free(sctp); 717 718 sctp->sctp_shutdown_faddr = NULL; 719 720 if (sctp->sctp_err_chunks != NULL) { 721 freemsg(sctp->sctp_err_chunks); 722 sctp->sctp_err_chunks = NULL; 723 sctp->sctp_err_len = 0; 724 } 725 726 /* Clear all the bitfields. */ 727 bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits)); 728 729 /* It is time to update the global statistics. 
*/ 730 SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts); 731 SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks); 732 SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, sctp->sctp_odchunks); 733 SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks); 734 SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, sctp->sctp_rxtchunks); 735 SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts); 736 SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks); 737 SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks); 738 SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks); 739 SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); 740 SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); 741 sctp->sctp_opkts = 0; 742 sctp->sctp_obchunks = 0; 743 sctp->sctp_odchunks = 0; 744 sctp->sctp_oudchunks = 0; 745 sctp->sctp_rxtchunks = 0; 746 sctp->sctp_ipkts = 0; 747 sctp->sctp_ibchunks = 0; 748 sctp->sctp_idchunks = 0; 749 sctp->sctp_iudchunks = 0; 750 sctp->sctp_fragdmsgs = 0; 751 sctp->sctp_reassmsgs = 0; 752 sctp->sctp_outseqtsns = 0; 753 sctp->sctp_osacks = 0; 754 sctp->sctp_isacks = 0; 755 sctp->sctp_idupchunks = 0; 756 sctp->sctp_gapcnt = 0; 757 sctp->sctp_cum_obchunks = 0; 758 sctp->sctp_cum_odchunks = 0; 759 sctp->sctp_cum_oudchunks = 0; 760 sctp->sctp_cum_rxtchunks = 0; 761 sctp->sctp_cum_ibchunks = 0; 762 sctp->sctp_cum_idchunks = 0; 763 sctp->sctp_cum_iudchunks = 0; 764 765 sctp->sctp_autoclose = 0; 766 sctp->sctp_tx_adaptation_code = 0; 767 768 sctp->sctp_v6label_len = 0; 769 sctp->sctp_v4label_len = 0; 770 771 sctp->sctp_sctps = NULL; 772 773 sctp_conn_clear(connp); 774 kmem_cache_free(sctp_conn_cache, connp); 775 } 776 777 /* 778 * Initialize protocol control block. If a parent exists, inherit 779 * all values set through setsockopt(). 
780 */ 781 static int 782 sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) 783 { 784 int err; 785 int cnt; 786 sctp_stack_t *sctps = sctp->sctp_sctps; 787 conn_t *connp; 788 789 connp = sctp->sctp_connp; 790 791 sctp->sctp_nsaddrs = 0; 792 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 793 sctp->sctp_saddrs[cnt].ipif_count = 0; 794 list_create(&sctp->sctp_saddrs[cnt].sctp_ipif_list, 795 sizeof (sctp_saddr_ipif_t), offsetof(sctp_saddr_ipif_t, 796 saddr_ipif)); 797 } 798 connp->conn_ports = 0; 799 sctp->sctp_running = B_FALSE; 800 sctp->sctp_state = SCTPS_IDLE; 801 802 sctp->sctp_refcnt = 1; 803 804 sctp->sctp_strikes = 0; 805 806 sctp->sctp_last_mtu_probe = ddi_get_lbolt64(); 807 sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; 808 809 sctp->sctp_sack_gaps = 0; 810 sctp->sctp_sack_toggle = 2; 811 812 /* Only need to do the allocation if there is no "cached" one. */ 813 if (sctp->sctp_pad_mp == NULL) { 814 if (sleep == KM_SLEEP) { 815 sctp->sctp_pad_mp = allocb_wait(SCTP_ALIGN, BPRI_MED, 816 STR_NOSIG, NULL); 817 } else { 818 sctp->sctp_pad_mp = allocb(SCTP_ALIGN, BPRI_MED); 819 if (sctp->sctp_pad_mp == NULL) 820 return (ENOMEM); 821 } 822 bzero(sctp->sctp_pad_mp->b_rptr, SCTP_ALIGN); 823 } 824 825 if (psctp != NULL) { 826 /* 827 * Inherit from parent 828 * 829 * Start by inheriting from the conn_t, including conn_ixa and 830 * conn_xmit_ipp. 
831 */ 832 err = conn_inherit_parent(psctp->sctp_connp, connp); 833 if (err != 0) 834 goto failure; 835 836 sctp->sctp_cookie_lifetime = psctp->sctp_cookie_lifetime; 837 838 sctp->sctp_cwnd_max = psctp->sctp_cwnd_max; 839 sctp->sctp_rwnd = psctp->sctp_rwnd; 840 sctp->sctp_irwnd = psctp->sctp_rwnd; 841 sctp->sctp_pd_point = psctp->sctp_pd_point; 842 sctp->sctp_rto_max = psctp->sctp_rto_max; 843 sctp->sctp_rto_max_init = psctp->sctp_rto_max_init; 844 sctp->sctp_rto_min = psctp->sctp_rto_min; 845 sctp->sctp_rto_initial = psctp->sctp_rto_initial; 846 sctp->sctp_pa_max_rxt = psctp->sctp_pa_max_rxt; 847 sctp->sctp_pp_max_rxt = psctp->sctp_pp_max_rxt; 848 sctp->sctp_max_init_rxt = psctp->sctp_max_init_rxt; 849 850 sctp->sctp_def_stream = psctp->sctp_def_stream; 851 sctp->sctp_def_flags = psctp->sctp_def_flags; 852 sctp->sctp_def_ppid = psctp->sctp_def_ppid; 853 sctp->sctp_def_context = psctp->sctp_def_context; 854 sctp->sctp_def_timetolive = psctp->sctp_def_timetolive; 855 856 sctp->sctp_num_istr = psctp->sctp_num_istr; 857 sctp->sctp_num_ostr = psctp->sctp_num_ostr; 858 859 sctp->sctp_hb_interval = psctp->sctp_hb_interval; 860 sctp->sctp_autoclose = psctp->sctp_autoclose; 861 sctp->sctp_tx_adaptation_code = psctp->sctp_tx_adaptation_code; 862 863 /* xxx should be a better way to copy these flags xxx */ 864 sctp->sctp_bound_to_all = psctp->sctp_bound_to_all; 865 sctp->sctp_cansleep = psctp->sctp_cansleep; 866 sctp->sctp_send_adaptation = psctp->sctp_send_adaptation; 867 sctp->sctp_ndelay = psctp->sctp_ndelay; 868 sctp->sctp_events = psctp->sctp_events; 869 } else { 870 /* 871 * Set to system defaults 872 */ 873 sctp->sctp_cookie_lifetime = 874 MSEC_TO_TICK(sctps->sctps_cookie_life); 875 connp->conn_sndlowat = sctps->sctps_xmit_lowat; 876 connp->conn_sndbuf = sctps->sctps_xmit_hiwat; 877 connp->conn_rcvbuf = sctps->sctps_recv_hiwat; 878 879 sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; 880 sctp->sctp_rwnd = connp->conn_rcvbuf; 881 sctp->sctp_irwnd = sctp->sctp_rwnd; 882 
sctp->sctp_pd_point = sctp->sctp_rwnd; 883 sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); 884 sctp->sctp_rto_max_init = sctp->sctp_rto_max; 885 sctp->sctp_rto_min = MSEC_TO_TICK(sctps->sctps_rto_ming); 886 sctp->sctp_rto_initial = MSEC_TO_TICK( 887 sctps->sctps_rto_initialg); 888 sctp->sctp_pa_max_rxt = sctps->sctps_pa_max_retr; 889 sctp->sctp_pp_max_rxt = sctps->sctps_pp_max_retr; 890 sctp->sctp_max_init_rxt = sctps->sctps_max_init_retr; 891 892 sctp->sctp_num_istr = sctps->sctps_max_in_streams; 893 sctp->sctp_num_ostr = sctps->sctps_initial_out_streams; 894 895 sctp->sctp_hb_interval = 896 MSEC_TO_TICK(sctps->sctps_heartbeat_interval); 897 898 if (connp->conn_family == AF_INET) 899 connp->conn_default_ttl = sctps->sctps_ipv4_ttl; 900 else 901 connp->conn_default_ttl = sctps->sctps_ipv6_hoplimit; 902 903 connp->conn_xmit_ipp.ipp_unicast_hops = 904 connp->conn_default_ttl; 905 906 /* 907 * Initialize the header template 908 */ 909 if ((err = sctp_build_hdrs(sctp, sleep)) != 0) { 910 goto failure; 911 } 912 } 913 914 sctp->sctp_understands_asconf = B_TRUE; 915 sctp->sctp_understands_addip = B_TRUE; 916 sctp->sctp_prsctp_aware = B_FALSE; 917 918 sctp->sctp_connp->conn_ref = 1; 919 920 sctp->sctp_prsctpdrop = 0; 921 sctp->sctp_msgcount = 0; 922 923 return (0); 924 925 failure: 926 sctp_headers_free(sctp); 927 return (err); 928 } 929 930 /* 931 * Extracts the init tag from an INIT chunk and checks if it matches 932 * the sctp's verification tag. Returns 0 if it doesn't match, 1 if 933 * it does. 
934 */ 935 static boolean_t 936 sctp_icmp_verf(sctp_t *sctp, sctp_hdr_t *sh, mblk_t *mp) 937 { 938 sctp_chunk_hdr_t *sch; 939 uint32_t verf, *vp; 940 941 sch = (sctp_chunk_hdr_t *)(sh + 1); 942 vp = (uint32_t *)(sch + 1); 943 944 /* Need at least the data chunk hdr and the first 4 bytes of INIT */ 945 if ((unsigned char *)(vp + 1) > mp->b_wptr) { 946 return (B_FALSE); 947 } 948 949 bcopy(vp, &verf, sizeof (verf)); 950 951 if (verf == sctp->sctp_lvtag) { 952 return (B_TRUE); 953 } 954 return (B_FALSE); 955 } 956 957 /* 958 * Update the SCTP state according to change of PMTU. 959 * 960 * Path MTU might have changed by either increase or decrease, so need to 961 * adjust the MSS based on the value of ixa_pmtu. 962 */ 963 static void 964 sctp_update_pmtu(sctp_t *sctp, sctp_faddr_t *fp, boolean_t decrease_only) 965 { 966 uint32_t pmtu; 967 int32_t mss; 968 ip_xmit_attr_t *ixa = fp->sf_ixa; 969 970 if (sctp->sctp_state < SCTPS_ESTABLISHED) 971 return; 972 973 /* 974 * Always call ip_get_pmtu() to make sure that IP has updated 975 * ixa_flags properly. 976 */ 977 pmtu = ip_get_pmtu(ixa); 978 979 /* 980 * Calculate the MSS by decreasing the PMTU by sctp_hdr_len and 981 * IPsec overhead if applied. Make sure to use the most recent 982 * IPsec information. 983 */ 984 mss = pmtu - conn_ipsec_length(sctp->sctp_connp); 985 if (ixa->ixa_flags & IXAF_IS_IPV4) 986 mss -= sctp->sctp_hdr_len; 987 else 988 mss -= sctp->sctp_hdr6_len; 989 990 /* 991 * Nothing to change, so just return. 992 */ 993 if (mss == fp->sf_pmss) 994 return; 995 996 /* 997 * Currently, for ICMP errors, only PMTU decrease is handled. 998 */ 999 if (mss > fp->sf_pmss && decrease_only) 1000 return; 1001 1002 #ifdef DEBUG 1003 (void) printf("sctp_update_pmtu mss from %d to %d\n", 1004 fp->sf_pmss, mss); 1005 #endif 1006 DTRACE_PROBE2(sctp_update_pmtu, int32_t, fp->sf_pmss, uint32_t, mss); 1007 1008 /* 1009 * Update ixa_fragsize and ixa_pmtu. 
1010 */ 1011 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1012 1013 /* 1014 * Make sure that sfa_pmss is a multiple of 1015 * SCTP_ALIGN. 1016 */ 1017 fp->sf_pmss = mss & ~(SCTP_ALIGN - 1); 1018 fp->sf_pmtu_discovered = 1; 1019 1020 #ifdef notyet 1021 if (mss < sctp->sctp_sctps->sctps_mss_min) 1022 ixa->ixa_flags |= IXAF_PMTU_TOO_SMALL; 1023 #endif 1024 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) 1025 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF); 1026 1027 /* 1028 * If below the min size then ip_get_pmtu cleared IXAF_PMTU_IPV4_DF. 1029 * Make sure to clear IXAF_DONTFRAG, which is used by IP to decide 1030 * whether to fragment the packet. 1031 */ 1032 if (ixa->ixa_flags & IXAF_IS_IPV4) { 1033 if (!(ixa->ixa_flags & IXAF_PMTU_IPV4_DF)) { 1034 fp->sf_df = B_FALSE; 1035 if (fp == sctp->sctp_current) { 1036 sctp->sctp_ipha-> 1037 ipha_fragment_offset_and_flags = 0; 1038 } 1039 } 1040 } 1041 } 1042 1043 /* 1044 * Notify function registered with ip_xmit_attr_t. It's called in the context 1045 * of conn_ip_output so it's safe to update the SCTP state. 1046 * Currently only used for pmtu changes. 1047 */ 1048 /* ARGSUSED1 */ 1049 static void 1050 sctp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1051 ixa_notify_arg_t narg) 1052 { 1053 sctp_t *sctp = (sctp_t *)arg; 1054 sctp_faddr_t *fp; 1055 1056 switch (ntype) { 1057 case IXAN_PMTU: 1058 /* Find the faddr based on the ip_xmit_attr_t pointer */ 1059 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) { 1060 if (fp->sf_ixa == ixa) 1061 break; 1062 } 1063 if (fp != NULL) 1064 sctp_update_pmtu(sctp, fp, B_FALSE); 1065 break; 1066 default: 1067 break; 1068 } 1069 } 1070 1071 /* 1072 * sctp_icmp_error is called by sctp_input() to process ICMP error messages 1073 * passed up by IP. We need to find a sctp_t 1074 * that corresponds to the returned datagram. Passes the message back in on 1075 * the correct queue once it has located the connection. 
1076 * Assumes that IP has pulled up everything up to and including 1077 * the ICMP header. 1078 */ 1079 void 1080 sctp_icmp_error(sctp_t *sctp, mblk_t *mp) 1081 { 1082 icmph_t *icmph; 1083 ipha_t *ipha; 1084 int iph_hdr_length; 1085 sctp_hdr_t *sctph; 1086 in6_addr_t dst; 1087 sctp_faddr_t *fp; 1088 sctp_stack_t *sctps = sctp->sctp_sctps; 1089 1090 dprint(1, ("sctp_icmp_error: sctp=%p, mp=%p\n", (void *)sctp, 1091 (void *)mp)); 1092 1093 ipha = (ipha_t *)mp->b_rptr; 1094 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1095 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1096 sctp_icmp_error_ipv6(sctp, mp); 1097 return; 1098 } 1099 1100 /* account for the ip hdr from the icmp message */ 1101 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1102 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1103 /* now the ip hdr of message resulting in this icmp */ 1104 ipha = (ipha_t *)&icmph[1]; 1105 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1106 sctph = (sctp_hdr_t *)((char *)ipha + iph_hdr_length); 1107 /* first_mp must expose the full sctp header. */ 1108 if ((uchar_t *)(sctph + 1) >= mp->b_wptr) { 1109 /* not enough data for SCTP header */ 1110 freemsg(mp); 1111 return; 1112 } 1113 1114 switch (icmph->icmph_type) { 1115 case ICMP_DEST_UNREACHABLE: 1116 switch (icmph->icmph_code) { 1117 case ICMP_FRAGMENTATION_NEEDED: 1118 /* 1119 * Reduce the MSS based on the new MTU. This will 1120 * eliminate any fragmentation locally. 1121 * N.B. There may well be some funny side-effects on 1122 * the local send policy and the remote receive policy. 1123 * Pending further research, we provide 1124 * sctp_ignore_path_mtu just in case this proves 1125 * disastrous somewhere. 1126 * 1127 * After updating the MSS, retransmit part of the 1128 * dropped segment using the new mss by calling 1129 * sctp_wput_slow(). Need to adjust all those 1130 * params to make sure sctp_wput_slow() work properly. 
1131 */ 1132 if (sctps->sctps_ignore_path_mtu) 1133 break; 1134 1135 /* find the offending faddr */ 1136 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst); 1137 fp = sctp_lookup_faddr(sctp, &dst); 1138 if (fp == NULL) { 1139 break; 1140 } 1141 sctp_update_pmtu(sctp, fp, B_TRUE); 1142 /* 1143 * It is possible, even likely that a fast retransmit 1144 * attempt has been dropped by ip as a result of this 1145 * error, retransmission bundles as much as possible. 1146 * A retransmit here prevents significant delays waiting 1147 * on the timer. Analogous to behaviour of TCP after 1148 * ICMP too big. 1149 */ 1150 sctp_rexmit(sctp, fp); 1151 break; 1152 case ICMP_PORT_UNREACHABLE: 1153 case ICMP_PROTOCOL_UNREACHABLE: 1154 switch (sctp->sctp_state) { 1155 case SCTPS_COOKIE_WAIT: 1156 case SCTPS_COOKIE_ECHOED: 1157 /* make sure the verification tag matches */ 1158 if (!sctp_icmp_verf(sctp, sctph, mp)) { 1159 break; 1160 } 1161 SCTPS_BUMP_MIB(sctps, sctpAborted); 1162 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1163 NULL); 1164 sctp_clean_death(sctp, ECONNREFUSED); 1165 break; 1166 } 1167 break; 1168 case ICMP_HOST_UNREACHABLE: 1169 case ICMP_NET_UNREACHABLE: 1170 /* Record the error in case we finally time out. */ 1171 sctp->sctp_client_errno = (icmph->icmph_code == 1172 ICMP_HOST_UNREACHABLE) ? EHOSTUNREACH : ENETUNREACH; 1173 break; 1174 default: 1175 break; 1176 } 1177 break; 1178 case ICMP_SOURCE_QUENCH: { 1179 /* Reduce the sending rate as if we got a retransmit timeout */ 1180 break; 1181 } 1182 } 1183 freemsg(mp); 1184 } 1185 1186 /* 1187 * sctp_icmp_error_ipv6() is called by sctp_icmp_error() to process ICMPv6 1188 * error messages passed up by IP. 1189 * Assumes that IP has pulled up all the extension headers as well 1190 * as the ICMPv6 header. 
1191 */ 1192 static void 1193 sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) 1194 { 1195 icmp6_t *icmp6; 1196 ip6_t *ip6h; 1197 uint16_t iph_hdr_length; 1198 sctp_hdr_t *sctpha; 1199 uint8_t *nexthdrp; 1200 sctp_faddr_t *fp; 1201 sctp_stack_t *sctps = sctp->sctp_sctps; 1202 1203 ip6h = (ip6_t *)mp->b_rptr; 1204 iph_hdr_length = (ip6h->ip6_nxt != IPPROTO_SCTP) ? 1205 ip_hdr_length_v6(mp, ip6h) : IPV6_HDR_LEN; 1206 1207 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1208 ip6h = (ip6_t *)&icmp6[1]; 1209 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1210 freemsg(mp); 1211 return; 1212 } 1213 ASSERT(*nexthdrp == IPPROTO_SCTP); 1214 1215 /* XXX need ifindex to find connection */ 1216 sctpha = (sctp_hdr_t *)((char *)ip6h + iph_hdr_length); 1217 if ((uchar_t *)sctpha >= mp->b_wptr) { 1218 /* not enough data for SCTP header */ 1219 freemsg(mp); 1220 return; 1221 } 1222 switch (icmp6->icmp6_type) { 1223 case ICMP6_PACKET_TOO_BIG: 1224 /* 1225 * Reduce the MSS based on the new MTU. This will 1226 * eliminate any fragmentation locally. 1227 * N.B. There may well be some funny side-effects on 1228 * the local send policy and the remote receive policy. 1229 * Pending further research, we provide 1230 * sctp_ignore_path_mtu just in case this proves 1231 * disastrous somewhere. 1232 * 1233 * After updating the MSS, retransmit part of the 1234 * dropped segment using the new mss by calling 1235 * sctp_wput_slow(). Need to adjust all those 1236 * params to make sure sctp_wput_slow() work properly. 1237 */ 1238 if (sctps->sctps_ignore_path_mtu) 1239 break; 1240 1241 /* find the offending faddr */ 1242 fp = sctp_lookup_faddr(sctp, &ip6h->ip6_dst); 1243 if (fp == NULL) { 1244 break; 1245 } 1246 1247 sctp_update_pmtu(sctp, fp, B_TRUE); 1248 /* 1249 * It is possible, even likely that a fast retransmit 1250 * attempt has been dropped by ip as a result of this 1251 * error, retransmission bundles as much as possible. 
1252 * A retransmit here prevents significant delays waiting 1253 * on the timer. Analogous to behaviour of TCP after 1254 * ICMP too big. 1255 */ 1256 sctp_rexmit(sctp, fp); 1257 break; 1258 1259 case ICMP6_DST_UNREACH: 1260 switch (icmp6->icmp6_code) { 1261 case ICMP6_DST_UNREACH_NOPORT: 1262 /* make sure the verification tag matches */ 1263 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1264 break; 1265 } 1266 if (sctp->sctp_state == SCTPS_COOKIE_WAIT || 1267 sctp->sctp_state == SCTPS_COOKIE_ECHOED) { 1268 SCTPS_BUMP_MIB(sctps, sctpAborted); 1269 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1270 NULL); 1271 sctp_clean_death(sctp, ECONNREFUSED); 1272 } 1273 break; 1274 1275 case ICMP6_DST_UNREACH_ADMIN: 1276 case ICMP6_DST_UNREACH_NOROUTE: 1277 case ICMP6_DST_UNREACH_NOTNEIGHBOR: 1278 case ICMP6_DST_UNREACH_ADDR: 1279 /* Record the error in case we finally time out. */ 1280 sctp->sctp_client_errno = EHOSTUNREACH; 1281 break; 1282 default: 1283 break; 1284 } 1285 break; 1286 1287 case ICMP6_PARAM_PROB: 1288 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1289 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1290 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1291 (uchar_t *)nexthdrp) { 1292 /* make sure the verification tag matches */ 1293 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1294 break; 1295 } 1296 if (sctp->sctp_state == SCTPS_COOKIE_WAIT) { 1297 SCTPS_BUMP_MIB(sctps, sctpAborted); 1298 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1299 NULL); 1300 sctp_clean_death(sctp, ECONNREFUSED); 1301 } 1302 break; 1303 } 1304 break; 1305 1306 case ICMP6_TIME_EXCEEDED: 1307 default: 1308 break; 1309 } 1310 freemsg(mp); 1311 } 1312 1313 /* 1314 * Called by sockfs to create a new sctp instance. 1315 * 1316 * If parent pointer is passed in, inherit settings from it. 
1317 */ 1318 sctp_t * 1319 sctp_create(void *ulpd, sctp_t *parent, int family, int type, int flags, 1320 sock_upcalls_t *upcalls, sctp_sockbuf_limits_t *sbl, 1321 cred_t *credp) 1322 { 1323 sctp_t *sctp, *psctp; 1324 conn_t *connp; 1325 mblk_t *ack_mp, *hb_mp; 1326 int sleep = flags & SCTP_CAN_BLOCK ? KM_SLEEP : KM_NOSLEEP; 1327 zoneid_t zoneid; 1328 sctp_stack_t *sctps; 1329 1330 /* User must supply a credential. */ 1331 if (credp == NULL) 1332 return (NULL); 1333 1334 psctp = (sctp_t *)parent; 1335 if (psctp != NULL) { 1336 sctps = psctp->sctp_sctps; 1337 /* Increase here to have common decrease at end */ 1338 netstack_hold(sctps->sctps_netstack); 1339 ASSERT(sctps->sctps_recvq_tq_list_cur_sz > 0); 1340 } else { 1341 netstack_t *ns; 1342 1343 ns = netstack_find_by_cred(credp); 1344 sctps = ns->netstack_sctp; 1345 /* 1346 * Check if the receive queue taskq for this sctp_stack_t has 1347 * been set up. 1348 */ 1349 if (sctps->sctps_recvq_tq_list_cur_sz == 0) 1350 sctp_rq_tq_init(sctps); 1351 1352 /* 1353 * For exclusive stacks we set the zoneid to zero 1354 * to make SCTP operate as if in the global zone. 1355 */ 1356 if (sctps->sctps_netstack->netstack_stackid != 1357 GLOBAL_NETSTACKID) 1358 zoneid = GLOBAL_ZONEID; 1359 else 1360 zoneid = crgetzoneid(credp); 1361 } 1362 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, sleep, 1363 sctps->sctps_netstack)) == NULL) { 1364 netstack_rele(sctps->sctps_netstack); 1365 SCTP_KSTAT(sctps, sctp_conn_create); 1366 return (NULL); 1367 } 1368 /* 1369 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1370 * done at top of sctp_create. 
1371 */ 1372 netstack_rele(sctps->sctps_netstack); 1373 sctp = CONN2SCTP(connp); 1374 sctp->sctp_sctps = sctps; 1375 1376 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, sleep)) == NULL || 1377 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 1378 sleep)) == NULL) { 1379 if (ack_mp != NULL) 1380 freeb(ack_mp); 1381 sctp_conn_clear(connp); 1382 sctp->sctp_sctps = NULL; 1383 kmem_cache_free(sctp_conn_cache, connp); 1384 return (NULL); 1385 } 1386 1387 sctp->sctp_ack_mp = ack_mp; 1388 sctp->sctp_heartbeat_mp = hb_mp; 1389 1390 /* 1391 * Have conn_ip_output drop packets should our outer source 1392 * go invalid, and tell us about mtu changes. 1393 */ 1394 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 1395 IXAF_VERIFY_PMTU; 1396 connp->conn_family = family; 1397 connp->conn_so_type = type; 1398 1399 if (sctp_init_values(sctp, psctp, sleep) != 0) { 1400 freeb(ack_mp); 1401 freeb(hb_mp); 1402 sctp_conn_clear(connp); 1403 sctp->sctp_sctps = NULL; 1404 kmem_cache_free(sctp_conn_cache, connp); 1405 return (NULL); 1406 } 1407 sctp->sctp_cansleep = ((flags & SCTP_CAN_BLOCK) == SCTP_CAN_BLOCK); 1408 1409 sctp->sctp_mss = sctps->sctps_initial_mtu - ((family == AF_INET6) ? 1410 sctp->sctp_hdr6_len : sctp->sctp_hdr_len); 1411 1412 if (psctp != NULL) { 1413 conn_t *pconnp = psctp->sctp_connp; 1414 1415 RUN_SCTP(psctp); 1416 /* 1417 * Inherit local address list, local port. Parent is either 1418 * in SCTPS_BOUND, or SCTPS_LISTEN state. 1419 */ 1420 ASSERT((psctp->sctp_state == SCTPS_BOUND) || 1421 (psctp->sctp_state == SCTPS_LISTEN)); 1422 if (sctp_dup_saddrs(psctp, sctp, sleep)) { 1423 WAKE_SCTP(psctp); 1424 freeb(ack_mp); 1425 freeb(hb_mp); 1426 sctp_headers_free(sctp); 1427 sctp_conn_clear(connp); 1428 sctp->sctp_sctps = NULL; 1429 kmem_cache_free(sctp_conn_cache, connp); 1430 return (NULL); 1431 } 1432 1433 /* 1434 * If the parent is specified, it'll be immediatelly 1435 * followed by sctp_connect(). So don't add this guy to 1436 * bind hash. 
1437 */ 1438 connp->conn_lport = pconnp->conn_lport; 1439 sctp->sctp_state = SCTPS_BOUND; 1440 WAKE_SCTP(psctp); 1441 } else { 1442 ASSERT(connp->conn_cred == NULL); 1443 connp->conn_zoneid = zoneid; 1444 /* 1445 * conn_allzones can not be set this early, hence 1446 * no IPCL_ZONEID 1447 */ 1448 connp->conn_ixa->ixa_zoneid = zoneid; 1449 connp->conn_open_time = ddi_get_lbolt64(); 1450 connp->conn_cred = credp; 1451 crhold(credp); 1452 connp->conn_cpid = curproc->p_pid; 1453 1454 /* 1455 * If the caller has the process-wide flag set, then default to 1456 * MAC exempt mode. This allows read-down to unlabeled hosts. 1457 */ 1458 if (getpflags(NET_MAC_AWARE, credp) != 0) 1459 connp->conn_mac_mode = CONN_MAC_AWARE; 1460 1461 connp->conn_zone_is_global = 1462 (crgetzoneid(credp) == GLOBAL_ZONEID); 1463 } 1464 1465 /* Initialize SCTP instance values, our verf tag must never be 0 */ 1466 (void) random_get_pseudo_bytes((uint8_t *)&sctp->sctp_lvtag, 1467 sizeof (sctp->sctp_lvtag)); 1468 if (sctp->sctp_lvtag == 0) 1469 sctp->sctp_lvtag = (uint32_t)gethrtime(); 1470 ASSERT(sctp->sctp_lvtag != 0); 1471 1472 sctp->sctp_ltsn = sctp->sctp_lvtag + 1; 1473 sctp->sctp_lcsn = sctp->sctp_ltsn; 1474 sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd = sctp->sctp_ltsn - 1; 1475 sctp->sctp_adv_pap = sctp->sctp_lastack_rxd; 1476 1477 /* Information required by upper layer */ 1478 ASSERT(ulpd != NULL); 1479 sctp->sctp_ulpd = ulpd; 1480 1481 ASSERT(upcalls != NULL); 1482 sctp->sctp_upcalls = upcalls; 1483 ASSERT(sbl != NULL); 1484 /* Fill in the socket buffer limits for sctpsockfs */ 1485 sbl->sbl_txlowat = connp->conn_sndlowat; 1486 sbl->sbl_txbuf = connp->conn_sndbuf; 1487 sbl->sbl_rxbuf = sctp->sctp_rwnd; 1488 sbl->sbl_rxlowat = SCTP_RECV_LOWATER; 1489 1490 /* Insert this in the global list. 
*/ 1491 SCTP_LINK(sctp, sctps); 1492 1493 return (sctp); 1494 } 1495 1496 /* Run at module load time */ 1497 void 1498 sctp_ddi_g_init(void) 1499 { 1500 /* Create sctp_t/conn_t cache */ 1501 sctp_conn_cache_init(); 1502 1503 /* Create the faddr cache */ 1504 sctp_faddr_init(); 1505 1506 /* Create the sets cache */ 1507 sctp_sets_init(); 1508 1509 /* Create the PR-SCTP sets cache */ 1510 sctp_ftsn_sets_init(); 1511 1512 /* Initialize tables used for CRC calculation */ 1513 sctp_crc32_init(); 1514 1515 /* 1516 * We want to be informed each time a stack is created or 1517 * destroyed in the kernel, so we can maintain the 1518 * set of sctp_stack_t's. 1519 */ 1520 netstack_register(NS_SCTP, sctp_stack_init, NULL, sctp_stack_fini); 1521 } 1522 1523 static void * 1524 sctp_stack_init(netstackid_t stackid, netstack_t *ns) 1525 { 1526 sctp_stack_t *sctps; 1527 size_t arrsz; 1528 int i; 1529 1530 sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP); 1531 sctps->sctps_netstack = ns; 1532 1533 /* Initialize locks */ 1534 mutex_init(&sctps->sctps_g_lock, NULL, MUTEX_DEFAULT, NULL); 1535 mutex_init(&sctps->sctps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 1536 sctps->sctps_g_num_epriv_ports = SCTP_NUM_EPRIV_PORTS; 1537 sctps->sctps_g_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 1538 sctps->sctps_g_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 1539 1540 /* Initialize SCTP hash arrays. */ 1541 sctp_hash_init(sctps); 1542 1543 arrsz = sctp_propinfo_count * sizeof (mod_prop_info_t); 1544 sctps->sctps_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 1545 KM_SLEEP); 1546 bcopy(sctp_propinfo_tbl, sctps->sctps_propinfo_tbl, arrsz); 1547 1548 /* saddr init */ 1549 sctp_saddr_init(sctps); 1550 1551 /* Global SCTP PCB list. */ 1552 list_create(&sctps->sctps_g_list, sizeof (sctp_t), 1553 offsetof(sctp_t, sctp_list)); 1554 1555 /* Initialize SCTP kstats. 
*/ 1556 sctps->sctps_mibkp = sctp_kstat_init(stackid); 1557 sctps->sctps_kstat = sctp_kstat2_init(stackid); 1558 1559 mutex_init(&sctps->sctps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); 1560 sctps->sctps_reclaim = B_FALSE; 1561 sctps->sctps_reclaim_tid = 0; 1562 sctps->sctps_reclaim_period = sctps->sctps_rto_maxg; 1563 1564 /* Allocate the per netstack stats */ 1565 mutex_enter(&cpu_lock); 1566 sctps->sctps_sc_cnt = MAX(ncpus, boot_ncpus); 1567 mutex_exit(&cpu_lock); 1568 sctps->sctps_sc = kmem_zalloc(max_ncpus * sizeof (sctp_stats_cpu_t *), 1569 KM_SLEEP); 1570 for (i = 0; i < sctps->sctps_sc_cnt; i++) { 1571 sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t), 1572 KM_SLEEP); 1573 } 1574 1575 mutex_init(&sctps->sctps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL); 1576 list_create(&sctps->sctps_listener_conf, sizeof (sctp_listener_t), 1577 offsetof(sctp_listener_t, sl_link)); 1578 1579 return (sctps); 1580 } 1581 1582 /* 1583 * Called when the module is about to be unloaded. 1584 */ 1585 void 1586 sctp_ddi_g_destroy(void) 1587 { 1588 /* Destroy sctp_t/conn_t caches */ 1589 sctp_conn_cache_fini(); 1590 1591 /* Destroy the faddr cache */ 1592 sctp_faddr_fini(); 1593 1594 /* Destroy the sets cache */ 1595 sctp_sets_fini(); 1596 1597 /* Destroy the PR-SCTP sets cache */ 1598 sctp_ftsn_sets_fini(); 1599 1600 netstack_unregister(NS_SCTP); 1601 } 1602 1603 /* 1604 * Free the SCTP stack instance. 1605 */ 1606 static void 1607 sctp_stack_fini(netstackid_t stackid, void *arg) 1608 { 1609 sctp_stack_t *sctps = (sctp_stack_t *)arg; 1610 int i; 1611 1612 /* 1613 * Set sctps_reclaim to false tells sctp_reclaim_timer() not to restart 1614 * the timer. 
1615 */ 1616 mutex_enter(&sctps->sctps_reclaim_lock); 1617 sctps->sctps_reclaim = B_FALSE; 1618 mutex_exit(&sctps->sctps_reclaim_lock); 1619 if (sctps->sctps_reclaim_tid != 0) 1620 (void) untimeout(sctps->sctps_reclaim_tid); 1621 mutex_destroy(&sctps->sctps_reclaim_lock); 1622 1623 sctp_listener_conf_cleanup(sctps); 1624 1625 kmem_free(sctps->sctps_propinfo_tbl, 1626 sctp_propinfo_count * sizeof (mod_prop_info_t)); 1627 sctps->sctps_propinfo_tbl = NULL; 1628 1629 /* Destroy the recvq taskqs. */ 1630 sctp_rq_tq_fini(sctps); 1631 1632 /* Destroy saddr */ 1633 sctp_saddr_fini(sctps); 1634 1635 /* Global SCTP PCB list. */ 1636 list_destroy(&sctps->sctps_g_list); 1637 1638 /* Destroy SCTP hash arrays. */ 1639 sctp_hash_destroy(sctps); 1640 1641 /* Destroy SCTP kernel stats. */ 1642 for (i = 0; i < sctps->sctps_sc_cnt; i++) 1643 kmem_free(sctps->sctps_sc[i], sizeof (sctp_stats_cpu_t)); 1644 kmem_free(sctps->sctps_sc, max_ncpus * sizeof (sctp_stats_cpu_t *)); 1645 1646 sctp_kstat_fini(stackid, sctps->sctps_mibkp); 1647 sctps->sctps_mibkp = NULL; 1648 sctp_kstat2_fini(stackid, sctps->sctps_kstat); 1649 sctps->sctps_kstat = NULL; 1650 1651 mutex_destroy(&sctps->sctps_g_lock); 1652 mutex_destroy(&sctps->sctps_epriv_port_lock); 1653 1654 kmem_free(sctps, sizeof (*sctps)); 1655 } 1656 1657 static void 1658 sctp_rq_tq_init(sctp_stack_t *sctps) 1659 { 1660 char tq_name[TASKQ_NAMELEN]; 1661 int thrs; 1662 int max_tasks; 1663 1664 thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, 1665 MAX(ncpus, boot_ncpus))); 1666 /* 1667 * Make sure that the maximum number of tasks is at least thrice as 1668 * large as the number of threads. 1669 */ 1670 max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; 1671 1672 /* 1673 * This helps differentiate the default taskqs in different IP stacks. 
1674 */ 1675 (void) snprintf(tq_name, sizeof (tq_name), "sctp_def_rq_taskq_%d", 1676 sctps->sctps_netstack->netstack_stackid); 1677 1678 sctps->sctps_recvq_tq_list_max_sz = sctp_recvq_tq_list_max; 1679 sctps->sctps_recvq_tq_list_cur_sz = 1; 1680 1681 /* 1682 * Initialize the recvq_tq_list and create the first recvq taskq. 1683 * What to do if it fails? 1684 */ 1685 sctps->sctps_recvq_tq_list = 1686 kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *), 1687 KM_SLEEP); 1688 sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs, 1689 minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE); 1690 mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); 1691 } 1692 1693 static void 1694 sctp_rq_tq_fini(sctp_stack_t *sctps) 1695 { 1696 int i; 1697 1698 if (sctps->sctps_recvq_tq_list_cur_sz == 0) 1699 return; 1700 1701 for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) { 1702 ASSERT(sctps->sctps_recvq_tq_list[i] != NULL); 1703 taskq_destroy(sctps->sctps_recvq_tq_list[i]); 1704 } 1705 kmem_free(sctps->sctps_recvq_tq_list, 1706 sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *)); 1707 sctps->sctps_recvq_tq_list = NULL; 1708 } 1709 1710 /* Add another taskq for a new ill. */ 1711 void 1712 sctp_inc_taskq(sctp_stack_t *sctps) 1713 { 1714 taskq_t *tq; 1715 char tq_name[TASKQ_NAMELEN]; 1716 int thrs; 1717 int max_tasks; 1718 1719 thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, 1720 MAX(ncpus, boot_ncpus))); 1721 /* 1722 * Make sure that the maximum number of tasks is at least thrice as 1723 * large as the number of threads. 
1724 */ 1725 max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; 1726 1727 mutex_enter(&sctps->sctps_rq_tq_lock); 1728 if (sctps->sctps_recvq_tq_list_cur_sz + 1 > 1729 sctps->sctps_recvq_tq_list_max_sz) { 1730 mutex_exit(&sctps->sctps_rq_tq_lock); 1731 cmn_err(CE_NOTE, "Cannot create more SCTP recvq taskq"); 1732 return; 1733 } 1734 1735 (void) snprintf(tq_name, sizeof (tq_name), "sctp_rq_taskq_%d_%u", 1736 sctps->sctps_netstack->netstack_stackid, 1737 sctps->sctps_recvq_tq_list_cur_sz); 1738 tq = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min, 1739 max_tasks, TASKQ_PREPOPULATE); 1740 if (tq == NULL) { 1741 mutex_exit(&sctps->sctps_rq_tq_lock); 1742 cmn_err(CE_NOTE, "SCTP recvq taskq creation failed"); 1743 return; 1744 } 1745 ASSERT(sctps->sctps_recvq_tq_list[ 1746 sctps->sctps_recvq_tq_list_cur_sz] == NULL); 1747 sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz] = tq; 1748 atomic_add_32(&sctps->sctps_recvq_tq_list_cur_sz, 1); 1749 mutex_exit(&sctps->sctps_rq_tq_lock); 1750 } 1751 1752 #ifdef DEBUG 1753 uint32_t recvq_loop_cnt = 0; 1754 uint32_t recvq_call = 0; 1755 #endif 1756 1757 /* 1758 * Find the next recvq_tq to use. This routine will go thru all the 1759 * taskqs until it can dispatch a job for the sctp. If this fails, 1760 * it will create a new taskq and try it. 1761 */ 1762 static boolean_t 1763 sctp_find_next_tq(sctp_t *sctp) 1764 { 1765 int next_tq, try; 1766 taskq_t *tq; 1767 sctp_stack_t *sctps = sctp->sctp_sctps; 1768 1769 /* 1770 * Note that since we don't hold a lock on sctp_rq_tq_lock for 1771 * performance reason, recvq_ta_list_cur_sz can be changed during 1772 * this loop. The problem this will create is that the loop may 1773 * not have tried all the recvq_tq. This should be OK. 
1774 */ 1775 next_tq = atomic_add_32_nv(&sctps->sctps_recvq_tq_list_cur, 1) % 1776 sctps->sctps_recvq_tq_list_cur_sz; 1777 for (try = 0; try < sctps->sctps_recvq_tq_list_cur_sz; try++) { 1778 tq = sctps->sctps_recvq_tq_list[next_tq]; 1779 if (taskq_dispatch(tq, sctp_process_recvq, sctp, 1780 TQ_NOSLEEP) != NULL) { 1781 sctp->sctp_recvq_tq = tq; 1782 return (B_TRUE); 1783 } 1784 next_tq = (next_tq + 1) % sctps->sctps_recvq_tq_list_cur_sz; 1785 } 1786 1787 /* 1788 * Create one more taskq and try it. Note that sctp_inc_taskq() 1789 * may not have created another taskq if the number of recvq 1790 * taskqs is at the maximum. We are probably in a pretty bad 1791 * shape if this actually happens... 1792 */ 1793 sctp_inc_taskq(sctps); 1794 tq = sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz - 1]; 1795 if (taskq_dispatch(tq, sctp_process_recvq, sctp, TQ_NOSLEEP) != NULL) { 1796 sctp->sctp_recvq_tq = tq; 1797 return (B_TRUE); 1798 } 1799 SCTP_KSTAT(sctps, sctp_find_next_tq); 1800 return (B_FALSE); 1801 } 1802 1803 /* 1804 * To add a message to the recvq. Note that the sctp_timer_fire() 1805 * routine also uses this function to add the timer message to the 1806 * receive queue for later processing. And it should be the only 1807 * caller of sctp_add_recvq() which sets the try_harder argument 1808 * to B_TRUE. 1809 * 1810 * If the try_harder argument is B_TRUE, this routine sctp_find_next_tq() 1811 * will try very hard to dispatch the task. Refer to the comment 1812 * for that routine on how it does that. 1813 * 1814 * On failure the message has been freed i.e., this routine always consumes the 1815 * message. It bumps ipIfStatsInDiscards and and uses ip_drop_input to drop. 
1816 */ 1817 void 1818 sctp_add_recvq(sctp_t *sctp, mblk_t *mp, boolean_t caller_hold_lock, 1819 ip_recv_attr_t *ira) 1820 { 1821 mblk_t *attrmp; 1822 ip_stack_t *ipst = sctp->sctp_sctps->sctps_netstack->netstack_ip; 1823 1824 ASSERT(ira->ira_ill == NULL); 1825 1826 if (!caller_hold_lock) 1827 mutex_enter(&sctp->sctp_recvq_lock); 1828 1829 /* If the taskq dispatch has not been scheduled, do it now. */ 1830 if (sctp->sctp_recvq_tq == NULL) { 1831 ASSERT(sctp->sctp_recvq == NULL); 1832 if (!sctp_find_next_tq(sctp)) { 1833 if (!caller_hold_lock) 1834 mutex_exit(&sctp->sctp_recvq_lock); 1835 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1836 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1837 freemsg(mp); 1838 return; 1839 } 1840 /* Make sure the sctp_t will not go away. */ 1841 SCTP_REFHOLD(sctp); 1842 } 1843 1844 attrmp = ip_recv_attr_to_mblk(ira); 1845 if (attrmp == NULL) { 1846 if (!caller_hold_lock) 1847 mutex_exit(&sctp->sctp_recvq_lock); 1848 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1849 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1850 freemsg(mp); 1851 return; 1852 } 1853 ASSERT(attrmp->b_cont == NULL); 1854 attrmp->b_cont = mp; 1855 mp = attrmp; 1856 1857 if (sctp->sctp_recvq == NULL) { 1858 sctp->sctp_recvq = mp; 1859 sctp->sctp_recvq_tail = mp; 1860 } else { 1861 sctp->sctp_recvq_tail->b_next = mp; 1862 sctp->sctp_recvq_tail = mp; 1863 } 1864 1865 if (!caller_hold_lock) 1866 mutex_exit(&sctp->sctp_recvq_lock); 1867 } 1868 1869 static void 1870 sctp_process_recvq(void *arg) 1871 { 1872 sctp_t *sctp = (sctp_t *)arg; 1873 mblk_t *mp; 1874 #ifdef DEBUG 1875 uint32_t loop_cnt = 0; 1876 #endif 1877 ip_recv_attr_t iras; 1878 1879 #ifdef _BIG_ENDIAN 1880 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 1881 #else 1882 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 1883 #endif 1884 1885 RUN_SCTP(sctp); 1886 mutex_enter(&sctp->sctp_recvq_lock); 1887 1888 #ifdef DEBUG 1889 recvq_call++; 1890 #endif 1891 /* 1892 * Note that while we 
are in this loop, other thread can put 1893 * new packets in the receive queue. We may be looping for 1894 * quite a while. 1895 */ 1896 while ((mp = sctp->sctp_recvq) != NULL) { 1897 mblk_t *data_mp; 1898 1899 sctp->sctp_recvq = mp->b_next; 1900 mutex_exit(&sctp->sctp_recvq_lock); 1901 mp->b_next = NULL; 1902 #ifdef DEBUG 1903 loop_cnt++; 1904 #endif 1905 mp->b_prev = NULL; 1906 1907 data_mp = mp->b_cont; 1908 mp->b_cont = NULL; 1909 if (!ip_recv_attr_from_mblk(mp, &iras)) { 1910 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 1911 freemsg(mp); 1912 ira_cleanup(&iras, B_TRUE); 1913 continue; 1914 } 1915 1916 if (iras.ira_flags & IRAF_ICMP_ERROR) 1917 sctp_icmp_error(sctp, data_mp); 1918 else 1919 sctp_input_data(sctp, data_mp, &iras); 1920 1921 ira_cleanup(&iras, B_TRUE); 1922 mutex_enter(&sctp->sctp_recvq_lock); 1923 } 1924 1925 sctp->sctp_recvq_tail = NULL; 1926 sctp->sctp_recvq_tq = NULL; 1927 1928 mutex_exit(&sctp->sctp_recvq_lock); 1929 1930 WAKE_SCTP(sctp); 1931 1932 #ifdef DEBUG 1933 if (loop_cnt > recvq_loop_cnt) 1934 recvq_loop_cnt = loop_cnt; 1935 #endif 1936 /* Now it can go away. 
*/ 1937 SCTP_REFRELE(sctp); 1938 } 1939 1940 /* ARGSUSED */ 1941 static int 1942 sctp_conn_cache_constructor(void *buf, void *cdrarg, int kmflags) 1943 { 1944 conn_t *connp = (conn_t *)buf; 1945 sctp_t *sctp = (sctp_t *)&connp[1]; 1946 int cnt; 1947 1948 bzero(connp, sizeof (conn_t)); 1949 bzero(buf, (char *)&sctp[1] - (char *)buf); 1950 1951 mutex_init(&sctp->sctp_reflock, NULL, MUTEX_DEFAULT, NULL); 1952 mutex_init(&sctp->sctp_lock, NULL, MUTEX_DEFAULT, NULL); 1953 mutex_init(&sctp->sctp_recvq_lock, NULL, MUTEX_DEFAULT, NULL); 1954 cv_init(&sctp->sctp_cv, NULL, CV_DEFAULT, NULL); 1955 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 1956 rw_init(&sctp->sctp_saddrs[cnt].ipif_hash_lock, NULL, 1957 RW_DEFAULT, NULL); 1958 } 1959 1960 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1961 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1962 connp->conn_flags = IPCL_SCTPCONN; 1963 connp->conn_proto = IPPROTO_SCTP; 1964 connp->conn_sctp = sctp; 1965 sctp->sctp_connp = connp; 1966 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 1967 1968 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 1969 if (connp->conn_ixa == NULL) { 1970 return (ENOMEM); 1971 } 1972 connp->conn_ixa->ixa_refcnt = 1; 1973 connp->conn_ixa->ixa_protocol = connp->conn_proto; 1974 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 1975 return (0); 1976 } 1977 1978 /* ARGSUSED */ 1979 static void 1980 sctp_conn_cache_destructor(void *buf, void *cdrarg) 1981 { 1982 conn_t *connp = (conn_t *)buf; 1983 sctp_t *sctp = (sctp_t *)&connp[1]; 1984 int cnt; 1985 1986 ASSERT(sctp->sctp_connp == connp); 1987 ASSERT(!MUTEX_HELD(&sctp->sctp_lock)); 1988 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 1989 ASSERT(!MUTEX_HELD(&sctp->sctp_recvq_lock)); 1990 1991 ASSERT(sctp->sctp_conn_hash_next == NULL); 1992 ASSERT(sctp->sctp_conn_hash_prev == NULL); 1993 ASSERT(sctp->sctp_listen_hash_next == NULL); 1994 ASSERT(sctp->sctp_listen_hash_prev == NULL); 1995 ASSERT(sctp->sctp_listen_tfp == 
NULL); 1996 ASSERT(sctp->sctp_conn_tfp == NULL); 1997 1998 ASSERT(sctp->sctp_faddrs == NULL); 1999 ASSERT(sctp->sctp_nsaddrs == 0); 2000 2001 ASSERT(sctp->sctp_ulpd == NULL); 2002 2003 ASSERT(sctp->sctp_lastfaddr == NULL); 2004 ASSERT(sctp->sctp_primary == NULL); 2005 ASSERT(sctp->sctp_current == NULL); 2006 ASSERT(sctp->sctp_lastdata == NULL); 2007 2008 ASSERT(sctp->sctp_xmit_head == NULL); 2009 ASSERT(sctp->sctp_xmit_tail == NULL); 2010 ASSERT(sctp->sctp_xmit_unsent == NULL); 2011 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 2012 2013 ASSERT(sctp->sctp_ostrcntrs == NULL); 2014 2015 ASSERT(sctp->sctp_sack_info == NULL); 2016 ASSERT(sctp->sctp_ack_mp == NULL); 2017 ASSERT(sctp->sctp_instr == NULL); 2018 2019 ASSERT(sctp->sctp_iphc == NULL); 2020 ASSERT(sctp->sctp_iphc6 == NULL); 2021 ASSERT(sctp->sctp_ipha == NULL); 2022 ASSERT(sctp->sctp_ip6h == NULL); 2023 ASSERT(sctp->sctp_sctph == NULL); 2024 ASSERT(sctp->sctp_sctph6 == NULL); 2025 2026 ASSERT(sctp->sctp_cookie_mp == NULL); 2027 2028 ASSERT(sctp->sctp_refcnt == 0); 2029 ASSERT(sctp->sctp_timer_mp == NULL); 2030 ASSERT(sctp->sctp_connp->conn_ref == 0); 2031 ASSERT(sctp->sctp_heartbeat_mp == NULL); 2032 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 2033 2034 ASSERT(sctp->sctp_shutdown_faddr == NULL); 2035 2036 ASSERT(sctp->sctp_cxmit_list == NULL); 2037 2038 ASSERT(sctp->sctp_recvq == NULL); 2039 ASSERT(sctp->sctp_recvq_tail == NULL); 2040 ASSERT(sctp->sctp_recvq_tq == NULL); 2041 2042 /* 2043 * sctp_pad_mp can be NULL if the memory allocation fails 2044 * in sctp_init_values() and the conn_t is freed. 
2045 */ 2046 if (sctp->sctp_pad_mp != NULL) { 2047 freeb(sctp->sctp_pad_mp); 2048 sctp->sctp_pad_mp = NULL; 2049 } 2050 2051 mutex_destroy(&sctp->sctp_reflock); 2052 mutex_destroy(&sctp->sctp_lock); 2053 mutex_destroy(&sctp->sctp_recvq_lock); 2054 cv_destroy(&sctp->sctp_cv); 2055 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 2056 rw_destroy(&sctp->sctp_saddrs[cnt].ipif_hash_lock); 2057 } 2058 2059 mutex_destroy(&connp->conn_lock); 2060 cv_destroy(&connp->conn_cv); 2061 rw_destroy(&connp->conn_ilg_lock); 2062 2063 /* Can be NULL if constructor failed */ 2064 if (connp->conn_ixa != NULL) { 2065 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2066 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2067 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2068 ixa_refrele(connp->conn_ixa); 2069 } 2070 } 2071 2072 static void 2073 sctp_conn_cache_init() 2074 { 2075 sctp_conn_cache = kmem_cache_create("sctp_conn_cache", 2076 sizeof (sctp_t) + sizeof (conn_t), 0, sctp_conn_cache_constructor, 2077 sctp_conn_cache_destructor, sctp_conn_reclaim, NULL, NULL, 0); 2078 } 2079 2080 static void 2081 sctp_conn_cache_fini() 2082 { 2083 kmem_cache_destroy(sctp_conn_cache); 2084 } 2085 2086 void 2087 sctp_conn_init(conn_t *connp) 2088 { 2089 ASSERT(connp->conn_flags == IPCL_SCTPCONN); 2090 connp->conn_rq = connp->conn_wq = NULL; 2091 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 2092 IXAF_VERIFY_PMTU; 2093 2094 ASSERT(connp->conn_proto == IPPROTO_SCTP); 2095 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 2096 connp->conn_state_flags |= CONN_INCIPIENT; 2097 2098 ASSERT(connp->conn_sctp != NULL); 2099 2100 /* 2101 * Register sctp_notify to listen to capability changes detected by IP. 2102 * This upcall is made in the context of the call to conn_ip_output 2103 * thus it holds whatever locks sctp holds across conn_ip_output. 
2104 */ 2105 connp->conn_ixa->ixa_notify = sctp_notify; 2106 connp->conn_ixa->ixa_notify_cookie = connp->conn_sctp; 2107 } 2108 2109 static void 2110 sctp_conn_clear(conn_t *connp) 2111 { 2112 /* Clean up conn_t stuff */ 2113 if (connp->conn_latch != NULL) { 2114 IPLATCH_REFRELE(connp->conn_latch); 2115 connp->conn_latch = NULL; 2116 } 2117 if (connp->conn_latch_in_policy != NULL) { 2118 IPPOL_REFRELE(connp->conn_latch_in_policy); 2119 connp->conn_latch_in_policy = NULL; 2120 } 2121 if (connp->conn_latch_in_action != NULL) { 2122 IPACT_REFRELE(connp->conn_latch_in_action); 2123 connp->conn_latch_in_action = NULL; 2124 } 2125 if (connp->conn_policy != NULL) { 2126 IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 2127 connp->conn_policy = NULL; 2128 } 2129 if (connp->conn_ipsec_opt_mp != NULL) { 2130 freemsg(connp->conn_ipsec_opt_mp); 2131 connp->conn_ipsec_opt_mp = NULL; 2132 } 2133 netstack_rele(connp->conn_netstack); 2134 connp->conn_netstack = NULL; 2135 2136 /* Leave conn_ixa and other constructed fields in place */ 2137 ipcl_conn_cleanup(connp); 2138 } 2139