1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/strsubr.h> 29 #include <sys/stropts.h> 30 #include <sys/strsun.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/xti_inet.h> 36 #include <sys/cmn_err.h> 37 #include <sys/debug.h> 38 #include <sys/vtrace.h> 39 #include <sys/kmem.h> 40 #include <sys/cpuvar.h> 41 #include <sys/random.h> 42 #include <sys/priv.h> 43 #include <sys/sunldi.h> 44 45 #include <sys/errno.h> 46 #include <sys/signal.h> 47 #include <sys/socket.h> 48 #include <sys/isa_defs.h> 49 #include <netinet/in.h> 50 #include <netinet/tcp.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 #include <netinet/sctp.h> 54 #include <net/if.h> 55 56 #include <inet/common.h> 57 #include <inet/ip.h> 58 #include <inet/ip_if.h> 59 #include <inet/ip_ire.h> 60 #include <inet/ip6.h> 61 #include <inet/mi.h> 62 #include <inet/mib2.h> 63 #include <inet/kstatcom.h> 64 #include <inet/optcom.h> 65 #include <inet/ipclassifier.h> 66 
#include <inet/ipsec_impl.h>
#include <inet/sctp_ip.h>
#include <inet/sctp_crc32.h>

#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"

int sctpdebug;
sin6_t	sctp_sin6_null;	/* Zero address for quick clears */

static void	sctp_closei_local(sctp_t *sctp);
static int	sctp_init_values(sctp_t *, sctp_t *, int);
static void	sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp);
static void	sctp_process_recvq(void *);
static void	sctp_rq_tq_init(sctp_stack_t *);
static void	sctp_rq_tq_fini(sctp_stack_t *);
static void	sctp_conn_cache_init();
static void	sctp_conn_cache_fini();
static int	sctp_conn_cache_constructor();
static void	sctp_conn_cache_destructor();
static void	sctp_conn_clear(conn_t *);
static void	sctp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

static void	*sctp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	sctp_stack_fini(netstackid_t stackid, void *arg);

/*
 * SCTP receive queue taskq
 *
 * At SCTP initialization time, a default taskq is created for
 * servicing packets received when the interrupt thread cannot
 * get a hold on the sctp_t.  The number of taskqs can be increased in
 * sctp_find_next_tq() when an existing taskq cannot be dispatched.
 * The taskqs are never removed, but the maximum number of taskqs which
 * can be created is controlled by sctp_recvq_tq_list_max_sz.  Note
 * that the SCTP recvq taskq is not tied to any specific CPU or ill.
 *
 * Those taskqs are stored in an array, recvq_tq_list, and they are
 * used in a round robin fashion.  The current taskq being used is
 * determined by recvq_tq_list_cur.
 */

/* /etc/system variables */
/* The minimum number of threads for each taskq. */
int sctp_recvq_tq_thr_min = 4;
/* The maximum number of threads for each taskq. */
int sctp_recvq_tq_thr_max = 48;
/* The minimum number of tasks for each taskq. */
int sctp_recvq_tq_task_min = 8;
/* Default value of sctp_recvq_tq_list_max_sz. */
int sctp_recvq_tq_list_max = 16;

/*
 * SCTP tunables related declarations. Definitions are in sctp_tunables.c
 */
extern mod_prop_info_t sctp_propinfo_tbl[];
extern int sctp_propinfo_count;

/* sctp_t/conn_t kmem cache */
struct kmem_cache	*sctp_conn_cache;

/*
 * The helpers below expand to several statements.  Each one is wrapped in
 * do { } while (0) so that it behaves as exactly one statement, including
 * when it is the sole body of an unbraced if/else, and every macro argument
 * is parenthesized.  Callers write the usual trailing semicolon.
 */

/* Mark a sctp_t as condemned, under sctp_reflock. */
#define	SCTP_CONDEMNED(sctp)					\
	do {							\
		mutex_enter(&(sctp)->sctp_reflock);		\
		((sctp)->sctp_condemned = B_TRUE);		\
		mutex_exit(&(sctp)->sctp_reflock);		\
		/* CONSTCOND */					\
	} while (0)

/* Link/unlink a sctp_t to/from the global list. */
#define	SCTP_LINK(sctp, sctps)					\
	do {							\
		mutex_enter(&(sctps)->sctps_g_lock);		\
		list_insert_tail(&(sctps)->sctps_g_list, (sctp)); \
		mutex_exit(&(sctps)->sctps_g_lock);		\
		/* CONSTCOND */					\
	} while (0)

/* Only a condemned sctp_t may be taken off the global list. */
#define	SCTP_UNLINK(sctp, sctps)				\
	do {							\
		mutex_enter(&(sctps)->sctps_g_lock);		\
		ASSERT((sctp)->sctp_condemned);			\
		list_remove(&(sctps)->sctps_g_list, (sctp));	\
		mutex_exit(&(sctps)->sctps_g_lock);		\
		/* CONSTCOND */					\
	} while (0)

/*
 * Hooks for Sun Cluster. On non-clustered nodes these will remain NULL.
 * PSARC/2005/602.
 */
void (*cl_sctp_listen)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL;
void (*cl_sctp_unlisten)(sa_family_t, uchar_t *, uint_t, in_port_t) = NULL;
void (*cl_sctp_connect)(sa_family_t, uchar_t *, uint_t, in_port_t,
    uchar_t *, uint_t, in_port_t, boolean_t, cl_sctp_handle_t) = NULL;
void (*cl_sctp_disconnect)(sa_family_t, cl_sctp_handle_t) = NULL;
void (*cl_sctp_assoc_change)(sa_family_t, uchar_t *, size_t, uint_t,
    uchar_t *, size_t, uint_t, int, cl_sctp_handle_t) = NULL;
void (*cl_sctp_check_addrs)(sa_family_t, in_port_t, uchar_t **, size_t,
    uint_t *, boolean_t) = NULL;
/*
 * Return the version number of the SCTP kernel interface.
161 */ 162 int 163 sctp_itf_ver(int cl_ver) 164 { 165 if (cl_ver != SCTP_ITF_VER) 166 return (-1); 167 return (SCTP_ITF_VER); 168 } 169 170 /* 171 * Called when we need a new sctp instantiation but don't really have a 172 * new q to hang it off of. Copy the priv flag from the passed in structure. 173 */ 174 sctp_t * 175 sctp_create_eager(sctp_t *psctp) 176 { 177 sctp_t *sctp; 178 mblk_t *ack_mp, *hb_mp; 179 conn_t *connp; 180 cred_t *credp; 181 sctp_stack_t *sctps = psctp->sctp_sctps; 182 183 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, KM_NOSLEEP, 184 sctps->sctps_netstack)) == NULL) { 185 return (NULL); 186 } 187 188 sctp = CONN2SCTP(connp); 189 sctp->sctp_sctps = sctps; 190 191 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, 192 KM_NOSLEEP)) == NULL || 193 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 194 KM_NOSLEEP)) == NULL) { 195 if (ack_mp != NULL) 196 freeb(ack_mp); 197 sctp_conn_clear(connp); 198 sctp->sctp_sctps = NULL; 199 kmem_cache_free(sctp_conn_cache, connp); 200 return (NULL); 201 } 202 203 sctp->sctp_ack_mp = ack_mp; 204 sctp->sctp_heartbeat_mp = hb_mp; 205 206 if (sctp_init_values(sctp, psctp, KM_NOSLEEP) != 0) { 207 freeb(ack_mp); 208 freeb(hb_mp); 209 sctp_conn_clear(connp); 210 sctp->sctp_sctps = NULL; 211 kmem_cache_free(sctp_conn_cache, connp); 212 return (NULL); 213 } 214 215 if ((credp = psctp->sctp_connp->conn_cred) != NULL) { 216 connp->conn_cred = credp; 217 crhold(credp); 218 } 219 220 sctp->sctp_mss = psctp->sctp_mss; 221 sctp->sctp_detached = B_TRUE; 222 /* 223 * Link to the global as soon as possible so that this sctp_t 224 * can be found. 225 */ 226 SCTP_LINK(sctp, sctps); 227 228 /* If the listener has a limit, inherit the counter info. */ 229 sctp->sctp_listen_cnt = psctp->sctp_listen_cnt; 230 231 return (sctp); 232 } 233 234 /* 235 * We are dying for some reason. Try to do it gracefully. 
236 */ 237 void 238 sctp_clean_death(sctp_t *sctp, int err) 239 { 240 ASSERT(sctp != NULL); 241 242 dprint(3, ("sctp_clean_death %p, state %d\n", (void *)sctp, 243 sctp->sctp_state)); 244 245 sctp->sctp_client_errno = err; 246 /* 247 * Check to see if we need to notify upper layer. 248 */ 249 if ((sctp->sctp_state >= SCTPS_COOKIE_WAIT) && 250 !SCTP_IS_DETACHED(sctp)) { 251 if (sctp->sctp_xmit_head || sctp->sctp_xmit_unsent) { 252 sctp_regift_xmitlist(sctp); 253 } 254 if (sctp->sctp_ulp_disconnected(sctp->sctp_ulpd, 0, err)) { 255 /* 256 * Socket is gone, detach. 257 */ 258 sctp->sctp_detached = B_TRUE; 259 sctp->sctp_ulpd = NULL; 260 sctp->sctp_upcalls = NULL; 261 } 262 } 263 264 /* Remove this sctp from all hashes. */ 265 sctp_closei_local(sctp); 266 267 /* 268 * If the sctp_t is detached, we need to finish freeing up 269 * the resources. At this point, ip_fanout_sctp() should have 270 * a hold on this sctp_t. Some thread doing snmp stuff can 271 * have a hold. And a taskq can also have a hold waiting to 272 * work. sctp_unlink() the sctp_t from the global list so 273 * that no new thread can find it. Then do a SCTP_REFRELE(). 274 * The sctp_t will be freed after all those threads are done. 275 */ 276 if (SCTP_IS_DETACHED(sctp)) { 277 SCTP_CONDEMNED(sctp); 278 SCTP_REFRELE(sctp); 279 } 280 } 281 282 /* 283 * Called by upper layer when it wants to close this association. 284 * Depending on the state of this assoication, we need to do 285 * different things. 286 * 287 * If the state is below COOKIE_ECHOED or it is COOKIE_ECHOED but with 288 * no sent data, just remove this sctp from all the hashes. This 289 * makes sure that all packets from the other end will go to the default 290 * sctp handling. The upper layer will then do a sctp_close() to clean 291 * up. 292 * 293 * Otherwise, check and see if SO_LINGER is set. If it is set, check 294 * the value. If the value is 0, consider this an abortive close. Send 295 * an ABORT message and kill the associatiion. 
296 * 297 */ 298 int 299 sctp_disconnect(sctp_t *sctp) 300 { 301 int error = 0; 302 conn_t *connp = sctp->sctp_connp; 303 304 dprint(3, ("sctp_disconnect %p, state %d\n", (void *)sctp, 305 sctp->sctp_state)); 306 307 RUN_SCTP(sctp); 308 309 switch (sctp->sctp_state) { 310 case SCTPS_IDLE: 311 case SCTPS_BOUND: 312 case SCTPS_LISTEN: 313 break; 314 case SCTPS_COOKIE_WAIT: 315 case SCTPS_COOKIE_ECHOED: 316 /* 317 * Close during the connect 3-way handshake 318 * but here there may or may not be pending data 319 * already on queue. Process almost same as in 320 * the ESTABLISHED state. 321 */ 322 if (sctp->sctp_xmit_head == NULL && 323 sctp->sctp_xmit_unsent == NULL) { 324 break; 325 } 326 /* FALLTHRU */ 327 default: 328 /* 329 * If SO_LINGER has set a zero linger time, terminate the 330 * association and send an ABORT. 331 */ 332 if (connp->conn_linger && connp->conn_lingertime == 0) { 333 sctp_user_abort(sctp, NULL); 334 WAKE_SCTP(sctp); 335 return (error); 336 } 337 338 /* 339 * If there is unread data, send an ABORT and terminate the 340 * association. 341 */ 342 if (sctp->sctp_rxqueued > 0 || sctp->sctp_ulp_rxqueued > 0) { 343 sctp_user_abort(sctp, NULL); 344 WAKE_SCTP(sctp); 345 return (error); 346 } 347 /* 348 * Transmit the shutdown before detaching the sctp_t. 349 * After sctp_detach returns this queue/perimeter 350 * no longer owns the sctp_t thus others can modify it. 351 */ 352 sctp_send_shutdown(sctp, 0); 353 354 /* Pass gathered wisdom to IP for keeping */ 355 sctp_update_dce(sctp); 356 357 /* 358 * If lingering on close then wait until the shutdown 359 * is complete, or the SO_LINGER time passes, or an 360 * ABORT is sent/received. Note that sctp_disconnect() 361 * can be called more than once. Make sure that only 362 * one thread waits. 
363 */ 364 if (connp->conn_linger && connp->conn_lingertime > 0 && 365 sctp->sctp_state >= SCTPS_ESTABLISHED && 366 !sctp->sctp_lingering) { 367 clock_t stoptime; /* in ticks */ 368 clock_t ret; 369 370 sctp->sctp_lingering = 1; 371 sctp->sctp_client_errno = 0; 372 stoptime = ddi_get_lbolt() + 373 connp->conn_lingertime * hz; 374 375 mutex_enter(&sctp->sctp_lock); 376 sctp->sctp_running = B_FALSE; 377 while (sctp->sctp_state >= SCTPS_ESTABLISHED && 378 sctp->sctp_client_errno == 0) { 379 cv_signal(&sctp->sctp_cv); 380 ret = cv_timedwait_sig(&sctp->sctp_cv, 381 &sctp->sctp_lock, stoptime); 382 if (ret < 0) { 383 /* Stoptime has reached. */ 384 sctp->sctp_client_errno = EWOULDBLOCK; 385 break; 386 } else if (ret == 0) { 387 /* Got a signal. */ 388 break; 389 } 390 } 391 error = sctp->sctp_client_errno; 392 sctp->sctp_client_errno = 0; 393 mutex_exit(&sctp->sctp_lock); 394 } 395 396 WAKE_SCTP(sctp); 397 return (error); 398 } 399 400 401 /* Remove this sctp from all hashes so nobody can find it. */ 402 sctp_closei_local(sctp); 403 WAKE_SCTP(sctp); 404 return (error); 405 } 406 407 void 408 sctp_close(sctp_t *sctp) 409 { 410 dprint(3, ("sctp_close %p, state %d\n", (void *)sctp, 411 sctp->sctp_state)); 412 413 RUN_SCTP(sctp); 414 sctp->sctp_detached = 1; 415 sctp->sctp_ulpd = NULL; 416 sctp->sctp_upcalls = NULL; 417 bzero(&sctp->sctp_events, sizeof (sctp->sctp_events)); 418 419 /* If the graceful shutdown has not been completed, just return. */ 420 if (sctp->sctp_state != SCTPS_IDLE) { 421 WAKE_SCTP(sctp); 422 return; 423 } 424 425 /* 426 * Since sctp_t is in SCTPS_IDLE state, so the only thread which 427 * can have a hold on the sctp_t is doing snmp stuff. Just do 428 * a SCTP_REFRELE() here after the SCTP_UNLINK(). It will 429 * be freed when the other thread is done. 430 */ 431 SCTP_CONDEMNED(sctp); 432 WAKE_SCTP(sctp); 433 SCTP_REFRELE(sctp); 434 } 435 436 /* 437 * Unlink from global list and do the eager close. 
438 * Remove the refhold implicit in being on the global list. 439 */ 440 void 441 sctp_close_eager(sctp_t *sctp) 442 { 443 SCTP_CONDEMNED(sctp); 444 sctp_closei_local(sctp); 445 SCTP_REFRELE(sctp); 446 } 447 448 /* 449 * The sctp_t is going away. Remove it from all lists and set it 450 * to SCTPS_IDLE. The caller has to remove it from the 451 * global list. The freeing up of memory is deferred until 452 * sctp_free(). This is needed since a thread in sctp_input() might have 453 * done a SCTP_REFHOLD on this structure before it was removed from the 454 * hashes. 455 */ 456 static void 457 sctp_closei_local(sctp_t *sctp) 458 { 459 mblk_t *mp; 460 conn_t *connp = sctp->sctp_connp; 461 462 /* The counter is incremented only for established associations. */ 463 if (sctp->sctp_state >= SCTPS_ESTABLISHED) 464 SCTPS_ASSOC_DEC(sctp->sctp_sctps); 465 466 if (sctp->sctp_listen_cnt != NULL) 467 SCTP_DECR_LISTEN_CNT(sctp); 468 469 /* Sanity check, don't do the same thing twice. */ 470 if (connp->conn_state_flags & CONN_CLOSING) { 471 ASSERT(sctp->sctp_state == SCTPS_IDLE); 472 return; 473 } 474 475 /* Stop and free the timers */ 476 sctp_free_faddr_timers(sctp); 477 if ((mp = sctp->sctp_heartbeat_mp) != NULL) { 478 sctp_timer_free(mp); 479 sctp->sctp_heartbeat_mp = NULL; 480 } 481 if ((mp = sctp->sctp_ack_mp) != NULL) { 482 sctp_timer_free(mp); 483 sctp->sctp_ack_mp = NULL; 484 } 485 486 /* Set the CONN_CLOSING flag so that IP will not cache IRE again. */ 487 mutex_enter(&connp->conn_lock); 488 connp->conn_state_flags |= CONN_CLOSING; 489 mutex_exit(&connp->conn_lock); 490 491 /* Remove from all hashes. */ 492 sctp_bind_hash_remove(sctp); 493 sctp_conn_hash_remove(sctp); 494 sctp_listen_hash_remove(sctp); 495 sctp->sctp_state = SCTPS_IDLE; 496 497 /* 498 * Clean up the recvq as much as possible. All those packets 499 * will be silently dropped as this sctp_t is now in idle state. 
500 */ 501 mutex_enter(&sctp->sctp_recvq_lock); 502 while ((mp = sctp->sctp_recvq) != NULL) { 503 sctp->sctp_recvq = mp->b_next; 504 mp->b_next = NULL; 505 506 if (ip_recv_attr_is_mblk(mp)) 507 mp = ip_recv_attr_free_mblk(mp); 508 509 freemsg(mp); 510 } 511 mutex_exit(&sctp->sctp_recvq_lock); 512 } 513 514 /* 515 * Free memory associated with the sctp/ip header template. 516 */ 517 static void 518 sctp_headers_free(sctp_t *sctp) 519 { 520 if (sctp->sctp_iphc != NULL) { 521 kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len); 522 sctp->sctp_iphc = NULL; 523 sctp->sctp_ipha = NULL; 524 sctp->sctp_hdr_len = 0; 525 sctp->sctp_ip_hdr_len = 0; 526 sctp->sctp_iphc_len = 0; 527 sctp->sctp_sctph = NULL; 528 sctp->sctp_hdr_len = 0; 529 } 530 if (sctp->sctp_iphc6 != NULL) { 531 kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len); 532 sctp->sctp_iphc6 = NULL; 533 sctp->sctp_ip6h = NULL; 534 sctp->sctp_hdr6_len = 0; 535 sctp->sctp_ip_hdr6_len = 0; 536 sctp->sctp_iphc6_len = 0; 537 sctp->sctp_sctph6 = NULL; 538 sctp->sctp_hdr6_len = 0; 539 } 540 } 541 542 static void 543 sctp_free_xmit_data(sctp_t *sctp) 544 { 545 mblk_t *ump = NULL; 546 mblk_t *nump; 547 mblk_t *mp; 548 mblk_t *nmp; 549 550 sctp->sctp_xmit_unacked = NULL; 551 ump = sctp->sctp_xmit_head; 552 sctp->sctp_xmit_tail = sctp->sctp_xmit_head = NULL; 553 free_unsent: 554 for (; ump != NULL; ump = nump) { 555 for (mp = ump->b_cont; mp != NULL; mp = nmp) { 556 nmp = mp->b_next; 557 mp->b_next = NULL; 558 mp->b_prev = NULL; 559 freemsg(mp); 560 } 561 ASSERT(DB_REF(ump) == 1); 562 nump = ump->b_next; 563 ump->b_next = NULL; 564 ump->b_prev = NULL; 565 ump->b_cont = NULL; 566 freeb(ump); 567 } 568 if ((ump = sctp->sctp_xmit_unsent) == NULL) { 569 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 570 return; 571 } 572 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = NULL; 573 goto free_unsent; 574 } 575 576 /* 577 * Cleanup all the messages in the stream queue and the reassembly lists. 
578 * If 'free' is true, then delete the streams as well. 579 */ 580 void 581 sctp_instream_cleanup(sctp_t *sctp, boolean_t free) 582 { 583 int i; 584 mblk_t *mp; 585 mblk_t *mp1; 586 587 if (sctp->sctp_instr != NULL) { 588 /* walk thru and flush out anything remaining in the Q */ 589 for (i = 0; i < sctp->sctp_num_istr; i++) { 590 mp = sctp->sctp_instr[i].istr_msgs; 591 while (mp != NULL) { 592 mp1 = mp->b_next; 593 mp->b_next = mp->b_prev = NULL; 594 freemsg(mp); 595 mp = mp1; 596 } 597 sctp->sctp_instr[i].istr_msgs = NULL; 598 sctp->sctp_instr[i].istr_nmsgs = 0; 599 sctp_free_reass((sctp->sctp_instr) + i); 600 sctp->sctp_instr[i].nextseq = 0; 601 } 602 if (free) { 603 kmem_free(sctp->sctp_instr, 604 sizeof (*sctp->sctp_instr) * sctp->sctp_num_istr); 605 sctp->sctp_instr = NULL; 606 sctp->sctp_num_istr = 0; 607 } 608 } 609 /* un-ordered fragments */ 610 if (sctp->sctp_uo_frags != NULL) { 611 for (mp = sctp->sctp_uo_frags; mp != NULL; mp = mp1) { 612 mp1 = mp->b_next; 613 mp->b_next = mp->b_prev = NULL; 614 freemsg(mp); 615 } 616 sctp->sctp_uo_frags = NULL; 617 } 618 } 619 620 /* 621 * Last reference to the sctp_t is gone. Free all memory associated with it. 622 * Called from SCTP_REFRELE. Called inline in sctp_close() 623 */ 624 void 625 sctp_free(conn_t *connp) 626 { 627 sctp_t *sctp = CONN2SCTP(connp); 628 int cnt; 629 sctp_stack_t *sctps = sctp->sctp_sctps; 630 631 ASSERT(sctps != NULL); 632 /* Unlink it from the global list */ 633 SCTP_UNLINK(sctp, sctps); 634 635 ASSERT(connp->conn_ref == 0); 636 ASSERT(connp->conn_proto == IPPROTO_SCTP); 637 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 638 ASSERT(sctp->sctp_refcnt == 0); 639 640 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 641 ASSERT(sctp->sctp_conn_hash_next == NULL && 642 sctp->sctp_conn_hash_prev == NULL); 643 644 645 /* Free up all the resources. 
*/ 646 647 /* blow away sctp stream management */ 648 if (sctp->sctp_ostrcntrs != NULL) { 649 kmem_free(sctp->sctp_ostrcntrs, 650 sizeof (uint16_t) * sctp->sctp_num_ostr); 651 sctp->sctp_ostrcntrs = NULL; 652 } 653 sctp_instream_cleanup(sctp, B_TRUE); 654 655 /* Remove all data transfer resources. */ 656 sctp->sctp_istr_nmsgs = 0; 657 sctp->sctp_rxqueued = 0; 658 sctp_free_xmit_data(sctp); 659 sctp->sctp_unacked = 0; 660 sctp->sctp_unsent = 0; 661 if (sctp->sctp_cxmit_list != NULL) 662 sctp_asconf_free_cxmit(sctp, NULL); 663 664 sctp->sctp_lastdata = NULL; 665 666 /* Clear out default xmit settings */ 667 sctp->sctp_def_stream = 0; 668 sctp->sctp_def_flags = 0; 669 sctp->sctp_def_ppid = 0; 670 sctp->sctp_def_context = 0; 671 sctp->sctp_def_timetolive = 0; 672 673 if (sctp->sctp_sack_info != NULL) { 674 sctp_free_set(sctp->sctp_sack_info); 675 sctp->sctp_sack_info = NULL; 676 } 677 sctp->sctp_sack_gaps = 0; 678 679 if (sctp->sctp_cookie_mp != NULL) { 680 freemsg(sctp->sctp_cookie_mp); 681 sctp->sctp_cookie_mp = NULL; 682 } 683 684 /* Remove all the address resources. 
*/ 685 sctp_zap_addrs(sctp); 686 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 687 ASSERT(sctp->sctp_saddrs[cnt].ipif_count == 0); 688 list_destroy(&sctp->sctp_saddrs[cnt].sctp_ipif_list); 689 } 690 691 if (sctp->sctp_hopopts != NULL) { 692 mi_free(sctp->sctp_hopopts); 693 sctp->sctp_hopopts = NULL; 694 sctp->sctp_hopoptslen = 0; 695 } 696 ASSERT(sctp->sctp_hopoptslen == 0); 697 if (sctp->sctp_dstopts != NULL) { 698 mi_free(sctp->sctp_dstopts); 699 sctp->sctp_dstopts = NULL; 700 sctp->sctp_dstoptslen = 0; 701 } 702 ASSERT(sctp->sctp_dstoptslen == 0); 703 if (sctp->sctp_rthdrdstopts != NULL) { 704 mi_free(sctp->sctp_rthdrdstopts); 705 sctp->sctp_rthdrdstopts = NULL; 706 sctp->sctp_rthdrdstoptslen = 0; 707 } 708 ASSERT(sctp->sctp_rthdrdstoptslen == 0); 709 if (sctp->sctp_rthdr != NULL) { 710 mi_free(sctp->sctp_rthdr); 711 sctp->sctp_rthdr = NULL; 712 sctp->sctp_rthdrlen = 0; 713 } 714 ASSERT(sctp->sctp_rthdrlen == 0); 715 sctp_headers_free(sctp); 716 717 sctp->sctp_shutdown_faddr = NULL; 718 719 if (sctp->sctp_err_chunks != NULL) { 720 freemsg(sctp->sctp_err_chunks); 721 sctp->sctp_err_chunks = NULL; 722 sctp->sctp_err_len = 0; 723 } 724 725 /* Clear all the bitfields. */ 726 bzero(&sctp->sctp_bits, sizeof (sctp->sctp_bits)); 727 728 /* It is time to update the global statistics. 
*/ 729 SCTPS_UPDATE_MIB(sctps, sctpOutSCTPPkts, sctp->sctp_opkts); 730 SCTPS_UPDATE_MIB(sctps, sctpOutCtrlChunks, sctp->sctp_obchunks); 731 SCTPS_UPDATE_MIB(sctps, sctpOutOrderChunks, sctp->sctp_odchunks); 732 SCTPS_UPDATE_MIB(sctps, sctpOutUnorderChunks, sctp->sctp_oudchunks); 733 SCTPS_UPDATE_MIB(sctps, sctpRetransChunks, sctp->sctp_rxtchunks); 734 SCTPS_UPDATE_MIB(sctps, sctpInSCTPPkts, sctp->sctp_ipkts); 735 SCTPS_UPDATE_MIB(sctps, sctpInCtrlChunks, sctp->sctp_ibchunks); 736 SCTPS_UPDATE_MIB(sctps, sctpInOrderChunks, sctp->sctp_idchunks); 737 SCTPS_UPDATE_MIB(sctps, sctpInUnorderChunks, sctp->sctp_iudchunks); 738 SCTPS_UPDATE_MIB(sctps, sctpFragUsrMsgs, sctp->sctp_fragdmsgs); 739 SCTPS_UPDATE_MIB(sctps, sctpReasmUsrMsgs, sctp->sctp_reassmsgs); 740 sctp->sctp_opkts = 0; 741 sctp->sctp_obchunks = 0; 742 sctp->sctp_odchunks = 0; 743 sctp->sctp_oudchunks = 0; 744 sctp->sctp_rxtchunks = 0; 745 sctp->sctp_ipkts = 0; 746 sctp->sctp_ibchunks = 0; 747 sctp->sctp_idchunks = 0; 748 sctp->sctp_iudchunks = 0; 749 sctp->sctp_fragdmsgs = 0; 750 sctp->sctp_reassmsgs = 0; 751 sctp->sctp_outseqtsns = 0; 752 sctp->sctp_osacks = 0; 753 sctp->sctp_isacks = 0; 754 sctp->sctp_idupchunks = 0; 755 sctp->sctp_gapcnt = 0; 756 sctp->sctp_cum_obchunks = 0; 757 sctp->sctp_cum_odchunks = 0; 758 sctp->sctp_cum_oudchunks = 0; 759 sctp->sctp_cum_rxtchunks = 0; 760 sctp->sctp_cum_ibchunks = 0; 761 sctp->sctp_cum_idchunks = 0; 762 sctp->sctp_cum_iudchunks = 0; 763 764 sctp->sctp_autoclose = 0; 765 sctp->sctp_tx_adaptation_code = 0; 766 767 sctp->sctp_v6label_len = 0; 768 sctp->sctp_v4label_len = 0; 769 770 sctp->sctp_sctps = NULL; 771 772 sctp_conn_clear(connp); 773 kmem_cache_free(sctp_conn_cache, connp); 774 } 775 776 /* 777 * Initialize protocol control block. If a parent exists, inherit 778 * all values set through setsockopt(). 
779 */ 780 static int 781 sctp_init_values(sctp_t *sctp, sctp_t *psctp, int sleep) 782 { 783 int err; 784 int cnt; 785 sctp_stack_t *sctps = sctp->sctp_sctps; 786 conn_t *connp; 787 788 connp = sctp->sctp_connp; 789 790 sctp->sctp_nsaddrs = 0; 791 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 792 sctp->sctp_saddrs[cnt].ipif_count = 0; 793 list_create(&sctp->sctp_saddrs[cnt].sctp_ipif_list, 794 sizeof (sctp_saddr_ipif_t), offsetof(sctp_saddr_ipif_t, 795 saddr_ipif)); 796 } 797 connp->conn_ports = 0; 798 sctp->sctp_running = B_FALSE; 799 sctp->sctp_state = SCTPS_IDLE; 800 801 sctp->sctp_refcnt = 1; 802 803 sctp->sctp_strikes = 0; 804 805 sctp->sctp_last_mtu_probe = ddi_get_lbolt64(); 806 sctp->sctp_mtu_probe_intvl = sctps->sctps_mtu_probe_interval; 807 808 sctp->sctp_sack_gaps = 0; 809 /* So we will not delay sending the first SACK. */ 810 sctp->sctp_sack_toggle = sctps->sctps_deferred_acks_max; 811 812 /* Only need to do the allocation if there is no "cached" one. */ 813 if (sctp->sctp_pad_mp == NULL) { 814 if (sleep == KM_SLEEP) { 815 sctp->sctp_pad_mp = allocb_wait(SCTP_ALIGN, BPRI_MED, 816 STR_NOSIG, NULL); 817 } else { 818 sctp->sctp_pad_mp = allocb(SCTP_ALIGN, BPRI_MED); 819 if (sctp->sctp_pad_mp == NULL) 820 return (ENOMEM); 821 } 822 bzero(sctp->sctp_pad_mp->b_rptr, SCTP_ALIGN); 823 } 824 825 if (psctp != NULL) { 826 /* 827 * Inherit from parent 828 * 829 * Start by inheriting from the conn_t, including conn_ixa and 830 * conn_xmit_ipp. 
831 */ 832 err = conn_inherit_parent(psctp->sctp_connp, connp); 833 if (err != 0) 834 goto failure; 835 836 sctp->sctp_upcalls = psctp->sctp_upcalls; 837 838 sctp->sctp_cookie_lifetime = psctp->sctp_cookie_lifetime; 839 840 sctp->sctp_cwnd_max = psctp->sctp_cwnd_max; 841 sctp->sctp_rwnd = psctp->sctp_rwnd; 842 sctp->sctp_arwnd = psctp->sctp_arwnd; 843 sctp->sctp_pd_point = psctp->sctp_pd_point; 844 sctp->sctp_rto_max = psctp->sctp_rto_max; 845 sctp->sctp_rto_max_init = psctp->sctp_rto_max_init; 846 sctp->sctp_rto_min = psctp->sctp_rto_min; 847 sctp->sctp_rto_initial = psctp->sctp_rto_initial; 848 sctp->sctp_pa_max_rxt = psctp->sctp_pa_max_rxt; 849 sctp->sctp_pp_max_rxt = psctp->sctp_pp_max_rxt; 850 sctp->sctp_max_init_rxt = psctp->sctp_max_init_rxt; 851 852 sctp->sctp_def_stream = psctp->sctp_def_stream; 853 sctp->sctp_def_flags = psctp->sctp_def_flags; 854 sctp->sctp_def_ppid = psctp->sctp_def_ppid; 855 sctp->sctp_def_context = psctp->sctp_def_context; 856 sctp->sctp_def_timetolive = psctp->sctp_def_timetolive; 857 858 sctp->sctp_num_istr = psctp->sctp_num_istr; 859 sctp->sctp_num_ostr = psctp->sctp_num_ostr; 860 861 sctp->sctp_hb_interval = psctp->sctp_hb_interval; 862 sctp->sctp_autoclose = psctp->sctp_autoclose; 863 sctp->sctp_tx_adaptation_code = psctp->sctp_tx_adaptation_code; 864 865 /* xxx should be a better way to copy these flags xxx */ 866 sctp->sctp_bound_to_all = psctp->sctp_bound_to_all; 867 sctp->sctp_cansleep = psctp->sctp_cansleep; 868 sctp->sctp_send_adaptation = psctp->sctp_send_adaptation; 869 sctp->sctp_ndelay = psctp->sctp_ndelay; 870 sctp->sctp_events = psctp->sctp_events; 871 } else { 872 /* 873 * Set to system defaults 874 */ 875 sctp->sctp_cookie_lifetime = 876 MSEC_TO_TICK(sctps->sctps_cookie_life); 877 connp->conn_sndlowat = sctps->sctps_xmit_lowat; 878 connp->conn_sndbuf = sctps->sctps_xmit_hiwat; 879 connp->conn_rcvbuf = sctps->sctps_recv_hiwat; 880 881 sctp->sctp_cwnd_max = sctps->sctps_cwnd_max_; 882 sctp->sctp_rwnd = 
connp->conn_rcvbuf; 883 sctp->sctp_arwnd = connp->conn_rcvbuf; 884 sctp->sctp_pd_point = sctp->sctp_rwnd; 885 sctp->sctp_rto_max = MSEC_TO_TICK(sctps->sctps_rto_maxg); 886 sctp->sctp_rto_max_init = sctp->sctp_rto_max; 887 sctp->sctp_rto_min = MSEC_TO_TICK(sctps->sctps_rto_ming); 888 sctp->sctp_rto_initial = MSEC_TO_TICK( 889 sctps->sctps_rto_initialg); 890 sctp->sctp_pa_max_rxt = sctps->sctps_pa_max_retr; 891 sctp->sctp_pp_max_rxt = sctps->sctps_pp_max_retr; 892 sctp->sctp_max_init_rxt = sctps->sctps_max_init_retr; 893 894 sctp->sctp_num_istr = sctps->sctps_max_in_streams; 895 sctp->sctp_num_ostr = sctps->sctps_initial_out_streams; 896 897 sctp->sctp_hb_interval = 898 MSEC_TO_TICK(sctps->sctps_heartbeat_interval); 899 900 if (connp->conn_family == AF_INET) 901 connp->conn_default_ttl = sctps->sctps_ipv4_ttl; 902 else 903 connp->conn_default_ttl = sctps->sctps_ipv6_hoplimit; 904 905 connp->conn_xmit_ipp.ipp_unicast_hops = 906 connp->conn_default_ttl; 907 908 /* 909 * Initialize the header template 910 */ 911 if ((err = sctp_build_hdrs(sctp, sleep)) != 0) { 912 goto failure; 913 } 914 } 915 916 sctp->sctp_understands_asconf = B_TRUE; 917 sctp->sctp_understands_addip = B_TRUE; 918 sctp->sctp_prsctp_aware = B_FALSE; 919 920 sctp->sctp_connp->conn_ref = 1; 921 922 sctp->sctp_prsctpdrop = 0; 923 sctp->sctp_msgcount = 0; 924 925 return (0); 926 927 failure: 928 sctp_headers_free(sctp); 929 return (err); 930 } 931 932 /* 933 * Extracts the init tag from an INIT chunk and checks if it matches 934 * the sctp's verification tag. Returns 0 if it doesn't match, 1 if 935 * it does. 
936 */ 937 static boolean_t 938 sctp_icmp_verf(sctp_t *sctp, sctp_hdr_t *sh, mblk_t *mp) 939 { 940 sctp_chunk_hdr_t *sch; 941 uint32_t verf, *vp; 942 943 sch = (sctp_chunk_hdr_t *)(sh + 1); 944 vp = (uint32_t *)(sch + 1); 945 946 /* Need at least the data chunk hdr and the first 4 bytes of INIT */ 947 if ((unsigned char *)(vp + 1) > mp->b_wptr) { 948 return (B_FALSE); 949 } 950 951 bcopy(vp, &verf, sizeof (verf)); 952 953 if (verf == sctp->sctp_lvtag) { 954 return (B_TRUE); 955 } 956 return (B_FALSE); 957 } 958 959 /* 960 * Update the SCTP state according to change of PMTU. 961 * 962 * Path MTU might have changed by either increase or decrease, so need to 963 * adjust the MSS based on the value of ixa_pmtu. 964 */ 965 static void 966 sctp_update_pmtu(sctp_t *sctp, sctp_faddr_t *fp, boolean_t decrease_only) 967 { 968 uint32_t pmtu; 969 int32_t mss; 970 ip_xmit_attr_t *ixa = fp->sf_ixa; 971 972 if (sctp->sctp_state < SCTPS_ESTABLISHED) 973 return; 974 975 /* 976 * Always call ip_get_pmtu() to make sure that IP has updated 977 * ixa_flags properly. 978 */ 979 pmtu = ip_get_pmtu(ixa); 980 981 /* 982 * Calculate the MSS by decreasing the PMTU by sctp_hdr_len and 983 * IPsec overhead if applied. Make sure to use the most recent 984 * IPsec information. 985 */ 986 mss = pmtu - conn_ipsec_length(sctp->sctp_connp); 987 if (ixa->ixa_flags & IXAF_IS_IPV4) 988 mss -= sctp->sctp_hdr_len; 989 else 990 mss -= sctp->sctp_hdr6_len; 991 992 /* 993 * Nothing to change, so just return. 994 */ 995 if (mss == fp->sf_pmss) 996 return; 997 998 /* 999 * Currently, for ICMP errors, only PMTU decrease is handled. 1000 */ 1001 if (mss > fp->sf_pmss && decrease_only) 1002 return; 1003 1004 #ifdef DEBUG 1005 (void) printf("sctp_update_pmtu mss from %d to %d\n", 1006 fp->sf_pmss, mss); 1007 #endif 1008 DTRACE_PROBE2(sctp_update_pmtu, int32_t, fp->sf_pmss, uint32_t, mss); 1009 1010 /* 1011 * Update ixa_fragsize and ixa_pmtu. 
1012 */ 1013 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1014 1015 /* 1016 * Make sure that sfa_pmss is a multiple of 1017 * SCTP_ALIGN. 1018 */ 1019 fp->sf_pmss = mss & ~(SCTP_ALIGN - 1); 1020 fp->sf_pmtu_discovered = 1; 1021 1022 #ifdef notyet 1023 if (mss < sctp->sctp_sctps->sctps_mss_min) 1024 ixa->ixa_flags |= IXAF_PMTU_TOO_SMALL; 1025 #endif 1026 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) 1027 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF); 1028 1029 /* 1030 * If below the min size then ip_get_pmtu cleared IXAF_PMTU_IPV4_DF. 1031 * Make sure to clear IXAF_DONTFRAG, which is used by IP to decide 1032 * whether to fragment the packet. 1033 */ 1034 if (ixa->ixa_flags & IXAF_IS_IPV4) { 1035 if (!(ixa->ixa_flags & IXAF_PMTU_IPV4_DF)) { 1036 fp->sf_df = B_FALSE; 1037 if (fp == sctp->sctp_current) { 1038 sctp->sctp_ipha-> 1039 ipha_fragment_offset_and_flags = 0; 1040 } 1041 } 1042 } 1043 } 1044 1045 /* 1046 * Notify function registered with ip_xmit_attr_t. It's called in the context 1047 * of conn_ip_output so it's safe to update the SCTP state. 1048 * Currently only used for pmtu changes. 1049 */ 1050 /* ARGSUSED1 */ 1051 static void 1052 sctp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1053 ixa_notify_arg_t narg) 1054 { 1055 sctp_t *sctp = (sctp_t *)arg; 1056 sctp_faddr_t *fp; 1057 1058 switch (ntype) { 1059 case IXAN_PMTU: 1060 /* Find the faddr based on the ip_xmit_attr_t pointer */ 1061 for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) { 1062 if (fp->sf_ixa == ixa) 1063 break; 1064 } 1065 if (fp != NULL) 1066 sctp_update_pmtu(sctp, fp, B_FALSE); 1067 break; 1068 default: 1069 break; 1070 } 1071 } 1072 1073 /* 1074 * sctp_icmp_error is called by sctp_input() to process ICMP error messages 1075 * passed up by IP. We need to find a sctp_t 1076 * that corresponds to the returned datagram. Passes the message back in on 1077 * the correct queue once it has located the connection. 
1078 * Assumes that IP has pulled up everything up to and including 1079 * the ICMP header. 1080 */ 1081 void 1082 sctp_icmp_error(sctp_t *sctp, mblk_t *mp) 1083 { 1084 icmph_t *icmph; 1085 ipha_t *ipha; 1086 int iph_hdr_length; 1087 sctp_hdr_t *sctph; 1088 in6_addr_t dst; 1089 sctp_faddr_t *fp; 1090 sctp_stack_t *sctps = sctp->sctp_sctps; 1091 1092 dprint(1, ("sctp_icmp_error: sctp=%p, mp=%p\n", (void *)sctp, 1093 (void *)mp)); 1094 1095 ipha = (ipha_t *)mp->b_rptr; 1096 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1097 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1098 sctp_icmp_error_ipv6(sctp, mp); 1099 return; 1100 } 1101 1102 /* account for the ip hdr from the icmp message */ 1103 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1104 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1105 /* now the ip hdr of message resulting in this icmp */ 1106 ipha = (ipha_t *)&icmph[1]; 1107 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1108 sctph = (sctp_hdr_t *)((char *)ipha + iph_hdr_length); 1109 /* first_mp must expose the full sctp header. */ 1110 if ((uchar_t *)(sctph + 1) >= mp->b_wptr) { 1111 /* not enough data for SCTP header */ 1112 freemsg(mp); 1113 return; 1114 } 1115 1116 switch (icmph->icmph_type) { 1117 case ICMP_DEST_UNREACHABLE: 1118 switch (icmph->icmph_code) { 1119 case ICMP_FRAGMENTATION_NEEDED: 1120 /* 1121 * Reduce the MSS based on the new MTU. This will 1122 * eliminate any fragmentation locally. 1123 * N.B. There may well be some funny side-effects on 1124 * the local send policy and the remote receive policy. 1125 * Pending further research, we provide 1126 * sctp_ignore_path_mtu just in case this proves 1127 * disastrous somewhere. 1128 * 1129 * After updating the MSS, retransmit part of the 1130 * dropped segment using the new mss by calling 1131 * sctp_wput_slow(). Need to adjust all those 1132 * params to make sure sctp_wput_slow() work properly. 
1133 */ 1134 if (sctps->sctps_ignore_path_mtu) 1135 break; 1136 1137 /* find the offending faddr */ 1138 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst); 1139 fp = sctp_lookup_faddr(sctp, &dst); 1140 if (fp == NULL) { 1141 break; 1142 } 1143 sctp_update_pmtu(sctp, fp, B_TRUE); 1144 /* 1145 * It is possible, even likely that a fast retransmit 1146 * attempt has been dropped by ip as a result of this 1147 * error, retransmission bundles as much as possible. 1148 * A retransmit here prevents significant delays waiting 1149 * on the timer. Analogous to behaviour of TCP after 1150 * ICMP too big. 1151 */ 1152 sctp_rexmit(sctp, fp); 1153 break; 1154 case ICMP_PORT_UNREACHABLE: 1155 case ICMP_PROTOCOL_UNREACHABLE: 1156 switch (sctp->sctp_state) { 1157 case SCTPS_COOKIE_WAIT: 1158 case SCTPS_COOKIE_ECHOED: 1159 /* make sure the verification tag matches */ 1160 if (!sctp_icmp_verf(sctp, sctph, mp)) { 1161 break; 1162 } 1163 SCTPS_BUMP_MIB(sctps, sctpAborted); 1164 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1165 NULL); 1166 sctp_clean_death(sctp, ECONNREFUSED); 1167 break; 1168 } 1169 break; 1170 case ICMP_HOST_UNREACHABLE: 1171 case ICMP_NET_UNREACHABLE: 1172 /* Record the error in case we finally time out. */ 1173 sctp->sctp_client_errno = (icmph->icmph_code == 1174 ICMP_HOST_UNREACHABLE) ? EHOSTUNREACH : ENETUNREACH; 1175 break; 1176 default: 1177 break; 1178 } 1179 break; 1180 case ICMP_SOURCE_QUENCH: { 1181 /* Reduce the sending rate as if we got a retransmit timeout */ 1182 break; 1183 } 1184 } 1185 freemsg(mp); 1186 } 1187 1188 /* 1189 * sctp_icmp_error_ipv6() is called by sctp_icmp_error() to process ICMPv6 1190 * error messages passed up by IP. 1191 * Assumes that IP has pulled up all the extension headers as well 1192 * as the ICMPv6 header. 
1193 */ 1194 static void 1195 sctp_icmp_error_ipv6(sctp_t *sctp, mblk_t *mp) 1196 { 1197 icmp6_t *icmp6; 1198 ip6_t *ip6h; 1199 uint16_t iph_hdr_length; 1200 sctp_hdr_t *sctpha; 1201 uint8_t *nexthdrp; 1202 sctp_faddr_t *fp; 1203 sctp_stack_t *sctps = sctp->sctp_sctps; 1204 1205 ip6h = (ip6_t *)mp->b_rptr; 1206 iph_hdr_length = (ip6h->ip6_nxt != IPPROTO_SCTP) ? 1207 ip_hdr_length_v6(mp, ip6h) : IPV6_HDR_LEN; 1208 1209 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1210 ip6h = (ip6_t *)&icmp6[1]; 1211 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1212 freemsg(mp); 1213 return; 1214 } 1215 ASSERT(*nexthdrp == IPPROTO_SCTP); 1216 1217 /* XXX need ifindex to find connection */ 1218 sctpha = (sctp_hdr_t *)((char *)ip6h + iph_hdr_length); 1219 if ((uchar_t *)sctpha >= mp->b_wptr) { 1220 /* not enough data for SCTP header */ 1221 freemsg(mp); 1222 return; 1223 } 1224 switch (icmp6->icmp6_type) { 1225 case ICMP6_PACKET_TOO_BIG: 1226 /* 1227 * Reduce the MSS based on the new MTU. This will 1228 * eliminate any fragmentation locally. 1229 * N.B. There may well be some funny side-effects on 1230 * the local send policy and the remote receive policy. 1231 * Pending further research, we provide 1232 * sctp_ignore_path_mtu just in case this proves 1233 * disastrous somewhere. 1234 * 1235 * After updating the MSS, retransmit part of the 1236 * dropped segment using the new mss by calling 1237 * sctp_wput_slow(). Need to adjust all those 1238 * params to make sure sctp_wput_slow() work properly. 1239 */ 1240 if (sctps->sctps_ignore_path_mtu) 1241 break; 1242 1243 /* find the offending faddr */ 1244 fp = sctp_lookup_faddr(sctp, &ip6h->ip6_dst); 1245 if (fp == NULL) { 1246 break; 1247 } 1248 1249 sctp_update_pmtu(sctp, fp, B_TRUE); 1250 /* 1251 * It is possible, even likely that a fast retransmit 1252 * attempt has been dropped by ip as a result of this 1253 * error, retransmission bundles as much as possible. 
1254 * A retransmit here prevents significant delays waiting 1255 * on the timer. Analogous to behaviour of TCP after 1256 * ICMP too big. 1257 */ 1258 sctp_rexmit(sctp, fp); 1259 break; 1260 1261 case ICMP6_DST_UNREACH: 1262 switch (icmp6->icmp6_code) { 1263 case ICMP6_DST_UNREACH_NOPORT: 1264 /* make sure the verification tag matches */ 1265 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1266 break; 1267 } 1268 if (sctp->sctp_state == SCTPS_COOKIE_WAIT || 1269 sctp->sctp_state == SCTPS_COOKIE_ECHOED) { 1270 SCTPS_BUMP_MIB(sctps, sctpAborted); 1271 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1272 NULL); 1273 sctp_clean_death(sctp, ECONNREFUSED); 1274 } 1275 break; 1276 1277 case ICMP6_DST_UNREACH_ADMIN: 1278 case ICMP6_DST_UNREACH_NOROUTE: 1279 case ICMP6_DST_UNREACH_NOTNEIGHBOR: 1280 case ICMP6_DST_UNREACH_ADDR: 1281 /* Record the error in case we finally time out. */ 1282 sctp->sctp_client_errno = EHOSTUNREACH; 1283 break; 1284 default: 1285 break; 1286 } 1287 break; 1288 1289 case ICMP6_PARAM_PROB: 1290 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1291 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1292 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1293 (uchar_t *)nexthdrp) { 1294 /* make sure the verification tag matches */ 1295 if (!sctp_icmp_verf(sctp, sctpha, mp)) { 1296 break; 1297 } 1298 if (sctp->sctp_state == SCTPS_COOKIE_WAIT) { 1299 SCTPS_BUMP_MIB(sctps, sctpAborted); 1300 sctp_assoc_event(sctp, SCTP_CANT_STR_ASSOC, 0, 1301 NULL); 1302 sctp_clean_death(sctp, ECONNREFUSED); 1303 } 1304 break; 1305 } 1306 break; 1307 1308 case ICMP6_TIME_EXCEEDED: 1309 default: 1310 break; 1311 } 1312 freemsg(mp); 1313 } 1314 1315 /* 1316 * Called by sockfs to create a new sctp instance. 1317 * 1318 * If parent pointer is passed in, inherit settings from it. 
1319 */ 1320 sctp_t * 1321 sctp_create(void *ulpd, sctp_t *parent, int family, int type, int flags, 1322 sock_upcalls_t *upcalls, sctp_sockbuf_limits_t *sbl, 1323 cred_t *credp) 1324 { 1325 sctp_t *sctp, *psctp; 1326 conn_t *connp; 1327 mblk_t *ack_mp, *hb_mp; 1328 int sleep = flags & SCTP_CAN_BLOCK ? KM_SLEEP : KM_NOSLEEP; 1329 zoneid_t zoneid; 1330 sctp_stack_t *sctps; 1331 1332 /* User must supply a credential. */ 1333 if (credp == NULL) 1334 return (NULL); 1335 1336 psctp = (sctp_t *)parent; 1337 if (psctp != NULL) { 1338 sctps = psctp->sctp_sctps; 1339 /* Increase here to have common decrease at end */ 1340 netstack_hold(sctps->sctps_netstack); 1341 ASSERT(sctps->sctps_recvq_tq_list_cur_sz > 0); 1342 } else { 1343 netstack_t *ns; 1344 1345 ns = netstack_find_by_cred(credp); 1346 sctps = ns->netstack_sctp; 1347 /* 1348 * Check if the receive queue taskq for this sctp_stack_t has 1349 * been set up. 1350 */ 1351 if (sctps->sctps_recvq_tq_list_cur_sz == 0) 1352 sctp_rq_tq_init(sctps); 1353 1354 /* 1355 * For exclusive stacks we set the zoneid to zero 1356 * to make SCTP operate as if in the global zone. 1357 */ 1358 if (sctps->sctps_netstack->netstack_stackid != 1359 GLOBAL_NETSTACKID) 1360 zoneid = GLOBAL_ZONEID; 1361 else 1362 zoneid = crgetzoneid(credp); 1363 } 1364 if ((connp = ipcl_conn_create(IPCL_SCTPCONN, sleep, 1365 sctps->sctps_netstack)) == NULL) { 1366 netstack_rele(sctps->sctps_netstack); 1367 SCTP_KSTAT(sctps, sctp_conn_create); 1368 return (NULL); 1369 } 1370 /* 1371 * ipcl_conn_create did a netstack_hold. Undo the hold that was 1372 * done at top of sctp_create. 
1373 */ 1374 netstack_rele(sctps->sctps_netstack); 1375 sctp = CONN2SCTP(connp); 1376 sctp->sctp_sctps = sctps; 1377 1378 if ((ack_mp = sctp_timer_alloc(sctp, sctp_ack_timer, sleep)) == NULL || 1379 (hb_mp = sctp_timer_alloc(sctp, sctp_heartbeat_timer, 1380 sleep)) == NULL) { 1381 if (ack_mp != NULL) 1382 freeb(ack_mp); 1383 sctp_conn_clear(connp); 1384 sctp->sctp_sctps = NULL; 1385 kmem_cache_free(sctp_conn_cache, connp); 1386 return (NULL); 1387 } 1388 1389 sctp->sctp_ack_mp = ack_mp; 1390 sctp->sctp_heartbeat_mp = hb_mp; 1391 1392 /* 1393 * Have conn_ip_output drop packets should our outer source 1394 * go invalid, and tell us about mtu changes. 1395 */ 1396 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 1397 IXAF_VERIFY_PMTU; 1398 connp->conn_family = family; 1399 connp->conn_so_type = type; 1400 1401 if (sctp_init_values(sctp, psctp, sleep) != 0) { 1402 freeb(ack_mp); 1403 freeb(hb_mp); 1404 sctp_conn_clear(connp); 1405 sctp->sctp_sctps = NULL; 1406 kmem_cache_free(sctp_conn_cache, connp); 1407 return (NULL); 1408 } 1409 sctp->sctp_cansleep = ((flags & SCTP_CAN_BLOCK) == SCTP_CAN_BLOCK); 1410 1411 sctp->sctp_mss = sctps->sctps_initial_mtu - ((family == AF_INET6) ? 1412 sctp->sctp_hdr6_len : sctp->sctp_hdr_len); 1413 1414 if (psctp != NULL) { 1415 conn_t *pconnp = psctp->sctp_connp; 1416 1417 RUN_SCTP(psctp); 1418 /* 1419 * Inherit local address list, local port. Parent is either 1420 * in SCTPS_BOUND, or SCTPS_LISTEN state. 1421 */ 1422 ASSERT((psctp->sctp_state == SCTPS_BOUND) || 1423 (psctp->sctp_state == SCTPS_LISTEN)); 1424 if (sctp_dup_saddrs(psctp, sctp, sleep)) { 1425 WAKE_SCTP(psctp); 1426 freeb(ack_mp); 1427 freeb(hb_mp); 1428 sctp_headers_free(sctp); 1429 sctp_conn_clear(connp); 1430 sctp->sctp_sctps = NULL; 1431 kmem_cache_free(sctp_conn_cache, connp); 1432 return (NULL); 1433 } 1434 1435 /* 1436 * If the parent is specified, it'll be immediatelly 1437 * followed by sctp_connect(). So don't add this guy to 1438 * bind hash. 
1439 */ 1440 connp->conn_lport = pconnp->conn_lport; 1441 sctp->sctp_state = SCTPS_BOUND; 1442 WAKE_SCTP(psctp); 1443 } else { 1444 ASSERT(connp->conn_cred == NULL); 1445 connp->conn_zoneid = zoneid; 1446 /* 1447 * conn_allzones can not be set this early, hence 1448 * no IPCL_ZONEID 1449 */ 1450 connp->conn_ixa->ixa_zoneid = zoneid; 1451 connp->conn_open_time = ddi_get_lbolt64(); 1452 connp->conn_cred = credp; 1453 crhold(credp); 1454 connp->conn_cpid = curproc->p_pid; 1455 1456 /* 1457 * If the caller has the process-wide flag set, then default to 1458 * MAC exempt mode. This allows read-down to unlabeled hosts. 1459 */ 1460 if (getpflags(NET_MAC_AWARE, credp) != 0) 1461 connp->conn_mac_mode = CONN_MAC_AWARE; 1462 1463 connp->conn_zone_is_global = 1464 (crgetzoneid(credp) == GLOBAL_ZONEID); 1465 } 1466 1467 /* Initialize SCTP instance values, our verf tag must never be 0 */ 1468 (void) random_get_pseudo_bytes((uint8_t *)&sctp->sctp_lvtag, 1469 sizeof (sctp->sctp_lvtag)); 1470 if (sctp->sctp_lvtag == 0) 1471 sctp->sctp_lvtag = (uint32_t)gethrtime(); 1472 ASSERT(sctp->sctp_lvtag != 0); 1473 1474 sctp->sctp_ltsn = sctp->sctp_lvtag + 1; 1475 sctp->sctp_lcsn = sctp->sctp_ltsn; 1476 sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd = sctp->sctp_ltsn - 1; 1477 sctp->sctp_adv_pap = sctp->sctp_lastack_rxd; 1478 1479 /* Information required by upper layer */ 1480 ASSERT(ulpd != NULL); 1481 sctp->sctp_ulpd = ulpd; 1482 1483 ASSERT(upcalls != NULL); 1484 sctp->sctp_upcalls = upcalls; 1485 ASSERT(sbl != NULL); 1486 /* Fill in the socket buffer limits for sctpsockfs */ 1487 sbl->sbl_txlowat = connp->conn_sndlowat; 1488 sbl->sbl_txbuf = connp->conn_sndbuf; 1489 sbl->sbl_rxbuf = sctp->sctp_rwnd; 1490 sbl->sbl_rxlowat = SCTP_RECV_LOWATER; 1491 1492 /* Insert this in the global list. 
*/ 1493 SCTP_LINK(sctp, sctps); 1494 1495 return (sctp); 1496 } 1497 1498 /* Run at module load time */ 1499 void 1500 sctp_ddi_g_init(void) 1501 { 1502 /* Create sctp_t/conn_t cache */ 1503 sctp_conn_cache_init(); 1504 1505 /* Create the faddr cache */ 1506 sctp_faddr_init(); 1507 1508 /* Create the sets cache */ 1509 sctp_sets_init(); 1510 1511 /* Create the PR-SCTP sets cache */ 1512 sctp_ftsn_sets_init(); 1513 1514 /* Initialize tables used for CRC calculation */ 1515 sctp_crc32_init(); 1516 1517 /* 1518 * We want to be informed each time a stack is created or 1519 * destroyed in the kernel, so we can maintain the 1520 * set of sctp_stack_t's. 1521 */ 1522 netstack_register(NS_SCTP, sctp_stack_init, NULL, sctp_stack_fini); 1523 } 1524 1525 static void * 1526 sctp_stack_init(netstackid_t stackid, netstack_t *ns) 1527 { 1528 sctp_stack_t *sctps; 1529 size_t arrsz; 1530 int i; 1531 1532 sctps = kmem_zalloc(sizeof (*sctps), KM_SLEEP); 1533 sctps->sctps_netstack = ns; 1534 1535 /* Initialize locks */ 1536 mutex_init(&sctps->sctps_g_lock, NULL, MUTEX_DEFAULT, NULL); 1537 mutex_init(&sctps->sctps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 1538 sctps->sctps_g_num_epriv_ports = SCTP_NUM_EPRIV_PORTS; 1539 sctps->sctps_g_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 1540 sctps->sctps_g_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 1541 1542 /* Initialize SCTP hash arrays. */ 1543 sctp_hash_init(sctps); 1544 1545 arrsz = sctp_propinfo_count * sizeof (mod_prop_info_t); 1546 sctps->sctps_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 1547 KM_SLEEP); 1548 bcopy(sctp_propinfo_tbl, sctps->sctps_propinfo_tbl, arrsz); 1549 1550 /* saddr init */ 1551 sctp_saddr_init(sctps); 1552 1553 /* Global SCTP PCB list. */ 1554 list_create(&sctps->sctps_g_list, sizeof (sctp_t), 1555 offsetof(sctp_t, sctp_list)); 1556 1557 /* Initialize SCTP kstats. 
*/ 1558 sctps->sctps_mibkp = sctp_kstat_init(stackid); 1559 sctps->sctps_kstat = sctp_kstat2_init(stackid); 1560 1561 mutex_init(&sctps->sctps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL); 1562 sctps->sctps_reclaim = B_FALSE; 1563 sctps->sctps_reclaim_tid = 0; 1564 sctps->sctps_reclaim_period = sctps->sctps_rto_maxg; 1565 1566 /* Allocate the per netstack stats */ 1567 mutex_enter(&cpu_lock); 1568 sctps->sctps_sc_cnt = MAX(ncpus, boot_ncpus); 1569 mutex_exit(&cpu_lock); 1570 sctps->sctps_sc = kmem_zalloc(max_ncpus * sizeof (sctp_stats_cpu_t *), 1571 KM_SLEEP); 1572 for (i = 0; i < sctps->sctps_sc_cnt; i++) { 1573 sctps->sctps_sc[i] = kmem_zalloc(sizeof (sctp_stats_cpu_t), 1574 KM_SLEEP); 1575 } 1576 1577 mutex_init(&sctps->sctps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL); 1578 list_create(&sctps->sctps_listener_conf, sizeof (sctp_listener_t), 1579 offsetof(sctp_listener_t, sl_link)); 1580 1581 return (sctps); 1582 } 1583 1584 /* 1585 * Called when the module is about to be unloaded. 1586 */ 1587 void 1588 sctp_ddi_g_destroy(void) 1589 { 1590 /* Destroy sctp_t/conn_t caches */ 1591 sctp_conn_cache_fini(); 1592 1593 /* Destroy the faddr cache */ 1594 sctp_faddr_fini(); 1595 1596 /* Destroy the sets cache */ 1597 sctp_sets_fini(); 1598 1599 /* Destroy the PR-SCTP sets cache */ 1600 sctp_ftsn_sets_fini(); 1601 1602 netstack_unregister(NS_SCTP); 1603 } 1604 1605 /* 1606 * Free the SCTP stack instance. 1607 */ 1608 static void 1609 sctp_stack_fini(netstackid_t stackid, void *arg) 1610 { 1611 sctp_stack_t *sctps = (sctp_stack_t *)arg; 1612 int i; 1613 1614 /* 1615 * Set sctps_reclaim to false tells sctp_reclaim_timer() not to restart 1616 * the timer. 
1617 */ 1618 mutex_enter(&sctps->sctps_reclaim_lock); 1619 sctps->sctps_reclaim = B_FALSE; 1620 mutex_exit(&sctps->sctps_reclaim_lock); 1621 if (sctps->sctps_reclaim_tid != 0) 1622 (void) untimeout(sctps->sctps_reclaim_tid); 1623 mutex_destroy(&sctps->sctps_reclaim_lock); 1624 1625 sctp_listener_conf_cleanup(sctps); 1626 1627 kmem_free(sctps->sctps_propinfo_tbl, 1628 sctp_propinfo_count * sizeof (mod_prop_info_t)); 1629 sctps->sctps_propinfo_tbl = NULL; 1630 1631 /* Destroy the recvq taskqs. */ 1632 sctp_rq_tq_fini(sctps); 1633 1634 /* Destroy saddr */ 1635 sctp_saddr_fini(sctps); 1636 1637 /* Global SCTP PCB list. */ 1638 list_destroy(&sctps->sctps_g_list); 1639 1640 /* Destroy SCTP hash arrays. */ 1641 sctp_hash_destroy(sctps); 1642 1643 /* Destroy SCTP kernel stats. */ 1644 for (i = 0; i < sctps->sctps_sc_cnt; i++) 1645 kmem_free(sctps->sctps_sc[i], sizeof (sctp_stats_cpu_t)); 1646 kmem_free(sctps->sctps_sc, max_ncpus * sizeof (sctp_stats_cpu_t *)); 1647 1648 sctp_kstat_fini(stackid, sctps->sctps_mibkp); 1649 sctps->sctps_mibkp = NULL; 1650 sctp_kstat2_fini(stackid, sctps->sctps_kstat); 1651 sctps->sctps_kstat = NULL; 1652 1653 mutex_destroy(&sctps->sctps_g_lock); 1654 mutex_destroy(&sctps->sctps_epriv_port_lock); 1655 1656 kmem_free(sctps, sizeof (*sctps)); 1657 } 1658 1659 static void 1660 sctp_rq_tq_init(sctp_stack_t *sctps) 1661 { 1662 char tq_name[TASKQ_NAMELEN]; 1663 int thrs; 1664 int max_tasks; 1665 1666 mutex_enter(&sctps->sctps_g_lock); 1667 /* Someone may have beaten us in creating the taskqs. */ 1668 if (sctps->sctps_recvq_tq_list_cur_sz > 0) { 1669 mutex_exit(&sctps->sctps_g_lock); 1670 return; 1671 } 1672 1673 thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, 1674 MAX(ncpus, boot_ncpus))); 1675 /* 1676 * Make sure that the maximum number of tasks is at least thrice as 1677 * large as the number of threads. 
1678 */ 1679 max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; 1680 1681 /* 1682 * This helps differentiate the default taskqs in different IP stacks. 1683 */ 1684 (void) snprintf(tq_name, sizeof (tq_name), "sctp_def_rq_taskq_%d", 1685 sctps->sctps_netstack->netstack_stackid); 1686 1687 sctps->sctps_recvq_tq_list_max_sz = sctp_recvq_tq_list_max; 1688 sctps->sctps_recvq_tq_list_cur_sz = 1; 1689 1690 /* 1691 * Initialize the recvq_tq_list and create the first recvq taskq. 1692 * What to do if it fails? 1693 */ 1694 sctps->sctps_recvq_tq_list = 1695 kmem_zalloc(sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *), 1696 KM_SLEEP); 1697 sctps->sctps_recvq_tq_list[0] = taskq_create(tq_name, thrs, 1698 minclsyspri, sctp_recvq_tq_task_min, max_tasks, TASKQ_PREPOPULATE); 1699 mutex_init(&sctps->sctps_rq_tq_lock, NULL, MUTEX_DEFAULT, NULL); 1700 1701 mutex_exit(&sctps->sctps_g_lock); 1702 } 1703 1704 static void 1705 sctp_rq_tq_fini(sctp_stack_t *sctps) 1706 { 1707 int i; 1708 1709 if (sctps->sctps_recvq_tq_list_cur_sz == 0) 1710 return; 1711 1712 for (i = 0; i < sctps->sctps_recvq_tq_list_cur_sz; i++) { 1713 ASSERT(sctps->sctps_recvq_tq_list[i] != NULL); 1714 taskq_destroy(sctps->sctps_recvq_tq_list[i]); 1715 } 1716 kmem_free(sctps->sctps_recvq_tq_list, 1717 sctps->sctps_recvq_tq_list_max_sz * sizeof (taskq_t *)); 1718 sctps->sctps_recvq_tq_list = NULL; 1719 } 1720 1721 /* Add another taskq for a new ill. */ 1722 void 1723 sctp_inc_taskq(sctp_stack_t *sctps) 1724 { 1725 taskq_t *tq; 1726 char tq_name[TASKQ_NAMELEN]; 1727 int thrs; 1728 int max_tasks; 1729 1730 thrs = MIN(sctp_recvq_tq_thr_max, MAX(sctp_recvq_tq_thr_min, 1731 MAX(ncpus, boot_ncpus))); 1732 /* 1733 * Make sure that the maximum number of tasks is at least thrice as 1734 * large as the number of threads. 
1735 */ 1736 max_tasks = MAX(sctp_recvq_tq_task_min, thrs) * 3; 1737 1738 mutex_enter(&sctps->sctps_rq_tq_lock); 1739 if (sctps->sctps_recvq_tq_list_cur_sz + 1 > 1740 sctps->sctps_recvq_tq_list_max_sz) { 1741 mutex_exit(&sctps->sctps_rq_tq_lock); 1742 cmn_err(CE_NOTE, "Cannot create more SCTP recvq taskq"); 1743 return; 1744 } 1745 1746 (void) snprintf(tq_name, sizeof (tq_name), "sctp_rq_taskq_%d_%u", 1747 sctps->sctps_netstack->netstack_stackid, 1748 sctps->sctps_recvq_tq_list_cur_sz); 1749 tq = taskq_create(tq_name, thrs, minclsyspri, sctp_recvq_tq_task_min, 1750 max_tasks, TASKQ_PREPOPULATE); 1751 if (tq == NULL) { 1752 mutex_exit(&sctps->sctps_rq_tq_lock); 1753 cmn_err(CE_NOTE, "SCTP recvq taskq creation failed"); 1754 return; 1755 } 1756 ASSERT(sctps->sctps_recvq_tq_list[ 1757 sctps->sctps_recvq_tq_list_cur_sz] == NULL); 1758 sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz] = tq; 1759 atomic_inc_32(&sctps->sctps_recvq_tq_list_cur_sz); 1760 mutex_exit(&sctps->sctps_rq_tq_lock); 1761 } 1762 1763 #ifdef DEBUG 1764 uint32_t recvq_loop_cnt = 0; 1765 uint32_t recvq_call = 0; 1766 #endif 1767 1768 /* 1769 * Find the next recvq_tq to use. This routine will go thru all the 1770 * taskqs until it can dispatch a job for the sctp. If this fails, 1771 * it will create a new taskq and try it. 1772 */ 1773 static boolean_t 1774 sctp_find_next_tq(sctp_t *sctp) 1775 { 1776 int next_tq, try; 1777 taskq_t *tq; 1778 sctp_stack_t *sctps = sctp->sctp_sctps; 1779 1780 /* 1781 * Note that since we don't hold a lock on sctp_rq_tq_lock for 1782 * performance reason, recvq_ta_list_cur_sz can be changed during 1783 * this loop. The problem this will create is that the loop may 1784 * not have tried all the recvq_tq. This should be OK. 
1785 */ 1786 next_tq = atomic_inc_32_nv(&sctps->sctps_recvq_tq_list_cur) % 1787 sctps->sctps_recvq_tq_list_cur_sz; 1788 for (try = 0; try < sctps->sctps_recvq_tq_list_cur_sz; try++) { 1789 tq = sctps->sctps_recvq_tq_list[next_tq]; 1790 if (taskq_dispatch(tq, sctp_process_recvq, sctp, 1791 TQ_NOSLEEP) != (uintptr_t)NULL) { 1792 sctp->sctp_recvq_tq = tq; 1793 return (B_TRUE); 1794 } 1795 next_tq = (next_tq + 1) % sctps->sctps_recvq_tq_list_cur_sz; 1796 } 1797 1798 /* 1799 * Create one more taskq and try it. Note that sctp_inc_taskq() 1800 * may not have created another taskq if the number of recvq 1801 * taskqs is at the maximum. We are probably in a pretty bad 1802 * shape if this actually happens... 1803 */ 1804 sctp_inc_taskq(sctps); 1805 tq = sctps->sctps_recvq_tq_list[sctps->sctps_recvq_tq_list_cur_sz - 1]; 1806 if (taskq_dispatch(tq, sctp_process_recvq, sctp, TQ_NOSLEEP) != 1807 (uintptr_t)NULL) { 1808 sctp->sctp_recvq_tq = tq; 1809 return (B_TRUE); 1810 } 1811 SCTP_KSTAT(sctps, sctp_find_next_tq); 1812 return (B_FALSE); 1813 } 1814 1815 /* 1816 * To add a message to the recvq. Note that the sctp_timer_fire() 1817 * routine also uses this function to add the timer message to the 1818 * receive queue for later processing. And it should be the only 1819 * caller of sctp_add_recvq() which sets the try_harder argument 1820 * to B_TRUE. 1821 * 1822 * If the try_harder argument is B_TRUE, this routine sctp_find_next_tq() 1823 * will try very hard to dispatch the task. Refer to the comment 1824 * for that routine on how it does that. 1825 * 1826 * On failure the message has been freed i.e., this routine always consumes the 1827 * message. It bumps ipIfStatsInDiscards and and uses ip_drop_input to drop. 
1828 */ 1829 void 1830 sctp_add_recvq(sctp_t *sctp, mblk_t *mp, boolean_t caller_hold_lock, 1831 ip_recv_attr_t *ira) 1832 { 1833 mblk_t *attrmp; 1834 ip_stack_t *ipst = sctp->sctp_sctps->sctps_netstack->netstack_ip; 1835 1836 ASSERT(ira->ira_ill == NULL); 1837 1838 if (!caller_hold_lock) 1839 mutex_enter(&sctp->sctp_recvq_lock); 1840 1841 /* If the taskq dispatch has not been scheduled, do it now. */ 1842 if (sctp->sctp_recvq_tq == NULL) { 1843 ASSERT(sctp->sctp_recvq == NULL); 1844 if (!sctp_find_next_tq(sctp)) { 1845 if (!caller_hold_lock) 1846 mutex_exit(&sctp->sctp_recvq_lock); 1847 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1848 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1849 freemsg(mp); 1850 return; 1851 } 1852 /* Make sure the sctp_t will not go away. */ 1853 SCTP_REFHOLD(sctp); 1854 } 1855 1856 attrmp = ip_recv_attr_to_mblk(ira); 1857 if (attrmp == NULL) { 1858 if (!caller_hold_lock) 1859 mutex_exit(&sctp->sctp_recvq_lock); 1860 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards); 1861 ip_drop_input("ipIfStatsInDiscards", mp, NULL); 1862 freemsg(mp); 1863 return; 1864 } 1865 ASSERT(attrmp->b_cont == NULL); 1866 attrmp->b_cont = mp; 1867 mp = attrmp; 1868 1869 if (sctp->sctp_recvq == NULL) { 1870 sctp->sctp_recvq = mp; 1871 sctp->sctp_recvq_tail = mp; 1872 } else { 1873 sctp->sctp_recvq_tail->b_next = mp; 1874 sctp->sctp_recvq_tail = mp; 1875 } 1876 1877 if (!caller_hold_lock) 1878 mutex_exit(&sctp->sctp_recvq_lock); 1879 } 1880 1881 static void 1882 sctp_process_recvq(void *arg) 1883 { 1884 sctp_t *sctp = (sctp_t *)arg; 1885 mblk_t *mp; 1886 #ifdef DEBUG 1887 uint32_t loop_cnt = 0; 1888 #endif 1889 ip_recv_attr_t iras; 1890 1891 #ifdef _BIG_ENDIAN 1892 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 28) & 0x7) 1893 #else 1894 #define IPVER(ip6h) ((((uint32_t *)ip6h)[0] >> 4) & 0x7) 1895 #endif 1896 1897 RUN_SCTP(sctp); 1898 mutex_enter(&sctp->sctp_recvq_lock); 1899 1900 #ifdef DEBUG 1901 recvq_call++; 1902 #endif 1903 /* 1904 * Note that while we 
are in this loop, other thread can put 1905 * new packets in the receive queue. We may be looping for 1906 * quite a while. 1907 */ 1908 while ((mp = sctp->sctp_recvq) != NULL) { 1909 mblk_t *data_mp; 1910 1911 sctp->sctp_recvq = mp->b_next; 1912 mutex_exit(&sctp->sctp_recvq_lock); 1913 mp->b_next = NULL; 1914 #ifdef DEBUG 1915 loop_cnt++; 1916 #endif 1917 mp->b_prev = NULL; 1918 1919 data_mp = mp->b_cont; 1920 mp->b_cont = NULL; 1921 if (!ip_recv_attr_from_mblk(mp, &iras)) { 1922 ip_drop_input("ip_recv_attr_from_mblk", mp, NULL); 1923 freemsg(mp); 1924 ira_cleanup(&iras, B_TRUE); 1925 continue; 1926 } 1927 1928 if (iras.ira_flags & IRAF_ICMP_ERROR) 1929 sctp_icmp_error(sctp, data_mp); 1930 else 1931 sctp_input_data(sctp, data_mp, &iras); 1932 1933 ira_cleanup(&iras, B_TRUE); 1934 mutex_enter(&sctp->sctp_recvq_lock); 1935 } 1936 1937 sctp->sctp_recvq_tail = NULL; 1938 sctp->sctp_recvq_tq = NULL; 1939 1940 mutex_exit(&sctp->sctp_recvq_lock); 1941 1942 WAKE_SCTP(sctp); 1943 1944 #ifdef DEBUG 1945 if (loop_cnt > recvq_loop_cnt) 1946 recvq_loop_cnt = loop_cnt; 1947 #endif 1948 /* Now it can go away. 
*/ 1949 SCTP_REFRELE(sctp); 1950 } 1951 1952 /* ARGSUSED */ 1953 static int 1954 sctp_conn_cache_constructor(void *buf, void *cdrarg, int kmflags) 1955 { 1956 conn_t *connp = (conn_t *)buf; 1957 sctp_t *sctp = (sctp_t *)&connp[1]; 1958 int cnt; 1959 1960 bzero(connp, sizeof (conn_t)); 1961 bzero(buf, (char *)&sctp[1] - (char *)buf); 1962 1963 mutex_init(&sctp->sctp_reflock, NULL, MUTEX_DEFAULT, NULL); 1964 mutex_init(&sctp->sctp_lock, NULL, MUTEX_DEFAULT, NULL); 1965 mutex_init(&sctp->sctp_recvq_lock, NULL, MUTEX_DEFAULT, NULL); 1966 cv_init(&sctp->sctp_cv, NULL, CV_DEFAULT, NULL); 1967 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 1968 rw_init(&sctp->sctp_saddrs[cnt].ipif_hash_lock, NULL, 1969 RW_DEFAULT, NULL); 1970 } 1971 1972 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1973 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1974 connp->conn_flags = IPCL_SCTPCONN; 1975 connp->conn_proto = IPPROTO_SCTP; 1976 connp->conn_sctp = sctp; 1977 sctp->sctp_connp = connp; 1978 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 1979 1980 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 1981 if (connp->conn_ixa == NULL) { 1982 return (ENOMEM); 1983 } 1984 connp->conn_ixa->ixa_refcnt = 1; 1985 connp->conn_ixa->ixa_protocol = connp->conn_proto; 1986 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 1987 return (0); 1988 } 1989 1990 /* ARGSUSED */ 1991 static void 1992 sctp_conn_cache_destructor(void *buf, void *cdrarg) 1993 { 1994 conn_t *connp = (conn_t *)buf; 1995 sctp_t *sctp = (sctp_t *)&connp[1]; 1996 int cnt; 1997 1998 ASSERT(sctp->sctp_connp == connp); 1999 ASSERT(!MUTEX_HELD(&sctp->sctp_lock)); 2000 ASSERT(!MUTEX_HELD(&sctp->sctp_reflock)); 2001 ASSERT(!MUTEX_HELD(&sctp->sctp_recvq_lock)); 2002 2003 ASSERT(sctp->sctp_conn_hash_next == NULL); 2004 ASSERT(sctp->sctp_conn_hash_prev == NULL); 2005 ASSERT(sctp->sctp_listen_hash_next == NULL); 2006 ASSERT(sctp->sctp_listen_hash_prev == NULL); 2007 ASSERT(sctp->sctp_listen_tfp == 
NULL); 2008 ASSERT(sctp->sctp_conn_tfp == NULL); 2009 2010 ASSERT(sctp->sctp_faddrs == NULL); 2011 ASSERT(sctp->sctp_nsaddrs == 0); 2012 2013 ASSERT(sctp->sctp_ulpd == NULL); 2014 2015 ASSERT(sctp->sctp_lastfaddr == NULL); 2016 ASSERT(sctp->sctp_primary == NULL); 2017 ASSERT(sctp->sctp_current == NULL); 2018 ASSERT(sctp->sctp_lastdata == NULL); 2019 2020 ASSERT(sctp->sctp_xmit_head == NULL); 2021 ASSERT(sctp->sctp_xmit_tail == NULL); 2022 ASSERT(sctp->sctp_xmit_unsent == NULL); 2023 ASSERT(sctp->sctp_xmit_unsent_tail == NULL); 2024 2025 ASSERT(sctp->sctp_ostrcntrs == NULL); 2026 2027 ASSERT(sctp->sctp_sack_info == NULL); 2028 ASSERT(sctp->sctp_ack_mp == NULL); 2029 ASSERT(sctp->sctp_instr == NULL); 2030 2031 ASSERT(sctp->sctp_iphc == NULL); 2032 ASSERT(sctp->sctp_iphc6 == NULL); 2033 ASSERT(sctp->sctp_ipha == NULL); 2034 ASSERT(sctp->sctp_ip6h == NULL); 2035 ASSERT(sctp->sctp_sctph == NULL); 2036 ASSERT(sctp->sctp_sctph6 == NULL); 2037 2038 ASSERT(sctp->sctp_cookie_mp == NULL); 2039 2040 ASSERT(sctp->sctp_refcnt == 0); 2041 ASSERT(sctp->sctp_timer_mp == NULL); 2042 ASSERT(sctp->sctp_connp->conn_ref == 0); 2043 ASSERT(sctp->sctp_heartbeat_mp == NULL); 2044 ASSERT(sctp->sctp_ptpbhn == NULL && sctp->sctp_bind_hash == NULL); 2045 2046 ASSERT(sctp->sctp_shutdown_faddr == NULL); 2047 2048 ASSERT(sctp->sctp_cxmit_list == NULL); 2049 2050 ASSERT(sctp->sctp_recvq == NULL); 2051 ASSERT(sctp->sctp_recvq_tail == NULL); 2052 ASSERT(sctp->sctp_recvq_tq == NULL); 2053 2054 /* 2055 * sctp_pad_mp can be NULL if the memory allocation fails 2056 * in sctp_init_values() and the conn_t is freed. 
2057 */ 2058 if (sctp->sctp_pad_mp != NULL) { 2059 freeb(sctp->sctp_pad_mp); 2060 sctp->sctp_pad_mp = NULL; 2061 } 2062 2063 mutex_destroy(&sctp->sctp_reflock); 2064 mutex_destroy(&sctp->sctp_lock); 2065 mutex_destroy(&sctp->sctp_recvq_lock); 2066 cv_destroy(&sctp->sctp_cv); 2067 for (cnt = 0; cnt < SCTP_IPIF_HASH; cnt++) { 2068 rw_destroy(&sctp->sctp_saddrs[cnt].ipif_hash_lock); 2069 } 2070 2071 mutex_destroy(&connp->conn_lock); 2072 cv_destroy(&connp->conn_cv); 2073 rw_destroy(&connp->conn_ilg_lock); 2074 2075 /* Can be NULL if constructor failed */ 2076 if (connp->conn_ixa != NULL) { 2077 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2078 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2079 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2080 ixa_refrele(connp->conn_ixa); 2081 } 2082 } 2083 2084 static void 2085 sctp_conn_cache_init() 2086 { 2087 sctp_conn_cache = kmem_cache_create("sctp_conn_cache", 2088 sizeof (sctp_t) + sizeof (conn_t), 0, sctp_conn_cache_constructor, 2089 sctp_conn_cache_destructor, sctp_conn_reclaim, NULL, NULL, 0); 2090 } 2091 2092 static void 2093 sctp_conn_cache_fini() 2094 { 2095 kmem_cache_destroy(sctp_conn_cache); 2096 } 2097 2098 void 2099 sctp_conn_init(conn_t *connp) 2100 { 2101 ASSERT(connp->conn_flags == IPCL_SCTPCONN); 2102 connp->conn_rq = connp->conn_wq = NULL; 2103 connp->conn_ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE | 2104 IXAF_VERIFY_PMTU; 2105 2106 ASSERT(connp->conn_proto == IPPROTO_SCTP); 2107 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 2108 connp->conn_state_flags |= CONN_INCIPIENT; 2109 2110 ASSERT(connp->conn_sctp != NULL); 2111 2112 /* 2113 * Register sctp_notify to listen to capability changes detected by IP. 2114 * This upcall is made in the context of the call to conn_ip_output 2115 * thus it holds whatever locks sctp holds across conn_ip_output. 
2116 */ 2117 connp->conn_ixa->ixa_notify = sctp_notify; 2118 connp->conn_ixa->ixa_notify_cookie = connp->conn_sctp; 2119 } 2120 2121 static void 2122 sctp_conn_clear(conn_t *connp) 2123 { 2124 /* Clean up conn_t stuff */ 2125 if (connp->conn_latch != NULL) { 2126 IPLATCH_REFRELE(connp->conn_latch); 2127 connp->conn_latch = NULL; 2128 } 2129 if (connp->conn_latch_in_policy != NULL) { 2130 IPPOL_REFRELE(connp->conn_latch_in_policy); 2131 connp->conn_latch_in_policy = NULL; 2132 } 2133 if (connp->conn_latch_in_action != NULL) { 2134 IPACT_REFRELE(connp->conn_latch_in_action); 2135 connp->conn_latch_in_action = NULL; 2136 } 2137 if (connp->conn_policy != NULL) { 2138 IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 2139 connp->conn_policy = NULL; 2140 } 2141 if (connp->conn_ipsec_opt_mp != NULL) { 2142 freemsg(connp->conn_ipsec_opt_mp); 2143 connp->conn_ipsec_opt_mp = NULL; 2144 } 2145 netstack_rele(connp->conn_netstack); 2146 connp->conn_netstack = NULL; 2147 2148 /* Leave conn_ixa and other constructed fields in place */ 2149 ipcl_conn_cleanup(connp); 2150 } 2151