1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/debug.h> 32 #include <sys/errno.h> 33 #include <sys/strsubr.h> 34 #include <sys/cmn_err.h> 35 #include <sys/sysmacros.h> 36 37 #include <sys/socket.h> 38 #include <sys/socketvar.h> 39 #include <sys/strsun.h> 40 #include <sys/signal.h> 41 42 #include <netinet/sctp.h> 43 #include <inet/sctp_itf.h> 44 #include <fs/sockfs/sockcommon.h> 45 #include "socksctp.h" 46 47 extern kmem_cache_t *sosctp_assoccache; 48 /* 49 * Find a free association id. See os/fio.c file descriptor allocator 50 * for description of the algorithm. 51 */ 52 sctp_assoc_t 53 sosctp_aid_get(struct sctp_sonode *ss) 54 { 55 sctp_assoc_t id, size, ralloc; 56 struct sctp_sa_id *assocs = ss->ss_assocs; 57 58 ASSERT((ss->ss_maxassoc & (ss->ss_maxassoc + 1)) == 0); 59 60 for (id = 1; (uint32_t)id < ss->ss_maxassoc; id |= id + 1) { 61 size = id + 1; 62 if (assocs[id].ssi_alloc == size) 63 continue; 64 for (ralloc = 0, size >>= 1; size != 0; size >>= 1) { 65 ralloc += assocs[id + size].ssi_alloc; 66 if (assocs[id].ssi_alloc == ralloc + size) { 67 id += size; 68 ralloc = 0; 69 } 70 } 71 return (id); 72 } 73 return (-1); 74 } 75 76 /* 77 * Allocate or free ID, depending on whether incr is 1 or -1 78 */ 79 void 80 sosctp_aid_reserve(struct sctp_sonode *ss, sctp_assoc_t id, int incr) 81 { 82 struct sctp_sa_id *assocs = ss->ss_assocs; 83 sctp_assoc_t pid; 84 85 ASSERT((assocs[id].ssi_assoc == NULL && incr == 1) || 86 (assocs[id].ssi_assoc != NULL && incr == -1)); 87 88 for (pid = id; pid >= 0; pid = (pid & (pid + 1)) - 1) { 89 assocs[pid].ssi_alloc += incr; 90 } 91 } 92 93 /* 94 * Increase size of the ss_assocs array to accommodate at least maxid. 95 * We keep the size of the form 2^n - 1 for benefit of sosctp_aid_get(). 96 */ 97 int 98 sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid, int kmflags) 99 { 100 sctp_assoc_t newcnt, oldcnt; 101 struct sctp_sa_id *newlist, *oldlist; 102 103 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 104 for (newcnt = 1; newcnt <= maxid; newcnt = (newcnt << 1) | 1) { 105 continue; 106 } 107 108 mutex_exit(&ss->ss_so.so_lock); 109 newlist = kmem_alloc(newcnt * sizeof (struct sctp_sa_id), kmflags); 110 mutex_enter(&ss->ss_so.so_lock); 111 if (newlist == NULL) { 112 return (-1); 113 } 114 oldcnt = ss->ss_maxassoc; 115 if (newcnt <= oldcnt) { 116 kmem_free(newlist, newcnt * sizeof (struct sctp_sa_id)); 117 return (0); 118 } 119 ASSERT((newcnt & (newcnt + 1)) == 0); 120 oldlist = ss->ss_assocs; 121 ss->ss_assocs = newlist; 122 ss->ss_maxassoc = newcnt; 123 124 bcopy(oldlist, newlist, oldcnt * sizeof (struct sctp_sa_id)); 125 bzero(newlist + oldcnt, 126 (newcnt - oldcnt) * sizeof (struct sctp_sa_id)); 127 if (oldlist != NULL) { 128 kmem_free(oldlist, oldcnt * sizeof (struct sctp_sa_id)); 129 } 130 return (0); 131 } 132 133 /* 134 * Convert a id into a pointer to sctp_sockassoc structure. 135 * Increments refcnt. 136 */ 137 int 138 sosctp_assoc(struct sctp_sonode *ss, sctp_assoc_t id, struct sctp_soassoc **ssa) 139 { 140 ASSERT(ssa != NULL); 141 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 142 if ((uint32_t)id >= ss->ss_maxassoc) { 143 *ssa = NULL; 144 return (EINVAL); 145 } 146 147 if ((*ssa = ss->ss_assocs[id].ssi_assoc) == NULL) { 148 return (EINVAL); 149 } 150 if (((*ssa)->ssa_state & (SS_CANTSENDMORE|SS_CANTRCVMORE)) == 151 (SS_CANTSENDMORE|SS_CANTRCVMORE)) { 152 /* 153 * Disconnected connection, shouldn't be found anymore 154 */ 155 *ssa = NULL; 156 return (ESHUTDOWN); 157 } 158 SSA_REFHOLD(*ssa) 159 160 return (0); 161 } 162 163 /* 164 * Can be called from upcall, or through system call. 165 */ 166 struct sctp_soassoc * 167 sosctp_assoc_create(struct sctp_sonode *ss, int kmflag) 168 { 169 struct sctp_soassoc *ssa; 170 171 ssa = kmem_cache_alloc(sosctp_assoccache, kmflag); 172 if (ssa != NULL) { 173 ssa->ssa_type = SOSCTP_ASSOC; 174 ssa->ssa_refcnt = 1; 175 ssa->ssa_sonode = ss; 176 ssa->ssa_state = 0; 177 ssa->ssa_error = 0; 178 ssa->ssa_snd_qfull = 0; 179 ssa->ssa_rcv_queued = 0; 180 } 181 dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa)); 182 return (ssa); 183 } 184 185 void 186 sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa) 187 { 188 struct sonode *so = &ss->ss_so; 189 190 dprint(2, ("sosctp_assoc_free %p %p (%d)\n", (void *)ss, (void *)ssa, 191 ssa->ssa_id)); 192 ASSERT(MUTEX_HELD(&so->so_lock)); 193 if (ssa->ssa_conn != NULL) { 194 mutex_exit(&so->so_lock); 195 196 sctp_recvd(ssa->ssa_conn, so->so_rcvbuf); 197 (void) sctp_disconnect(ssa->ssa_conn); 198 sctp_close(ssa->ssa_conn); 199 200 mutex_enter(&so->so_lock); 201 ssa->ssa_conn = NULL; 202 } 203 sosctp_aid_reserve(ss, ssa->ssa_id, -1); 204 ss->ss_assocs[ssa->ssa_id].ssi_assoc = NULL; 205 --ss->ss_assoccnt; 206 kmem_cache_free(sosctp_assoccache, ssa); 207 } 208 209 /* 210 * Pack the ancillary stuff taking care of alignment issues. 211 * sctp_input_add_ancillary() packs the information as: 212 * struct cmsghdr -> ancillary data + struct cmsghdr -> ancillary data + ... 213 * In the next version of SCTP, sctp_input_add_ancillary() should 214 * pack the information taking alignment into account, then we would 215 * not need this routine. 216 */ 217 void 218 sosctp_pack_cmsg(const uchar_t *opt, struct nmsghdr *msg, int len) 219 { 220 struct cmsghdr *ocmsg; 221 struct cmsghdr *cmsg; 222 int optlen = 0; 223 char *cend; 224 boolean_t isaligned = B_TRUE; 225 226 ocmsg = (struct cmsghdr *)opt; 227 cend = (char *)opt + len; 228 /* Figure out the length incl. alignment et. al. */ 229 for (;;) { 230 if ((char *)(ocmsg + 1) > cend || 231 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 232 break; 233 } 234 if (isaligned && !ISALIGNED_cmsghdr(ocmsg)) 235 isaligned = B_FALSE; 236 optlen += ROUNDUP_cmsglen(ocmsg->cmsg_len); 237 if (ocmsg->cmsg_len > 0) { 238 ocmsg = (struct cmsghdr *) 239 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 240 } else { 241 break; 242 } 243 } 244 /* Now allocate and copy */ 245 msg->msg_control = kmem_zalloc(optlen, KM_SLEEP); 246 msg->msg_controllen = optlen; 247 if (isaligned) { 248 ASSERT(optlen == len); 249 bcopy(opt, msg->msg_control, len); 250 return; 251 } 252 cmsg = (struct cmsghdr *)msg->msg_control; 253 ASSERT(ISALIGNED_cmsghdr(cmsg)); 254 ocmsg = (struct cmsghdr *)opt; 255 cend = (char *)opt + len; 256 for (;;) { 257 if ((char *)(ocmsg + 1) > cend || 258 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 259 break; 260 } 261 bcopy(ocmsg, cmsg, ocmsg->cmsg_len); 262 if (ocmsg->cmsg_len > 0) { 263 cmsg = (struct cmsghdr *)((uchar_t *)cmsg + 264 ROUNDUP_cmsglen(ocmsg->cmsg_len)); 265 ASSERT(ISALIGNED_cmsghdr(cmsg)); 266 ocmsg = (struct cmsghdr *) 267 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 268 } else { 269 break; 270 } 271 } 272 } 273 274 /* 275 * Find cmsghdr of specified type 276 */ 277 struct cmsghdr * 278 sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type) 279 { 280 struct cmsghdr *cmsg; 281 char *cend; 282 283 cmsg = (struct cmsghdr *)control; 284 cend = (char *)control + clen; 285 286 for (;;) { 287 if ((char *)(cmsg + 1) > cend || 288 ((char *)cmsg + cmsg->cmsg_len) > cend) { 289 break; 290 } 291 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 292 (cmsg->cmsg_type == type)) { 293 return (cmsg); 294 } 295 if (cmsg->cmsg_len > 0) { 296 cmsg = CMSG_NEXT(cmsg); 297 } else { 298 break; 299 } 300 } 301 return (NULL); 302 } 303 304 /* 305 * Wait until the association is connected or there is an error. 306 * fmode should contain any nonblocking flags. 307 */ 308 static int 309 sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode) 310 { 311 struct sonode *so = &ssa->ssa_sonode->ss_so; 312 int error = 0; 313 314 ASSERT((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) || 315 ssa->ssa_error != 0); 316 317 while ((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 318 SS_ISCONNECTING && ssa->ssa_error == 0) { 319 320 dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so)); 321 if (fmode & (FNDELAY|FNONBLOCK)) 322 return (EINPROGRESS); 323 324 if (so->so_state & SS_CLOSING) 325 return (EINTR); 326 if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 327 /* 328 * Return EINTR and let the application use 329 * nonblocking techniques for detecting when 330 * the connection has been established. 331 */ 332 return (EINTR); 333 } 334 dprint(3, ("awoken on %p\n", (void *)so)); 335 } 336 if (ssa->ssa_error != 0) { 337 error = ssa->ssa_error; 338 ssa->ssa_error = 0; 339 dprint(3, ("sosctp_assoc_waitconnected: error %d\n", error)); 340 return (error); 341 } 342 343 if (!(ssa->ssa_state & SS_ISCONNECTED)) { 344 /* 345 * Another thread could have consumed so_error 346 * e.g. by calling read. - take from sowaitconnected() 347 */ 348 error = ECONNREFUSED; 349 dprint(3, ("sosctp_waitconnected: error %d\n", error)); 350 return (error); 351 } 352 return (0); 353 } 354 355 /* 356 * Called from connect(), sendmsg() when we need to create a new association. 357 */ 358 int 359 sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name, 360 socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag, 361 struct cred *cr, struct sctp_soassoc **ssap) 362 { 363 struct sonode *so = &ss->ss_so; 364 struct sctp_soassoc *ssa; 365 struct sockaddr_storage laddr; 366 sctp_sockbuf_limits_t sbl; 367 sctp_assoc_t id; 368 int error; 369 struct cmsghdr *cmsg; 370 371 ASSERT(MUTEX_HELD(&so->so_lock)); 372 373 /* 374 * System needs to pick local endpoint 375 */ 376 if (!(so->so_state & SS_ISBOUND)) { 377 bzero(&laddr, sizeof (laddr)); 378 laddr.ss_family = so->so_family; 379 380 error = SOP_BIND(so, (struct sockaddr *)&laddr, 381 sizeof (laddr), _SOBIND_LOCK_HELD, cr); 382 if (error) { 383 *ssap = NULL; 384 return (error); 385 } 386 } 387 388 /* 389 * Create a new association, and call connect on that. 390 */ 391 for (;;) { 392 id = sosctp_aid_get(ss); 393 if (id != -1) { 394 break; 395 } 396 /* 397 * Array not large enough; increase size. 398 */ 399 (void) sosctp_aid_grow(ss, ss->ss_maxassoc, KM_SLEEP); 400 } 401 ++ss->ss_assoccnt; 402 sosctp_aid_reserve(ss, id, 1); 403 404 mutex_exit(&so->so_lock); 405 406 ssa = sosctp_assoc_create(ss, KM_SLEEP); 407 ssa->ssa_wroff = ss->ss_wroff; 408 ssa->ssa_wrsize = ss->ss_wrsize; 409 ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle, 410 so->so_family, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, cr); 411 412 mutex_enter(&so->so_lock); 413 ss->ss_assocs[id].ssi_assoc = ssa; 414 ssa->ssa_id = id; 415 if (ssa->ssa_conn == NULL) { 416 ASSERT(ssa->ssa_refcnt == 1); 417 sosctp_assoc_free(ss, ssa); 418 *ssap = NULL; 419 return (ENOMEM); 420 } 421 ssa->ssa_state |= SS_ISBOUND; 422 423 sosctp_assoc_isconnecting(ssa); 424 SSA_REFHOLD(ssa); 425 mutex_exit(&so->so_lock); 426 427 /* 428 * Can specify special init params 429 */ 430 cmsg = sosctp_find_cmsg(control, controllen, SCTP_INIT); 431 if (cmsg != NULL) { 432 error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_INITMSG, 433 cmsg + 1, cmsg->cmsg_len - sizeof (*cmsg)); 434 if (error != 0) 435 goto ret_err; 436 } 437 438 if ((error = sctp_connect(ssa->ssa_conn, name, namelen)) != 0) 439 goto ret_err; 440 441 mutex_enter(&so->so_lock); 442 /* 443 * Allow other threads to access the socket 444 */ 445 error = sosctp_assoc_waitconnected(ssa, fflag); 446 447 switch (error) { 448 case 0: 449 case EINPROGRESS: 450 case EALREADY: 451 case EINTR: 452 /* Non-fatal errors */ 453 break; 454 default: 455 /* 456 * Fatal errors. It means that sctp_assoc_disconnected() 457 * must have been called. So we only need to do a 458 * SSA_REFRELE() here to release our hold done above. 459 */ 460 ASSERT(ssa->ssa_state & (SS_CANTSENDMORE | SS_CANTRCVMORE)); 461 SSA_REFRELE(ss, ssa); 462 ssa = NULL; 463 break; 464 } 465 466 *ssap = ssa; 467 return (error); 468 469 ret_err: 470 mutex_enter(&so->so_lock); 471 /* 472 * There should not be any upcall done by SCTP. So normally the 473 * ssa_refcnt should be 2. And we can call sosctp_assoc_free() 474 * directly. But since the ssa is inserted to the ss_soassocs 475 * array above, some thread can actually put a hold on it. In 476 * this special case, we "manually" decrease the ssa_refcnt by 2. 477 */ 478 if (ssa->ssa_refcnt > 2) 479 ssa->ssa_refcnt -= 2; 480 else 481 sosctp_assoc_free(ss, ssa); 482 *ssap = NULL; 483 return (error); 484 } 485 486 /* 487 * Inherit socket properties 488 */ 489 void 490 sosctp_so_inherit(struct sctp_sonode *lss, struct sctp_sonode *nss) 491 { 492 struct sonode *nso = &nss->ss_so; 493 struct sonode *lso = &lss->ss_so; 494 495 nso->so_options = lso->so_options & (SO_DEBUG|SO_REUSEADDR| 496 SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 497 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 498 nso->so_sndbuf = lso->so_sndbuf; 499 nso->so_rcvbuf = lso->so_rcvbuf; 500 nso->so_pgrp = lso->so_pgrp; 501 502 nso->so_rcvlowat = lso->so_rcvlowat; 503 nso->so_sndlowat = lso->so_sndlowat; 504 } 505 506 /* 507 * Branching association to it's own socket. Inherit properties from 508 * the parent, and move data for the association to the new socket. 509 */ 510 void 511 sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, 512 struct sctp_soassoc *ssa) 513 { 514 mblk_t *mp, **nmp, *last_mp; 515 struct sctp_soassoc *tmp; 516 517 sosctp_so_inherit(ss, nss); 518 519 nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK)); 520 nss->ss_so.so_state |= 521 (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING| 522 SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND)); 523 nss->ss_so.so_error = ssa->ssa_error; 524 nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull; 525 nss->ss_wroff = ssa->ssa_wroff; 526 nss->ss_wrsize = ssa->ssa_wrsize; 527 nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued; 528 nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; 529 /* The peeled off socket is connection oriented */ 530 nss->ss_so.so_mode |= SM_CONNREQUIRED; 531 532 /* Consolidate all data on a single rcv list */ 533 if (ss->ss_so.so_rcv_head != NULL) { 534 so_process_new_message(&ss->ss_so, ss->ss_so.so_rcv_head, 535 ss->ss_so.so_rcv_last_head); 536 ss->ss_so.so_rcv_head = NULL; 537 ss->ss_so.so_rcv_last_head = NULL; 538 } 539 540 if (nss->ss_so.so_rcv_queued > 0) { 541 nmp = &ss->ss_so.so_rcv_q_head; 542 last_mp = NULL; 543 while ((mp = *nmp) != NULL) { 544 tmp = *(struct sctp_soassoc **)DB_BASE(mp); 545 #ifdef DEBUG 546 { 547 /* 548 * Verify that b_prev points to the last 549 * mblk in the b_cont chain (as mandated 550 * by so_dequeue_msg().) 551 */ 552 mblk_t *mp1 = mp; 553 while (mp1->b_cont != NULL) 554 mp1 = mp1->b_cont; 555 VERIFY(mp->b_prev == mp1); 556 } 557 #endif /* DEBUG */ 558 if (tmp == ssa) { 559 *nmp = mp->b_next; 560 ASSERT(DB_TYPE(mp) != M_DATA); 561 if (nss->ss_so.so_rcv_q_last_head == NULL) { 562 nss->ss_so.so_rcv_q_head = mp; 563 } else { 564 nss->ss_so.so_rcv_q_last_head->b_next = 565 mp; 566 } 567 nss->ss_so.so_rcv_q_last_head = mp; 568 mp->b_next = NULL; 569 } else { 570 nmp = &mp->b_next; 571 last_mp = mp; 572 } 573 } 574 575 ss->ss_so.so_rcv_q_last_head = last_mp; 576 } 577 } 578 579 void 580 sosctp_assoc_isconnecting(struct sctp_soassoc *ssa) 581 { 582 struct sonode *so = &ssa->ssa_sonode->ss_so; 583 584 ASSERT(MUTEX_HELD(&so->so_lock)); 585 586 ssa->ssa_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 587 ssa->ssa_state |= SS_ISCONNECTING; 588 cv_broadcast(&so->so_state_cv); 589 } 590 591 void 592 sosctp_assoc_isconnected(struct sctp_soassoc *ssa) 593 { 594 struct sonode *so = &ssa->ssa_sonode->ss_so; 595 596 ASSERT(MUTEX_HELD(&so->so_lock)); 597 598 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 599 ssa->ssa_state |= SS_ISCONNECTED; 600 cv_broadcast(&so->so_state_cv); 601 } 602 603 void 604 sosctp_assoc_isdisconnecting(struct sctp_soassoc *ssa) 605 { 606 struct sonode *so = &ssa->ssa_sonode->ss_so; 607 608 ASSERT(MUTEX_HELD(&so->so_lock)); 609 610 ssa->ssa_state &= ~SS_ISCONNECTING; 611 ssa->ssa_state |= SS_CANTSENDMORE; 612 cv_broadcast(&so->so_state_cv); 613 } 614 615 void 616 sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error) 617 { 618 struct sonode *so = &ssa->ssa_sonode->ss_so; 619 620 ASSERT(MUTEX_HELD(&so->so_lock)); 621 622 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 623 ssa->ssa_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 624 if (error != 0) 625 ssa->ssa_error = (ushort_t)error; 626 cv_broadcast(&so->so_state_cv); 627 } 628