1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/t_lock.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/debug.h> 32 #include <sys/errno.h> 33 #include <sys/strsubr.h> 34 #include <sys/cmn_err.h> 35 #include <sys/sysmacros.h> 36 37 #include <sys/socket.h> 38 #include <sys/socketvar.h> 39 #include <sys/strsun.h> 40 #include <sys/signal.h> 41 42 #include <netinet/sctp.h> 43 #include <inet/sctp_itf.h> 44 #include <fs/sockfs/sockcommon.h> 45 #include "socksctp.h" 46 47 extern kmem_cache_t *sosctp_assoccache; 48 /* 49 * Find a free association id. See os/fio.c file descriptor allocator 50 * for description of the algorithm. 51 */ 52 sctp_assoc_t 53 sosctp_aid_get(struct sctp_sonode *ss) 54 { 55 sctp_assoc_t id, size, ralloc; 56 struct sctp_sa_id *assocs = ss->ss_assocs; 57 58 ASSERT((ss->ss_maxassoc & (ss->ss_maxassoc + 1)) == 0); 59 60 for (id = 1; (uint32_t)id < ss->ss_maxassoc; id |= id + 1) { 61 size = id + 1; 62 if (assocs[id].ssi_alloc == size) 63 continue; 64 for (ralloc = 0, size >>= 1; size != 0; size >>= 1) { 65 ralloc += assocs[id + size].ssi_alloc; 66 if (assocs[id].ssi_alloc == ralloc + size) { 67 id += size; 68 ralloc = 0; 69 } 70 } 71 return (id); 72 } 73 return (-1); 74 } 75 76 /* 77 * Allocate or free ID, depending on whether incr is 1 or -1 78 */ 79 void 80 sosctp_aid_reserve(struct sctp_sonode *ss, sctp_assoc_t id, int incr) 81 { 82 struct sctp_sa_id *assocs = ss->ss_assocs; 83 sctp_assoc_t pid; 84 85 ASSERT((assocs[id].ssi_assoc == NULL && incr == 1) || 86 (assocs[id].ssi_assoc != NULL && incr == -1)); 87 88 for (pid = id; pid >= 0; pid = (pid & (pid + 1)) - 1) { 89 assocs[pid].ssi_alloc += incr; 90 } 91 } 92 93 /* 94 * Increase size of the ss_assocs array to accommodate at least maxid. 95 * We keep the size of the form 2^n - 1 for benefit of sosctp_aid_get(). 96 */ 97 int 98 sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid, int kmflags) 99 { 100 sctp_assoc_t newcnt, oldcnt; 101 struct sctp_sa_id *newlist, *oldlist; 102 103 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 104 for (newcnt = 1; newcnt <= maxid; newcnt = (newcnt << 1) | 1) { 105 continue; 106 } 107 108 mutex_exit(&ss->ss_so.so_lock); 109 newlist = kmem_alloc(newcnt * sizeof (struct sctp_sa_id), kmflags); 110 mutex_enter(&ss->ss_so.so_lock); 111 if (newlist == NULL) { 112 return (-1); 113 } 114 oldcnt = ss->ss_maxassoc; 115 if (newcnt <= oldcnt) { 116 kmem_free(newlist, newcnt * sizeof (struct sctp_sa_id)); 117 return (0); 118 } 119 ASSERT((newcnt & (newcnt + 1)) == 0); 120 oldlist = ss->ss_assocs; 121 ss->ss_assocs = newlist; 122 ss->ss_maxassoc = newcnt; 123 124 bcopy(oldlist, newlist, oldcnt * sizeof (struct sctp_sa_id)); 125 bzero(newlist + oldcnt, 126 (newcnt - oldcnt) * sizeof (struct sctp_sa_id)); 127 if (oldlist != NULL) { 128 kmem_free(oldlist, oldcnt * sizeof (struct sctp_sa_id)); 129 } 130 return (0); 131 } 132 133 /* 134 * Convert a id into a pointer to sctp_sockassoc structure. 135 * Increments refcnt. 136 */ 137 int 138 sosctp_assoc(struct sctp_sonode *ss, sctp_assoc_t id, struct sctp_soassoc **ssa) 139 { 140 ASSERT(ssa != NULL); 141 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 142 if ((uint32_t)id >= ss->ss_maxassoc) { 143 *ssa = NULL; 144 return (EINVAL); 145 } 146 147 if ((*ssa = ss->ss_assocs[id].ssi_assoc) == NULL) { 148 return (EINVAL); 149 } 150 if (((*ssa)->ssa_state & (SS_CANTSENDMORE|SS_CANTRCVMORE)) == 151 (SS_CANTSENDMORE|SS_CANTRCVMORE)) { 152 /* 153 * Disconnected connection, shouldn't be found anymore 154 */ 155 *ssa = NULL; 156 return (ESHUTDOWN); 157 } 158 SSA_REFHOLD(*ssa) 159 160 return (0); 161 } 162 163 /* 164 * Can be called from upcall, or through system call. 165 */ 166 struct sctp_soassoc * 167 sosctp_assoc_create(struct sctp_sonode *ss, int kmflag) 168 { 169 struct sctp_soassoc *ssa; 170 171 ssa = kmem_cache_alloc(sosctp_assoccache, kmflag); 172 if (ssa != NULL) { 173 ssa->ssa_type = SOSCTP_ASSOC; 174 ssa->ssa_refcnt = 1; 175 ssa->ssa_sonode = ss; 176 ssa->ssa_state = 0; 177 ssa->ssa_error = 0; 178 #if 0 179 ssa->ssa_txqueued = 0; 180 #endif 181 ssa->ssa_snd_qfull = 0; 182 } 183 dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa)); 184 return (ssa); 185 } 186 187 void 188 sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa) 189 { 190 struct sonode *so = &ss->ss_so; 191 192 dprint(2, ("sosctp_assoc_free %p %p (%d)\n", (void *)ss, (void *)ssa, 193 ssa->ssa_id)); 194 ASSERT(MUTEX_HELD(&so->so_lock)); 195 if (ssa->ssa_conn != NULL) { 196 mutex_exit(&so->so_lock); 197 198 sctp_recvd(ssa->ssa_conn, so->so_rcvbuf); 199 (void) sctp_disconnect(ssa->ssa_conn); 200 sctp_close(ssa->ssa_conn); 201 202 mutex_enter(&so->so_lock); 203 ssa->ssa_conn = NULL; 204 } 205 sosctp_aid_reserve(ss, ssa->ssa_id, -1); 206 ss->ss_assocs[ssa->ssa_id].ssi_assoc = NULL; 207 --ss->ss_assoccnt; 208 kmem_cache_free(sosctp_assoccache, ssa); 209 } 210 211 /* 212 * Pack the ancillary stuff taking care of alignment issues. 213 * sctp_input_add_ancillary() packs the information as: 214 * struct cmsghdr -> ancillary data + struct cmsghdr -> ancillary data + ... 215 * In the next version of SCTP, sctp_input_add_ancillary() should 216 * pack the information taking alignment into account, then we would 217 * not need this routine. 218 */ 219 void 220 sosctp_pack_cmsg(const uchar_t *opt, struct nmsghdr *msg, int len) 221 { 222 struct cmsghdr *ocmsg; 223 struct cmsghdr *cmsg; 224 int optlen = 0; 225 char *cend; 226 boolean_t isaligned = B_TRUE; 227 228 ocmsg = (struct cmsghdr *)opt; 229 cend = (char *)opt + len; 230 /* Figure out the length incl. alignment et. al. */ 231 for (;;) { 232 if ((char *)(ocmsg + 1) > cend || 233 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 234 break; 235 } 236 if (isaligned && !ISALIGNED_cmsghdr(ocmsg)) 237 isaligned = B_FALSE; 238 optlen += ROUNDUP_cmsglen(ocmsg->cmsg_len); 239 if (ocmsg->cmsg_len > 0) { 240 ocmsg = (struct cmsghdr *) 241 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 242 } else { 243 break; 244 } 245 } 246 /* Now allocate and copy */ 247 msg->msg_control = kmem_zalloc(optlen, KM_SLEEP); 248 msg->msg_controllen = optlen; 249 if (isaligned) { 250 ASSERT(optlen == len); 251 bcopy(opt, msg->msg_control, len); 252 return; 253 } 254 cmsg = (struct cmsghdr *)msg->msg_control; 255 ASSERT(ISALIGNED_cmsghdr(cmsg)); 256 ocmsg = (struct cmsghdr *)opt; 257 cend = (char *)opt + len; 258 for (;;) { 259 if ((char *)(ocmsg + 1) > cend || 260 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 261 break; 262 } 263 bcopy(ocmsg, cmsg, ocmsg->cmsg_len); 264 if (ocmsg->cmsg_len > 0) { 265 cmsg = (struct cmsghdr *)((uchar_t *)cmsg + 266 ROUNDUP_cmsglen(ocmsg->cmsg_len)); 267 ASSERT(ISALIGNED_cmsghdr(cmsg)); 268 ocmsg = (struct cmsghdr *) 269 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 270 } else { 271 break; 272 } 273 } 274 } 275 276 /* 277 * Find cmsghdr of specified type 278 */ 279 struct cmsghdr * 280 sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type) 281 { 282 struct cmsghdr *cmsg; 283 char *cend; 284 285 cmsg = (struct cmsghdr *)control; 286 cend = (char *)control + clen; 287 288 for (;;) { 289 if ((char *)(cmsg + 1) > cend || 290 ((char *)cmsg + cmsg->cmsg_len) > cend) { 291 break; 292 } 293 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 294 (cmsg->cmsg_type == type)) { 295 return (cmsg); 296 } 297 if (cmsg->cmsg_len > 0) { 298 cmsg = CMSG_NEXT(cmsg); 299 } else { 300 break; 301 } 302 } 303 return (NULL); 304 } 305 306 /* 307 * Wait until the association is connected or there is an error. 308 * fmode should contain any nonblocking flags. 309 */ 310 static int 311 sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode) 312 { 313 struct sonode *so = &ssa->ssa_sonode->ss_so; 314 int error = 0; 315 316 ASSERT((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) || 317 ssa->ssa_error != 0); 318 319 while ((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 320 SS_ISCONNECTING && ssa->ssa_error == 0) { 321 322 dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so)); 323 if (fmode & (FNDELAY|FNONBLOCK)) 324 return (EINPROGRESS); 325 326 if (so->so_state & SS_CLOSING) 327 return (EINTR); 328 if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 329 /* 330 * Return EINTR and let the application use 331 * nonblocking techniques for detecting when 332 * the connection has been established. 333 */ 334 return (EINTR); 335 } 336 dprint(3, ("awoken on %p\n", (void *)so)); 337 } 338 if (ssa->ssa_error != 0) { 339 error = ssa->ssa_error; 340 ssa->ssa_error = 0; 341 dprint(3, ("sosctp_assoc_waitconnected: error %d\n", error)); 342 return (error); 343 } 344 345 if (!(ssa->ssa_state & SS_ISCONNECTED)) { 346 /* 347 * Another thread could have consumed so_error 348 * e.g. by calling read. - take from sowaitconnected() 349 */ 350 error = ECONNREFUSED; 351 dprint(3, ("sosctp_waitconnected: error %d\n", error)); 352 return (error); 353 } 354 return (0); 355 } 356 357 /* 358 * Called from connect(), sendmsg() when we need to create a new association. 359 */ 360 int 361 sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name, 362 socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag, 363 struct cred *cr, struct sctp_soassoc **ssap) 364 { 365 struct sonode *so = &ss->ss_so; 366 struct sctp_soassoc *ssa; 367 struct sockaddr_storage laddr; 368 sctp_sockbuf_limits_t sbl; 369 sctp_assoc_t id; 370 int error; 371 struct cmsghdr *cmsg; 372 373 ASSERT(MUTEX_HELD(&so->so_lock)); 374 375 /* 376 * System needs to pick local endpoint 377 */ 378 if (!(so->so_state & SS_ISBOUND)) { 379 bzero(&laddr, sizeof (laddr)); 380 laddr.ss_family = so->so_family; 381 382 error = SOP_BIND(so, (struct sockaddr *)&laddr, 383 sizeof (laddr), _SOBIND_LOCK_HELD, cr); 384 if (error) { 385 *ssap = NULL; 386 return (error); 387 } 388 } 389 390 /* 391 * Create a new association, and call connect on that. 392 */ 393 for (;;) { 394 id = sosctp_aid_get(ss); 395 if (id != -1) { 396 break; 397 } 398 /* 399 * Array not large enough; increase size. 400 */ 401 (void) sosctp_aid_grow(ss, ss->ss_maxassoc, KM_SLEEP); 402 } 403 ++ss->ss_assoccnt; 404 sosctp_aid_reserve(ss, id, 1); 405 406 mutex_exit(&so->so_lock); 407 408 ssa = sosctp_assoc_create(ss, KM_SLEEP); 409 ssa->ssa_wroff = ss->ss_wroff; 410 ssa->ssa_wrsize = ss->ss_wrsize; 411 ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle, 412 so->so_family, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, cr); 413 414 mutex_enter(&so->so_lock); 415 ss->ss_assocs[id].ssi_assoc = ssa; 416 ssa->ssa_id = id; 417 if (ssa->ssa_conn == NULL) { 418 ASSERT(ssa->ssa_refcnt == 1); 419 sosctp_assoc_free(ss, ssa); 420 *ssap = NULL; 421 return (ENOMEM); 422 } 423 ssa->ssa_state |= SS_ISBOUND; 424 425 sosctp_assoc_isconnecting(ssa); 426 SSA_REFHOLD(ssa); 427 mutex_exit(&so->so_lock); 428 429 /* 430 * Can specify special init params 431 */ 432 cmsg = sosctp_find_cmsg(control, controllen, SCTP_INIT); 433 if (cmsg != NULL) { 434 error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_INITMSG, 435 cmsg + 1, cmsg->cmsg_len - sizeof (*cmsg)); 436 if (error != 0) 437 goto ret_err; 438 } 439 440 if ((error = sctp_connect(ssa->ssa_conn, name, namelen)) != 0) 441 goto ret_err; 442 443 mutex_enter(&so->so_lock); 444 /* 445 * Allow other threads to access the socket 446 */ 447 error = sosctp_assoc_waitconnected(ssa, fflag); 448 449 switch (error) { 450 case 0: 451 case EINPROGRESS: 452 case EALREADY: 453 case EINTR: 454 /* Non-fatal errors */ 455 break; 456 default: 457 /* 458 * Fatal errors. It means that sctp_assoc_disconnected() 459 * must have been called. So we only need to do a 460 * SSA_REFRELE() here to release our hold done above. 461 */ 462 ASSERT(ssa->ssa_state & (SS_CANTSENDMORE | SS_CANTRCVMORE)); 463 SSA_REFRELE(ss, ssa); 464 ssa = NULL; 465 break; 466 } 467 468 *ssap = ssa; 469 return (error); 470 471 ret_err: 472 mutex_enter(&so->so_lock); 473 /* 474 * There should not be any upcall done by SCTP. So normally the 475 * ssa_refcnt should be 2. And we can call sosctp_assoc_free() 476 * directly. But since the ssa is inserted to the ss_soassocs 477 * array above, some thread can actually put a hold on it. In 478 * this special case, we "manually" decrease the ssa_refcnt by 2. 479 */ 480 if (ssa->ssa_refcnt > 2) 481 ssa->ssa_refcnt -= 2; 482 else 483 sosctp_assoc_free(ss, ssa); 484 *ssap = NULL; 485 return (error); 486 } 487 488 /* 489 * Inherit socket properties 490 */ 491 void 492 sosctp_so_inherit(struct sctp_sonode *lss, struct sctp_sonode *nss) 493 { 494 struct sonode *nso = &nss->ss_so; 495 struct sonode *lso = &lss->ss_so; 496 497 nso->so_options = lso->so_options & (SO_DEBUG|SO_REUSEADDR| 498 SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 499 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 500 nso->so_sndbuf = lso->so_sndbuf; 501 nso->so_rcvbuf = lso->so_rcvbuf; 502 nso->so_pgrp = lso->so_pgrp; 503 504 nso->so_rcvlowat = lso->so_rcvlowat; 505 nso->so_sndlowat = lso->so_sndlowat; 506 } 507 508 /* 509 * Branching association to it's own socket. Inherit properties from 510 * the parent, and move data from RX queue to TX. 511 */ 512 void 513 sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, 514 struct sctp_soassoc *ssa) 515 { 516 mblk_t *mp, **nmp, *last_mp; 517 struct sctp_soassoc *tmp; 518 519 sosctp_so_inherit(ss, nss); 520 521 nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK)); 522 nss->ss_so.so_state |= 523 (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING| 524 SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND)); 525 nss->ss_so.so_error = ssa->ssa_error; 526 #if 0 527 nss->ss_so.so_txqueued = ssa->ssa_txqueued; 528 #endif 529 nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull; 530 nss->ss_wroff = ssa->ssa_wroff; 531 nss->ss_wrsize = ssa->ssa_wrsize; 532 nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued; 533 nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; 534 535 if (nss->ss_so.so_rcv_queued > 0) { 536 nmp = &ss->ss_so.so_rcv_q_head; 537 last_mp = NULL; 538 while ((mp = *nmp) != NULL) { 539 tmp = *(struct sctp_soassoc **)DB_BASE(mp); 540 if (tmp == ssa) { 541 *nmp = mp->b_next; 542 ASSERT(DB_TYPE(mp) != M_DATA); 543 if (nss->ss_so.so_rcv_q_last_head == NULL) { 544 nss->ss_so.so_rcv_q_head = mp; 545 } else { 546 nss->ss_so.so_rcv_q_last_head->b_next = 547 mp; 548 } 549 nss->ss_so.so_rcv_q_last_head = mp; 550 nss->ss_so.so_rcv_q_last_head->b_prev = last_mp; 551 mp->b_next = NULL; 552 } else { 553 nmp = &mp->b_next; 554 last_mp = mp; 555 } 556 } 557 ss->ss_so.so_rcv_q_last_head = last_mp; 558 ss->ss_so.so_rcv_q_last_head->b_prev = last_mp; 559 } 560 } 561 562 void 563 sosctp_assoc_isconnecting(struct sctp_soassoc *ssa) 564 { 565 struct sonode *so = &ssa->ssa_sonode->ss_so; 566 567 ASSERT(MUTEX_HELD(&so->so_lock)); 568 569 ssa->ssa_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 570 ssa->ssa_state |= SS_ISCONNECTING; 571 cv_broadcast(&so->so_state_cv); 572 } 573 574 void 575 sosctp_assoc_isconnected(struct sctp_soassoc *ssa) 576 { 577 struct sonode *so = &ssa->ssa_sonode->ss_so; 578 579 ASSERT(MUTEX_HELD(&so->so_lock)); 580 581 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 582 ssa->ssa_state |= SS_ISCONNECTED; 583 cv_broadcast(&so->so_state_cv); 584 } 585 586 void 587 sosctp_assoc_isdisconnecting(struct sctp_soassoc *ssa) 588 { 589 struct sonode *so = &ssa->ssa_sonode->ss_so; 590 591 ASSERT(MUTEX_HELD(&so->so_lock)); 592 593 ssa->ssa_state &= ~SS_ISCONNECTING; 594 ssa->ssa_state |= SS_CANTSENDMORE; 595 cv_broadcast(&so->so_state_cv); 596 } 597 598 void 599 sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error) 600 { 601 struct sonode *so = &ssa->ssa_sonode->ss_so; 602 603 ASSERT(MUTEX_HELD(&so->so_lock)); 604 605 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 606 ssa->ssa_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 607 if (error != 0) 608 ssa->ssa_error = (ushort_t)error; 609 cv_broadcast(&so->so_state_cv); 610 } 611