1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/t_lock.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/debug.h> 31 #include <sys/errno.h> 32 #include <sys/strsubr.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sysmacros.h> 35 36 #include <sys/socket.h> 37 #include <sys/socketvar.h> 38 #include <sys/strsun.h> 39 #include <sys/signal.h> 40 41 #include <netinet/sctp.h> 42 #include <inet/sctp_itf.h> 43 #include <fs/sockfs/sockcommon.h> 44 #include "socksctp.h" 45 46 extern kmem_cache_t *sosctp_assoccache; 47 /* 48 * Find a free association id. See os/fio.c file descriptor allocator 49 * for description of the algorithm. 50 */ 51 sctp_assoc_t 52 sosctp_aid_get(struct sctp_sonode *ss) 53 { 54 sctp_assoc_t id, size, ralloc; 55 struct sctp_sa_id *assocs = ss->ss_assocs; 56 57 ASSERT((ss->ss_maxassoc & (ss->ss_maxassoc + 1)) == 0); 58 59 for (id = 1; (uint32_t)id < ss->ss_maxassoc; id |= id + 1) { 60 size = id + 1; 61 if (assocs[id].ssi_alloc == size) 62 continue; 63 for (ralloc = 0, size >>= 1; size != 0; size >>= 1) { 64 ralloc += assocs[id + size].ssi_alloc; 65 if (assocs[id].ssi_alloc == ralloc + size) { 66 id += size; 67 ralloc = 0; 68 } 69 } 70 return (id); 71 } 72 return (-1); 73 } 74 75 /* 76 * Allocate or free ID, depending on whether incr is 1 or -1 77 */ 78 void 79 sosctp_aid_reserve(struct sctp_sonode *ss, sctp_assoc_t id, int incr) 80 { 81 struct sctp_sa_id *assocs = ss->ss_assocs; 82 sctp_assoc_t pid; 83 84 ASSERT((assocs[id].ssi_assoc == NULL && incr == 1) || 85 (assocs[id].ssi_assoc != NULL && incr == -1)); 86 87 for (pid = id; pid >= 0; pid = (pid & (pid + 1)) - 1) { 88 assocs[pid].ssi_alloc += incr; 89 } 90 } 91 92 /* 93 * Increase size of the ss_assocs array to accommodate at least maxid. 94 * We keep the size of the form 2^n - 1 for benefit of sosctp_aid_get(). 95 */ 96 int 97 sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid, int kmflags) 98 { 99 sctp_assoc_t newcnt, oldcnt; 100 struct sctp_sa_id *newlist, *oldlist; 101 102 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 103 for (newcnt = 1; newcnt <= maxid; newcnt = (newcnt << 1) | 1) { 104 continue; 105 } 106 107 mutex_exit(&ss->ss_so.so_lock); 108 newlist = kmem_alloc(newcnt * sizeof (struct sctp_sa_id), kmflags); 109 mutex_enter(&ss->ss_so.so_lock); 110 if (newlist == NULL) { 111 return (-1); 112 } 113 oldcnt = ss->ss_maxassoc; 114 if (newcnt <= oldcnt) { 115 kmem_free(newlist, newcnt * sizeof (struct sctp_sa_id)); 116 return (0); 117 } 118 ASSERT((newcnt & (newcnt + 1)) == 0); 119 oldlist = ss->ss_assocs; 120 ss->ss_assocs = newlist; 121 ss->ss_maxassoc = newcnt; 122 123 bcopy(oldlist, newlist, oldcnt * sizeof (struct sctp_sa_id)); 124 bzero(newlist + oldcnt, 125 (newcnt - oldcnt) * sizeof (struct sctp_sa_id)); 126 if (oldlist != NULL) { 127 kmem_free(oldlist, oldcnt * sizeof (struct sctp_sa_id)); 128 } 129 return (0); 130 } 131 132 /* 133 * Convert a id into a pointer to sctp_sockassoc structure. 134 * Increments refcnt. 135 */ 136 int 137 sosctp_assoc(struct sctp_sonode *ss, sctp_assoc_t id, struct sctp_soassoc **ssa) 138 { 139 ASSERT(ssa != NULL); 140 ASSERT(MUTEX_HELD(&ss->ss_so.so_lock)); 141 if ((uint32_t)id >= ss->ss_maxassoc) { 142 *ssa = NULL; 143 return (EINVAL); 144 } 145 146 if ((*ssa = ss->ss_assocs[id].ssi_assoc) == NULL) { 147 return (EINVAL); 148 } 149 if (((*ssa)->ssa_state & (SS_CANTSENDMORE|SS_CANTRCVMORE)) == 150 (SS_CANTSENDMORE|SS_CANTRCVMORE)) { 151 /* 152 * Disconnected connection, shouldn't be found anymore 153 */ 154 *ssa = NULL; 155 return (ESHUTDOWN); 156 } 157 SSA_REFHOLD(*ssa) 158 159 return (0); 160 } 161 162 /* 163 * Can be called from upcall, or through system call. 164 */ 165 struct sctp_soassoc * 166 sosctp_assoc_create(struct sctp_sonode *ss, int kmflag) 167 { 168 struct sctp_soassoc *ssa; 169 170 ssa = kmem_cache_alloc(sosctp_assoccache, kmflag); 171 if (ssa != NULL) { 172 ssa->ssa_type = SOSCTP_ASSOC; 173 ssa->ssa_refcnt = 1; 174 ssa->ssa_sonode = ss; 175 ssa->ssa_state = 0; 176 ssa->ssa_error = 0; 177 ssa->ssa_snd_qfull = 0; 178 ssa->ssa_rcv_queued = 0; 179 ssa->ssa_flowctrld = B_FALSE; 180 } 181 dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa)); 182 return (ssa); 183 } 184 185 void 186 sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa) 187 { 188 struct sonode *so = &ss->ss_so; 189 190 dprint(2, ("sosctp_assoc_free %p %p (%d)\n", (void *)ss, (void *)ssa, 191 ssa->ssa_id)); 192 ASSERT(MUTEX_HELD(&so->so_lock)); 193 if (ssa->ssa_conn != NULL) { 194 mutex_exit(&so->so_lock); 195 196 sctp_recvd(ssa->ssa_conn, so->so_rcvbuf); 197 (void) sctp_disconnect(ssa->ssa_conn); 198 sctp_close(ssa->ssa_conn); 199 200 mutex_enter(&so->so_lock); 201 ssa->ssa_conn = NULL; 202 } 203 sosctp_aid_reserve(ss, ssa->ssa_id, -1); 204 ss->ss_assocs[ssa->ssa_id].ssi_assoc = NULL; 205 --ss->ss_assoccnt; 206 kmem_cache_free(sosctp_assoccache, ssa); 207 } 208 209 /* 210 * Pack the ancillary stuff taking care of alignment issues. 211 * sctp_input_add_ancillary() packs the information as: 212 * struct cmsghdr -> ancillary data + struct cmsghdr -> ancillary data + ... 213 * In the next version of SCTP, sctp_input_add_ancillary() should 214 * pack the information taking alignment into account, then we would 215 * not need this routine. 216 */ 217 void 218 sosctp_pack_cmsg(const uchar_t *opt, struct nmsghdr *msg, int len) 219 { 220 struct cmsghdr *ocmsg; 221 struct cmsghdr *cmsg; 222 int optlen = 0; 223 char *cend; 224 boolean_t isaligned = B_TRUE; 225 226 ocmsg = (struct cmsghdr *)opt; 227 cend = (char *)opt + len; 228 /* Figure out the length incl. alignment et. al. */ 229 for (;;) { 230 if ((char *)(ocmsg + 1) > cend || 231 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 232 break; 233 } 234 if (isaligned && !ISALIGNED_cmsghdr(ocmsg)) 235 isaligned = B_FALSE; 236 optlen += ROUNDUP_cmsglen(ocmsg->cmsg_len); 237 if (ocmsg->cmsg_len > 0) { 238 ocmsg = (struct cmsghdr *) 239 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 240 } else { 241 break; 242 } 243 } 244 /* Now allocate and copy */ 245 msg->msg_control = kmem_zalloc(optlen, KM_SLEEP); 246 msg->msg_controllen = optlen; 247 if (isaligned) { 248 ASSERT(optlen == len); 249 bcopy(opt, msg->msg_control, len); 250 return; 251 } 252 cmsg = (struct cmsghdr *)msg->msg_control; 253 ASSERT(ISALIGNED_cmsghdr(cmsg)); 254 ocmsg = (struct cmsghdr *)opt; 255 cend = (char *)opt + len; 256 for (;;) { 257 if ((char *)(ocmsg + 1) > cend || 258 ((char *)ocmsg + ocmsg->cmsg_len) > cend) { 259 break; 260 } 261 bcopy(ocmsg, cmsg, ocmsg->cmsg_len); 262 if (ocmsg->cmsg_len > 0) { 263 cmsg = (struct cmsghdr *)((uchar_t *)cmsg + 264 ROUNDUP_cmsglen(ocmsg->cmsg_len)); 265 ASSERT(ISALIGNED_cmsghdr(cmsg)); 266 ocmsg = (struct cmsghdr *) 267 ((uchar_t *)ocmsg + ocmsg->cmsg_len); 268 } else { 269 break; 270 } 271 } 272 } 273 274 /* 275 * Find cmsghdr of specified type 276 */ 277 struct cmsghdr * 278 sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type) 279 { 280 struct cmsghdr *cmsg; 281 char *cend; 282 283 cmsg = (struct cmsghdr *)control; 284 cend = (char *)control + clen; 285 286 for (;;) { 287 if ((char *)(cmsg + 1) > cend || 288 ((char *)cmsg + cmsg->cmsg_len) > cend) { 289 break; 290 } 291 if ((cmsg->cmsg_level == IPPROTO_SCTP) && 292 (cmsg->cmsg_type == type)) { 293 return (cmsg); 294 } 295 if (cmsg->cmsg_len > 0) { 296 cmsg = CMSG_NEXT(cmsg); 297 } else { 298 break; 299 } 300 } 301 return (NULL); 302 } 303 304 /* 305 * Wait until the association is connected or there is an error. 306 * fmode should contain any nonblocking flags. 307 */ 308 static int 309 sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode) 310 { 311 struct sonode *so = &ssa->ssa_sonode->ss_so; 312 int error = 0; 313 314 ASSERT((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) || 315 ssa->ssa_error != 0); 316 317 while ((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 318 SS_ISCONNECTING && ssa->ssa_error == 0) { 319 320 dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so)); 321 if (fmode & (FNDELAY|FNONBLOCK)) 322 return (EINPROGRESS); 323 324 if (so->so_state & SS_CLOSING) 325 return (EINTR); 326 if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) { 327 /* 328 * Return EINTR and let the application use 329 * nonblocking techniques for detecting when 330 * the connection has been established. 331 */ 332 return (EINTR); 333 } 334 dprint(3, ("awoken on %p\n", (void *)so)); 335 } 336 if (ssa->ssa_error != 0) { 337 error = ssa->ssa_error; 338 ssa->ssa_error = 0; 339 dprint(3, ("sosctp_assoc_waitconnected: error %d\n", error)); 340 return (error); 341 } 342 343 if (!(ssa->ssa_state & SS_ISCONNECTED)) { 344 /* 345 * Another thread could have consumed so_error 346 * e.g. by calling read. - take from sowaitconnected() 347 */ 348 error = ECONNREFUSED; 349 dprint(3, ("sosctp_waitconnected: error %d\n", error)); 350 return (error); 351 } 352 return (0); 353 } 354 355 /* 356 * Called from connect(), sendmsg() when we need to create a new association. 357 */ 358 int 359 sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name, 360 socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag, 361 struct cred *cr, struct sctp_soassoc **ssap) 362 { 363 struct sonode *so = &ss->ss_so; 364 struct sctp_soassoc *ssa; 365 struct sockaddr_storage laddr; 366 sctp_sockbuf_limits_t sbl; 367 sctp_assoc_t id; 368 int error; 369 struct cmsghdr *cmsg; 370 pid_t pid = curproc->p_pid; 371 372 ASSERT(MUTEX_HELD(&so->so_lock)); 373 374 /* 375 * System needs to pick local endpoint 376 */ 377 if (!(so->so_state & SS_ISBOUND)) { 378 bzero(&laddr, sizeof (laddr)); 379 laddr.ss_family = so->so_family; 380 381 error = SOP_BIND(so, (struct sockaddr *)&laddr, 382 sizeof (laddr), _SOBIND_LOCK_HELD, cr); 383 if (error) { 384 *ssap = NULL; 385 return (error); 386 } 387 } 388 389 /* 390 * Create a new association, and call connect on that. 391 */ 392 for (;;) { 393 id = sosctp_aid_get(ss); 394 if (id != -1) { 395 break; 396 } 397 /* 398 * Array not large enough; increase size. 399 */ 400 (void) sosctp_aid_grow(ss, ss->ss_maxassoc, KM_SLEEP); 401 } 402 ++ss->ss_assoccnt; 403 sosctp_aid_reserve(ss, id, 1); 404 405 mutex_exit(&so->so_lock); 406 407 ssa = sosctp_assoc_create(ss, KM_SLEEP); 408 ssa->ssa_wroff = ss->ss_wroff; 409 ssa->ssa_wrsize = ss->ss_wrsize; 410 ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle, 411 so->so_family, so->so_type, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, 412 &sbl, cr); 413 414 mutex_enter(&so->so_lock); 415 ss->ss_assocs[id].ssi_assoc = ssa; 416 ssa->ssa_id = id; 417 if (ssa->ssa_conn == NULL) { 418 ASSERT(ssa->ssa_refcnt == 1); 419 sosctp_assoc_free(ss, ssa); 420 *ssap = NULL; 421 return (ENOMEM); 422 } 423 ssa->ssa_state |= SS_ISBOUND; 424 425 sosctp_assoc_isconnecting(ssa); 426 SSA_REFHOLD(ssa); 427 mutex_exit(&so->so_lock); 428 429 /* 430 * Can specify special init params 431 */ 432 cmsg = sosctp_find_cmsg(control, controllen, SCTP_INIT); 433 if (cmsg != NULL) { 434 error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_INITMSG, 435 cmsg + 1, cmsg->cmsg_len - sizeof (*cmsg)); 436 if (error != 0) 437 goto ret_err; 438 } 439 440 if ((error = sctp_connect(ssa->ssa_conn, name, namelen, cr, pid)) != 0) 441 goto ret_err; 442 443 mutex_enter(&so->so_lock); 444 /* 445 * Allow other threads to access the socket 446 */ 447 error = sosctp_assoc_waitconnected(ssa, fflag); 448 449 switch (error) { 450 case 0: 451 case EINPROGRESS: 452 case EALREADY: 453 case EINTR: 454 /* Non-fatal errors */ 455 break; 456 default: 457 /* 458 * Fatal errors. It means that sctp_assoc_disconnected() 459 * must have been called. So we only need to do a 460 * SSA_REFRELE() here to release our hold done above. 461 */ 462 ASSERT(ssa->ssa_state & (SS_CANTSENDMORE | SS_CANTRCVMORE)); 463 SSA_REFRELE(ss, ssa); 464 ssa = NULL; 465 break; 466 } 467 468 *ssap = ssa; 469 return (error); 470 471 ret_err: 472 mutex_enter(&so->so_lock); 473 /* 474 * There should not be any upcall done by SCTP. So normally the 475 * ssa_refcnt should be 2. And we can call sosctp_assoc_free() 476 * directly. But since the ssa is inserted to the ss_soassocs 477 * array above, some thread can actually put a hold on it. In 478 * this special case, we "manually" decrease the ssa_refcnt by 2. 479 */ 480 if (ssa->ssa_refcnt > 2) 481 ssa->ssa_refcnt -= 2; 482 else 483 sosctp_assoc_free(ss, ssa); 484 *ssap = NULL; 485 return (error); 486 } 487 488 /* 489 * Inherit socket properties 490 */ 491 void 492 sosctp_so_inherit(struct sctp_sonode *lss, struct sctp_sonode *nss) 493 { 494 struct sonode *nso = &nss->ss_so; 495 struct sonode *lso = &lss->ss_so; 496 497 nso->so_options = lso->so_options & (SO_DEBUG|SO_REUSEADDR| 498 SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK| 499 SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER); 500 nso->so_sndbuf = lso->so_sndbuf; 501 nso->so_rcvbuf = lso->so_rcvbuf; 502 nso->so_pgrp = lso->so_pgrp; 503 504 nso->so_rcvlowat = lso->so_rcvlowat; 505 nso->so_sndlowat = lso->so_sndlowat; 506 } 507 508 /* 509 * Branching association to it's own socket. Inherit properties from 510 * the parent, and move data for the association to the new socket. 511 */ 512 void 513 sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss, 514 struct sctp_soassoc *ssa) 515 { 516 mblk_t *mp, **nmp, *last_mp; 517 struct sctp_soassoc *tmp; 518 struct sonode *nso, *sso; 519 520 sosctp_so_inherit(ss, nss); 521 522 sso = &ss->ss_so; 523 nso = &nss->ss_so; 524 525 nso->so_state |= (sso->so_state & (SS_NDELAY|SS_NONBLOCK)); 526 nso->so_state |= 527 (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING| 528 SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND)); 529 nso->so_error = ssa->ssa_error; 530 nso->so_snd_qfull = ssa->ssa_snd_qfull; 531 nso->so_proto_props.sopp_wroff = ssa->ssa_wroff; 532 nso->so_proto_props.sopp_maxblk = ssa->ssa_wrsize; 533 nso->so_rcv_queued = ssa->ssa_rcv_queued; 534 nso->so_flowctrld = ssa->ssa_flowctrld; 535 nso->so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn; 536 /* The peeled off socket is connection oriented */ 537 nso->so_mode |= SM_CONNREQUIRED; 538 539 /* Consolidate all data on a single rcv list */ 540 if (sso->so_rcv_head != NULL) { 541 so_process_new_message(&ss->ss_so, sso->so_rcv_head, 542 sso->so_rcv_last_head); 543 sso->so_rcv_head = NULL; 544 sso->so_rcv_last_head = NULL; 545 } 546 547 if (nso->so_rcv_queued > 0) { 548 nmp = &sso->so_rcv_q_head; 549 last_mp = NULL; 550 while ((mp = *nmp) != NULL) { 551 tmp = *(struct sctp_soassoc **)DB_BASE(mp); 552 #ifdef DEBUG 553 { 554 /* 555 * Verify that b_prev points to the last 556 * mblk in the b_cont chain (as mandated 557 * by so_dequeue_msg().) 558 */ 559 mblk_t *mp1 = mp; 560 while (mp1->b_cont != NULL) 561 mp1 = mp1->b_cont; 562 VERIFY(mp->b_prev == mp1); 563 } 564 #endif /* DEBUG */ 565 if (tmp == ssa) { 566 *nmp = mp->b_next; 567 ASSERT(DB_TYPE(mp) != M_DATA); 568 if (nso->so_rcv_q_last_head == NULL) { 569 nso->so_rcv_q_head = mp; 570 } else { 571 nso->so_rcv_q_last_head->b_next = mp; 572 } 573 nso->so_rcv_q_last_head = mp; 574 mp->b_next = NULL; 575 } else { 576 nmp = &mp->b_next; 577 last_mp = mp; 578 } 579 } 580 581 sso->so_rcv_q_last_head = last_mp; 582 } 583 } 584 585 void 586 sosctp_assoc_isconnecting(struct sctp_soassoc *ssa) 587 { 588 struct sonode *so = &ssa->ssa_sonode->ss_so; 589 590 ASSERT(MUTEX_HELD(&so->so_lock)); 591 592 ssa->ssa_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 593 ssa->ssa_state |= SS_ISCONNECTING; 594 cv_broadcast(&so->so_state_cv); 595 } 596 597 void 598 sosctp_assoc_isconnected(struct sctp_soassoc *ssa) 599 { 600 struct sonode *so = &ssa->ssa_sonode->ss_so; 601 602 ASSERT(MUTEX_HELD(&so->so_lock)); 603 604 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 605 ssa->ssa_state |= SS_ISCONNECTED; 606 cv_broadcast(&so->so_state_cv); 607 } 608 609 void 610 sosctp_assoc_isdisconnecting(struct sctp_soassoc *ssa) 611 { 612 struct sonode *so = &ssa->ssa_sonode->ss_so; 613 614 ASSERT(MUTEX_HELD(&so->so_lock)); 615 616 ssa->ssa_state &= ~SS_ISCONNECTING; 617 ssa->ssa_state |= SS_CANTSENDMORE; 618 cv_broadcast(&so->so_state_cv); 619 } 620 621 void 622 sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error) 623 { 624 struct sonode *so = &ssa->ssa_sonode->ss_so; 625 626 ASSERT(MUTEX_HELD(&so->so_lock)); 627 628 ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 629 ssa->ssa_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 630 if (error != 0) 631 ssa->ssa_error = (ushort_t)error; 632 cv_broadcast(&so->so_state_cv); 633 } 634