xref: /titanic_50/usr/src/uts/common/inet/sockmods/socksctpsubr.c (revision 585995d5d19489bf178112c08c8c61ffc049ff6e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/t_lock.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/debug.h>
32 #include <sys/errno.h>
33 #include <sys/strsubr.h>
34 #include <sys/cmn_err.h>
35 #include <sys/sysmacros.h>
36 
37 #include <sys/socket.h>
38 #include <sys/socketvar.h>
39 #include <sys/strsun.h>
40 #include <sys/signal.h>
41 
42 #include <netinet/sctp.h>
43 #include <inet/sctp_itf.h>
44 #include <fs/sockfs/sockcommon.h>
45 #include "socksctp.h"
46 
47 extern kmem_cache_t *sosctp_assoccache;
48 /*
49  * Find a free association id. See os/fio.c file descriptor allocator
50  * for description of the algorithm.
51  */
52 sctp_assoc_t
53 sosctp_aid_get(struct sctp_sonode *ss)
54 {
55 	sctp_assoc_t id, size, ralloc;
56 	struct sctp_sa_id *assocs = ss->ss_assocs;
57 
58 	ASSERT((ss->ss_maxassoc & (ss->ss_maxassoc + 1)) == 0);
59 
60 	for (id = 1; (uint32_t)id < ss->ss_maxassoc; id |= id + 1) {
61 		size = id + 1;
62 		if (assocs[id].ssi_alloc == size)
63 			continue;
64 		for (ralloc = 0, size >>= 1; size != 0; size >>= 1) {
65 			ralloc += assocs[id + size].ssi_alloc;
66 			if (assocs[id].ssi_alloc == ralloc + size) {
67 				id += size;
68 				ralloc = 0;
69 			}
70 		}
71 		return (id);
72 	}
73 	return (-1);
74 }
75 
76 /*
77  * Allocate or free ID, depending on whether incr is 1 or -1
78  */
79 void
80 sosctp_aid_reserve(struct sctp_sonode *ss, sctp_assoc_t id, int incr)
81 {
82 	struct sctp_sa_id *assocs = ss->ss_assocs;
83 	sctp_assoc_t pid;
84 
85 	ASSERT((assocs[id].ssi_assoc == NULL && incr == 1) ||
86 	    (assocs[id].ssi_assoc != NULL && incr == -1));
87 
88 	for (pid = id; pid >= 0; pid = (pid & (pid + 1)) - 1) {
89 		assocs[pid].ssi_alloc += incr;
90 	}
91 }
92 
93 /*
94  * Increase size of the ss_assocs array to accommodate at least maxid.
95  * We keep the size of the form 2^n - 1 for benefit of sosctp_aid_get().
96  */
97 int
98 sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid, int kmflags)
99 {
100 	sctp_assoc_t newcnt, oldcnt;
101 	struct sctp_sa_id *newlist, *oldlist;
102 
103 	ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
104 	for (newcnt = 1; newcnt <= maxid; newcnt = (newcnt << 1) | 1) {
105 		continue;
106 	}
107 
108 	mutex_exit(&ss->ss_so.so_lock);
109 	newlist = kmem_alloc(newcnt * sizeof (struct sctp_sa_id), kmflags);
110 	mutex_enter(&ss->ss_so.so_lock);
111 	if (newlist == NULL) {
112 		return (-1);
113 	}
114 	oldcnt = ss->ss_maxassoc;
115 	if (newcnt <= oldcnt) {
116 		kmem_free(newlist, newcnt * sizeof (struct sctp_sa_id));
117 		return (0);
118 	}
119 	ASSERT((newcnt & (newcnt + 1)) == 0);
120 	oldlist = ss->ss_assocs;
121 	ss->ss_assocs = newlist;
122 	ss->ss_maxassoc = newcnt;
123 
124 	bcopy(oldlist, newlist, oldcnt * sizeof (struct sctp_sa_id));
125 	bzero(newlist + oldcnt,
126 	    (newcnt - oldcnt) * sizeof (struct sctp_sa_id));
127 	if (oldlist != NULL) {
128 		kmem_free(oldlist, oldcnt * sizeof (struct sctp_sa_id));
129 	}
130 	return (0);
131 }
132 
133 /*
134  * Convert a id into a pointer to sctp_sockassoc structure.
135  * Increments refcnt.
136  */
137 int
138 sosctp_assoc(struct sctp_sonode *ss, sctp_assoc_t id, struct sctp_soassoc **ssa)
139 {
140 	ASSERT(ssa != NULL);
141 	ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
142 	if ((uint32_t)id >= ss->ss_maxassoc) {
143 		*ssa = NULL;
144 		return (EINVAL);
145 	}
146 
147 	if ((*ssa = ss->ss_assocs[id].ssi_assoc) == NULL) {
148 		return (EINVAL);
149 	}
150 	if (((*ssa)->ssa_state & (SS_CANTSENDMORE|SS_CANTRCVMORE)) ==
151 	    (SS_CANTSENDMORE|SS_CANTRCVMORE)) {
152 		/*
153 		 * Disconnected connection, shouldn't be found anymore
154 		 */
155 		*ssa = NULL;
156 		return (ESHUTDOWN);
157 	}
158 	SSA_REFHOLD(*ssa)
159 
160 	return (0);
161 }
162 
163 /*
164  * Can be called from upcall, or through system call.
165  */
166 struct sctp_soassoc *
167 sosctp_assoc_create(struct sctp_sonode *ss, int kmflag)
168 {
169 	struct sctp_soassoc *ssa;
170 
171 	ssa = kmem_cache_alloc(sosctp_assoccache, kmflag);
172 	if (ssa != NULL) {
173 		ssa->ssa_type = SOSCTP_ASSOC;
174 		ssa->ssa_refcnt = 1;
175 		ssa->ssa_sonode = ss;
176 		ssa->ssa_state = 0;
177 		ssa->ssa_error = 0;
178 #if 0
179 		ssa->ssa_txqueued = 0;
180 #endif
181 		ssa->ssa_snd_qfull = 0;
182 	}
183 	dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa));
184 	return (ssa);
185 }
186 
187 void
188 sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa)
189 {
190 	struct sonode *so = &ss->ss_so;
191 
192 	dprint(2, ("sosctp_assoc_free %p %p (%d)\n", (void *)ss, (void *)ssa,
193 	    ssa->ssa_id));
194 	ASSERT(MUTEX_HELD(&so->so_lock));
195 	if (ssa->ssa_conn != NULL) {
196 		mutex_exit(&so->so_lock);
197 
198 		sctp_recvd(ssa->ssa_conn, so->so_rcvbuf);
199 		(void) sctp_disconnect(ssa->ssa_conn);
200 		sctp_close(ssa->ssa_conn);
201 
202 		mutex_enter(&so->so_lock);
203 		ssa->ssa_conn = NULL;
204 	}
205 	sosctp_aid_reserve(ss, ssa->ssa_id, -1);
206 	ss->ss_assocs[ssa->ssa_id].ssi_assoc = NULL;
207 	--ss->ss_assoccnt;
208 	kmem_cache_free(sosctp_assoccache, ssa);
209 }
210 
211 /*
212  * Pack the ancillary stuff taking care of alignment issues.
213  * sctp_input_add_ancillary() packs the information as:
214  * struct cmsghdr -> ancillary data + struct cmsghdr -> ancillary data + ...
215  * In the next version of SCTP, sctp_input_add_ancillary() should
216  * pack the information taking alignment into account, then we would
217  * not need this routine.
218  */
219 void
220 sosctp_pack_cmsg(const uchar_t *opt, struct nmsghdr *msg, int len)
221 {
222 	struct cmsghdr	*ocmsg;
223 	struct cmsghdr	*cmsg;
224 	int		optlen = 0;
225 	char		*cend;
226 	boolean_t	isaligned = B_TRUE;
227 
228 	ocmsg = (struct cmsghdr *)opt;
229 	cend = (char *)opt + len;
230 	/* Figure out the length incl. alignment et. al. */
231 	for (;;) {
232 		if ((char *)(ocmsg + 1) > cend ||
233 		    ((char *)ocmsg + ocmsg->cmsg_len) > cend) {
234 			break;
235 		}
236 		if (isaligned && !ISALIGNED_cmsghdr(ocmsg))
237 			isaligned = B_FALSE;
238 		optlen += ROUNDUP_cmsglen(ocmsg->cmsg_len);
239 		if (ocmsg->cmsg_len > 0) {
240 			ocmsg = (struct cmsghdr *)
241 			    ((uchar_t *)ocmsg + ocmsg->cmsg_len);
242 		} else {
243 			break;
244 		}
245 	}
246 	/* Now allocate and copy */
247 	msg->msg_control = kmem_zalloc(optlen, KM_SLEEP);
248 	msg->msg_controllen = optlen;
249 	if (isaligned) {
250 		ASSERT(optlen == len);
251 		bcopy(opt, msg->msg_control, len);
252 		return;
253 	}
254 	cmsg = (struct cmsghdr *)msg->msg_control;
255 	ASSERT(ISALIGNED_cmsghdr(cmsg));
256 	ocmsg = (struct cmsghdr *)opt;
257 	cend = (char *)opt + len;
258 	for (;;) {
259 		if ((char *)(ocmsg + 1) > cend ||
260 		    ((char *)ocmsg + ocmsg->cmsg_len) > cend) {
261 			break;
262 		}
263 		bcopy(ocmsg, cmsg, ocmsg->cmsg_len);
264 		if (ocmsg->cmsg_len > 0) {
265 			cmsg = (struct cmsghdr *)((uchar_t *)cmsg +
266 			    ROUNDUP_cmsglen(ocmsg->cmsg_len));
267 			ASSERT(ISALIGNED_cmsghdr(cmsg));
268 			ocmsg = (struct cmsghdr *)
269 			    ((uchar_t *)ocmsg + ocmsg->cmsg_len);
270 		} else {
271 			break;
272 		}
273 	}
274 }
275 
276 /*
277  * Find cmsghdr of specified type
278  */
279 struct cmsghdr *
280 sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type)
281 {
282 	struct cmsghdr *cmsg;
283 	char *cend;
284 
285 	cmsg = (struct cmsghdr *)control;
286 	cend = (char *)control + clen;
287 
288 	for (;;) {
289 		if ((char *)(cmsg + 1) > cend ||
290 		    ((char *)cmsg + cmsg->cmsg_len) > cend) {
291 			break;
292 		}
293 		if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
294 		    (cmsg->cmsg_type == type)) {
295 			return (cmsg);
296 		}
297 		if (cmsg->cmsg_len > 0) {
298 			cmsg = CMSG_NEXT(cmsg);
299 		} else {
300 			break;
301 		}
302 	}
303 	return (NULL);
304 }
305 
/*
 * Block until the association is connected or has failed.
 * fmode should contain any nonblocking flags.
 *
 * Returns 0 once SS_ISCONNECTED is set.  Otherwise returns
 *	EINPROGRESS	still connecting and fmode requests non-blocking
 *	EINTR		the socket is closing, or the wait was interrupted
 *	ssa_error	a pending association error (cleared when returned)
 *	ECONNREFUSED	not connected and no error left to report
 */
static int
sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode)
{
	struct sonode *so = &ssa->ssa_sonode->ss_so;
	int rv;

	ASSERT(ssa->ssa_error != 0 ||
	    (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)));

	for (;;) {
		/* Stop waiting once no longer purely "connecting"... */
		if ((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
		    SS_ISCONNECTING)
			break;
		/* ...or once an error has been posted. */
		if (ssa->ssa_error != 0)
			break;

		dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so));
		if (fmode & (FNDELAY|FNONBLOCK))
			return (EINPROGRESS);
		if (so->so_state & SS_CLOSING)
			return (EINTR);

		/*
		 * If the wait is interrupted, return EINTR and let the
		 * application use nonblocking techniques for detecting
		 * when the connection has been established.
		 */
		if (cv_wait_sig_swap(&so->so_state_cv, &so->so_lock) == 0)
			return (EINTR);
		dprint(3, ("awoken on %p\n", (void *)so));
	}

	rv = ssa->ssa_error;
	if (rv != 0) {
		/* Report the pending error exactly once. */
		ssa->ssa_error = 0;
		dprint(3, ("sosctp_assoc_waitconnected: error %d\n", rv));
		return (rv);
	}

	if (ssa->ssa_state & SS_ISCONNECTED)
		return (0);

	/*
	 * Another thread could have consumed so_error
	 * e.g. by calling read. - taken from sowaitconnected()
	 */
	rv = ECONNREFUSED;
	dprint(3, ("sosctp_waitconnected: error %d\n", rv));
	return (rv);
}
356 
357 /*
358  * Called from connect(), sendmsg() when we need to create a new association.
359  */
360 int
361 sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name,
362     socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag,
363     struct cred *cr, struct sctp_soassoc **ssap)
364 {
365 	struct sonode *so = &ss->ss_so;
366 	struct sctp_soassoc *ssa;
367 	struct sockaddr_storage laddr;
368 	sctp_sockbuf_limits_t sbl;
369 	sctp_assoc_t id;
370 	int error;
371 	struct cmsghdr *cmsg;
372 
373 	ASSERT(MUTEX_HELD(&so->so_lock));
374 
375 	/*
376 	 * System needs to pick local endpoint
377 	 */
378 	if (!(so->so_state & SS_ISBOUND)) {
379 		bzero(&laddr, sizeof (laddr));
380 		laddr.ss_family = so->so_family;
381 
382 		error = SOP_BIND(so, (struct sockaddr *)&laddr,
383 		    sizeof (laddr), _SOBIND_LOCK_HELD, cr);
384 		if (error) {
385 			*ssap = NULL;
386 			return (error);
387 		}
388 	}
389 
390 	/*
391 	 * Create a new association, and call connect on that.
392 	 */
393 	for (;;) {
394 		id = sosctp_aid_get(ss);
395 		if (id != -1) {
396 			break;
397 		}
398 		/*
399 		 * Array not large enough; increase size.
400 		 */
401 		(void) sosctp_aid_grow(ss, ss->ss_maxassoc, KM_SLEEP);
402 	}
403 	++ss->ss_assoccnt;
404 	sosctp_aid_reserve(ss, id, 1);
405 
406 	mutex_exit(&so->so_lock);
407 
408 	ssa = sosctp_assoc_create(ss, KM_SLEEP);
409 	ssa->ssa_wroff = ss->ss_wroff;
410 	ssa->ssa_wrsize = ss->ss_wrsize;
411 	ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle,
412 	    so->so_family, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, cr);
413 
414 	mutex_enter(&so->so_lock);
415 	ss->ss_assocs[id].ssi_assoc = ssa;
416 	ssa->ssa_id = id;
417 	if (ssa->ssa_conn == NULL) {
418 		ASSERT(ssa->ssa_refcnt == 1);
419 		sosctp_assoc_free(ss, ssa);
420 		*ssap = NULL;
421 		return (ENOMEM);
422 	}
423 	ssa->ssa_state |= SS_ISBOUND;
424 
425 	sosctp_assoc_isconnecting(ssa);
426 	SSA_REFHOLD(ssa);
427 	mutex_exit(&so->so_lock);
428 
429 	/*
430 	 * Can specify special init params
431 	 */
432 	cmsg = sosctp_find_cmsg(control, controllen, SCTP_INIT);
433 	if (cmsg != NULL) {
434 		error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_INITMSG,
435 		    cmsg + 1, cmsg->cmsg_len - sizeof (*cmsg));
436 		if (error != 0)
437 			goto ret_err;
438 	}
439 
440 	if ((error = sctp_connect(ssa->ssa_conn, name, namelen)) != 0)
441 		goto ret_err;
442 
443 	mutex_enter(&so->so_lock);
444 	/*
445 	 * Allow other threads to access the socket
446 	 */
447 	error = sosctp_assoc_waitconnected(ssa, fflag);
448 
449 	switch (error) {
450 	case 0:
451 	case EINPROGRESS:
452 	case EALREADY:
453 	case EINTR:
454 		/* Non-fatal errors */
455 		break;
456 	default:
457 		/*
458 		 * Fatal errors.  It means that sctp_assoc_disconnected()
459 		 * must have been called.  So we only need to do a
460 		 * SSA_REFRELE() here to release our hold done above.
461 		 */
462 		ASSERT(ssa->ssa_state & (SS_CANTSENDMORE | SS_CANTRCVMORE));
463 		SSA_REFRELE(ss, ssa);
464 		ssa = NULL;
465 		break;
466 	}
467 
468 	*ssap = ssa;
469 	return (error);
470 
471 ret_err:
472 	mutex_enter(&so->so_lock);
473 	/*
474 	 * There should not be any upcall done by SCTP.  So normally the
475 	 * ssa_refcnt should be 2.  And we can call sosctp_assoc_free()
476 	 * directly.  But since the ssa is inserted to the ss_soassocs
477 	 * array above, some thread can actually put a hold on it.  In
478 	 * this special case, we "manually" decrease the ssa_refcnt by 2.
479 	 */
480 	if (ssa->ssa_refcnt > 2)
481 		ssa->ssa_refcnt -= 2;
482 	else
483 		sosctp_assoc_free(ss, ssa);
484 	*ssap = NULL;
485 	return (error);
486 }
487 
488 /*
489  * Inherit socket properties
490  */
491 void
492 sosctp_so_inherit(struct sctp_sonode *lss, struct sctp_sonode *nss)
493 {
494 	struct sonode *nso = &nss->ss_so;
495 	struct sonode *lso = &lss->ss_so;
496 
497 	nso->so_options = lso->so_options & (SO_DEBUG|SO_REUSEADDR|
498 	    SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
499 	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
500 	nso->so_sndbuf = lso->so_sndbuf;
501 	nso->so_rcvbuf = lso->so_rcvbuf;
502 	nso->so_pgrp = lso->so_pgrp;
503 
504 	nso->so_rcvlowat = lso->so_rcvlowat;
505 	nso->so_sndlowat = lso->so_sndlowat;
506 }
507 
508 /*
509  * Branching association to it's own socket. Inherit properties from
510  * the parent, and move data from RX queue to TX.
511  */
512 void
513 sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss,
514     struct sctp_soassoc *ssa)
515 {
516 	mblk_t *mp, **nmp, *last_mp;
517 	struct sctp_soassoc *tmp;
518 
519 	sosctp_so_inherit(ss, nss);
520 
521 	nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK));
522 	nss->ss_so.so_state |=
523 	    (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING|
524 	    SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND));
525 	nss->ss_so.so_error = ssa->ssa_error;
526 #if 0
527 	nss->ss_so.so_txqueued = ssa->ssa_txqueued;
528 #endif
529 	nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull;
530 	nss->ss_wroff = ssa->ssa_wroff;
531 	nss->ss_wrsize = ssa->ssa_wrsize;
532 	nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued;
533 	nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn;
534 
535 	if (nss->ss_so.so_rcv_queued > 0) {
536 		nmp = &ss->ss_so.so_rcv_q_head;
537 		last_mp = NULL;
538 		while ((mp = *nmp) != NULL) {
539 			tmp = *(struct sctp_soassoc **)DB_BASE(mp);
540 			if (tmp == ssa) {
541 				*nmp = mp->b_next;
542 				ASSERT(DB_TYPE(mp) != M_DATA);
543 				if (nss->ss_so.so_rcv_q_last_head == NULL) {
544 					nss->ss_so.so_rcv_q_head = mp;
545 				} else {
546 					nss->ss_so.so_rcv_q_last_head->b_next =
547 					    mp;
548 				}
549 				nss->ss_so.so_rcv_q_last_head = mp;
550 				nss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
551 				mp->b_next = NULL;
552 			} else {
553 				nmp = &mp->b_next;
554 				last_mp = mp;
555 			}
556 		}
557 		ss->ss_so.so_rcv_q_last_head = last_mp;
558 		ss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
559 	}
560 }
561 
562 void
563 sosctp_assoc_isconnecting(struct sctp_soassoc *ssa)
564 {
565 	struct sonode *so = &ssa->ssa_sonode->ss_so;
566 
567 	ASSERT(MUTEX_HELD(&so->so_lock));
568 
569 	ssa->ssa_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
570 	ssa->ssa_state |= SS_ISCONNECTING;
571 	cv_broadcast(&so->so_state_cv);
572 }
573 
574 void
575 sosctp_assoc_isconnected(struct sctp_soassoc *ssa)
576 {
577 	struct sonode *so = &ssa->ssa_sonode->ss_so;
578 
579 	ASSERT(MUTEX_HELD(&so->so_lock));
580 
581 	ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
582 	ssa->ssa_state |= SS_ISCONNECTED;
583 	cv_broadcast(&so->so_state_cv);
584 }
585 
586 void
587 sosctp_assoc_isdisconnecting(struct sctp_soassoc *ssa)
588 {
589 	struct sonode *so = &ssa->ssa_sonode->ss_so;
590 
591 	ASSERT(MUTEX_HELD(&so->so_lock));
592 
593 	ssa->ssa_state &= ~SS_ISCONNECTING;
594 	ssa->ssa_state |= SS_CANTSENDMORE;
595 	cv_broadcast(&so->so_state_cv);
596 }
597 
598 void
599 sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error)
600 {
601 	struct sonode *so = &ssa->ssa_sonode->ss_so;
602 
603 	ASSERT(MUTEX_HELD(&so->so_lock));
604 
605 	ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
606 	ssa->ssa_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
607 	if (error != 0)
608 		ssa->ssa_error = (ushort_t)error;
609 	cv_broadcast(&so->so_state_cv);
610 }
611