xref: /freebsd/sys/netinet/tcp_usrreq.c (revision 8e537d168674d6b65869f73c20813001af875738)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	From: @(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34  *	$Id: tcp_usrreq.c,v 1.24 1996/07/12 17:28:47 davidg Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/queue.h>
39 #include <sys/systm.h>
40 #include <sys/kernel.h>
41 #include <sys/sysctl.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/protosw.h>
47 #include <sys/errno.h>
48 #include <sys/stat.h>
49 
50 #include <net/if.h>
51 #include <net/route.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/tcp.h>
60 #include <netinet/tcp_fsm.h>
61 #include <netinet/tcp_seq.h>
62 #include <netinet/tcp_timer.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/tcpip.h>
65 #ifdef TCPDEBUG
66 #include <netinet/tcp_debug.h>
67 #endif
68 
69 /*
70  * TCP protocol interface to socket abstraction.
71  */
72 extern	char *tcpstates[];
73 
74 static int	tcp_attach __P((struct socket *));
75 static int	tcp_connect __P((struct tcpcb *, struct mbuf *));
76 static struct tcpcb *
77 		tcp_disconnect __P((struct tcpcb *));
78 static struct tcpcb *
79 		tcp_usrclosed __P((struct tcpcb *));
80 
#ifdef notdef
/*
 * Process a TCP user request for TCP tb.  If this is a send request
 * then m is the mbuf chain of send data.  If this is a timer expiration
 * (called from the software clock routine), then timertype tells which timer.
 *
 * This single-entry dispatcher is compiled out (it sits under
 * ``#ifdef notdef'').  The individual requests are implemented as the
 * separate tcp_usr_*() functions later in this file.
 * NOTE(review): presumably retained for reference only -- confirm before
 * deleting.
 *
 * The meaning of m, nam and control depends on req: for PRU_CONTROL they
 * are cast to the in_control() arguments, for PRU_SENSE m is cast to a
 * struct stat pointer, and for PRU_RCVOOB/PRU_SLOWTIMO nam is cast to an
 * int.  Returns 0 or an errno value.
 */
/*ARGSUSED*/
int
tcp_usrreq(so, req, m, nam, control)
	struct socket *so;
	int req;
	struct mbuf *m, *nam, *control;
{
	register struct inpcb *inp;
	register struct tcpcb *tp = 0;
	struct sockaddr_in *sinp;
	int s;
	int error = 0;
#ifdef TCPDEBUG
	int ostate;
#endif

	if (req == PRU_CONTROL)
		return (in_control(so, (u_long)m, (caddr_t)nam,
			(struct ifnet *)control));
	/* Control data is not accepted: free both chains and fail. */
	if (control && control->m_len) {
		m_freem(control);
		if (m)
			m_freem(m);
		return (EINVAL);
	}

	s = splnet();
	inp = sotoinpcb(so);
	/*
	 * When a TCP is attached to a socket, then there will be
	 * a (struct inpcb) pointed at by the socket, and this
	 * structure will point at a subsidiary (struct tcpcb).
	 */
	if (inp == 0 && req != PRU_ATTACH) {
		splx(s);
#if 0
		/*
		 * The following corrects an mbuf leak under rare
		 * circumstances, but has not been fully tested.
		 */
		if (m && req != PRU_SENSE)
			m_freem(m);
#else
		/* safer version of fix for mbuf leak */
		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
			m_freem(m);
#endif
		return (EINVAL);		/* XXX */
	}
	if (inp) {
		tp = intotcpcb(inp);
		/* WHAT IF TP IS 0? */
#ifdef KPROF
		tcp_acounts[tp->t_state][req]++;
#endif
#ifdef TCPDEBUG
		ostate = tp->t_state;
	} else
		ostate = 0;
#else /* TCPDEBUG */
	}
#endif /* TCPDEBUG */

	switch (req) {

	/*
	 * TCP attaches to socket via PRU_ATTACH, reserving space,
	 * and an internet control block.
	 */
	case PRU_ATTACH:
		if (inp) {
			error = EISCONN;
			break;
		}
		error = tcp_attach(so);
		if (error)
			break;
		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
			so->so_linger = TCP_LINGERTIME * hz;
		tp = sototcpcb(so);
		break;

	/*
	 * PRU_DETACH detaches the TCP protocol from the socket.
	 * If the protocol state is non-embryonic, then can't
	 * do this directly: have to initiate a PRU_DISCONNECT,
	 * which may finish later; embryonic TCB's can just
	 * be discarded here.
	 */
	case PRU_DETACH:
		if (tp->t_state > TCPS_LISTEN)
			tp = tcp_disconnect(tp);
		else
			tp = tcp_close(tp);
		break;

	/*
	 * Give the socket an address.
	 */
	case PRU_BIND:
		/*
		 * Must check for multicast addresses and disallow binding
		 * to them.
		 */
		sinp = mtod(nam, struct sockaddr_in *);
		if (sinp->sin_family == AF_INET &&
		    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			break;
		}
		error = in_pcbbind(inp, nam);
		if (error)
			break;
		break;

	/*
	 * Prepare to accept connections.
	 */
	case PRU_LISTEN:
		if (inp->inp_lport == 0)
			error = in_pcbbind(inp, NULL);
		if (error == 0)
			tp->t_state = TCPS_LISTEN;
		break;

	/*
	 * Initiate connection to peer.
	 * Create a template for use in transmissions on this connection.
	 * Enter SYN_SENT state, and mark socket as connecting.
	 * Start keep-alive timer, and seed output sequence space.
	 * Send initial segment on connection.
	 */
	case PRU_CONNECT:
		/*
		 * Must disallow TCP ``connections'' to multicast addresses.
		 */
		sinp = mtod(nam, struct sockaddr_in *);
		if (sinp->sin_family == AF_INET
		    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			break;
		}

		if ((error = tcp_connect(tp, nam)) != 0)
			break;
		error = tcp_output(tp);
		break;

	/*
	 * Create a TCP connection between two sockets.
	 */
	case PRU_CONNECT2:
		error = EOPNOTSUPP;
		break;

	/*
	 * Initiate disconnect from peer.
	 * If connection never passed embryonic stage, just drop;
	 * else if don't need to let data drain, then can just drop anyways,
	 * else have to begin TCP shutdown process: mark socket disconnecting,
	 * drain unread data, state switch to reflect user close, and
	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
	 * when peer sends FIN and acks ours.
	 *
	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
	 */
	case PRU_DISCONNECT:
		tp = tcp_disconnect(tp);
		break;

	/*
	 * Accept a connection.  Essentially all the work is
	 * done at higher levels; just return the address
	 * of the peer, storing through addr.
	 */
	case PRU_ACCEPT:
		in_setpeeraddr(inp, nam);
		break;

	/*
	 * Mark the connection as being incapable of further output.
	 */
	case PRU_SHUTDOWN:
		socantsendmore(so);
		tp = tcp_usrclosed(tp);
		if (tp)
			error = tcp_output(tp);
		break;

	/*
	 * After a receive, possibly send window update to peer.
	 */
	case PRU_RCVD:
		(void) tcp_output(tp);
		break;

	/*
	 * Do a send by putting data in output queue and updating urgent
	 * marker if URG set.  Possibly send more data.
	 */
	case PRU_SEND_EOF:
	case PRU_SEND:
		sbappend(&so->so_snd, m);
		if (nam && tp->t_state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
			error = tcp_connect(tp, nam);
			if (error)
				break;
			tp->snd_wnd = TTCP_CLIENT_SND_WND;
			tcp_mss(tp, -1);
		}

		if (req == PRU_SEND_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			tp = tcp_usrclosed(tp);
		}
		if (tp != NULL)
			error = tcp_output(tp);
		break;

	/*
	 * Abort the TCP.
	 */
	case PRU_ABORT:
		tp = tcp_drop(tp, ECONNABORTED);
		break;

	/* m is really a struct stat pointer here; returns without tracing. */
	case PRU_SENSE:
		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
		(void) splx(s);
		return (0);

	/* nam carries the receive flags (MSG_PEEK) cast to a pointer. */
	case PRU_RCVOOB:
		if ((so->so_oobmark == 0 &&
		    (so->so_state & SS_RCVATMARK) == 0) ||
		    so->so_options & SO_OOBINLINE ||
		    tp->t_oobflags & TCPOOB_HADDATA) {
			error = EINVAL;
			break;
		}
		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
			error = EWOULDBLOCK;
			break;
		}
		m->m_len = 1;
		*mtod(m, caddr_t) = tp->t_iobc;
		if (((int)nam & MSG_PEEK) == 0)
			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
		break;

	case PRU_SENDOOB:
		if (sbspace(&so->so_snd) < -512) {
			m_freem(m);
			error = ENOBUFS;
			break;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		sbappend(&so->so_snd, m);
		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
		tp->t_force = 1;
		error = tcp_output(tp);
		tp->t_force = 0;
		break;

	case PRU_SOCKADDR:
		in_setsockaddr(inp, nam);
		break;

	case PRU_PEERADDR:
		in_setpeeraddr(inp, nam);
		break;

	/*
	 * TCP slow timer went off; going through this
	 * routine for tracing's sake.
	 */
	case PRU_SLOWTIMO:
		tp = tcp_timers(tp, (int)nam);
#ifdef TCPDEBUG
		req |= (int)nam << 8;		/* for debug's sake */
#endif
		break;

	default:
		panic("tcp_usrreq");
	}
#ifdef TCPDEBUG
	if (tp && (so->so_options & SO_DEBUG))
		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
#endif
	splx(s);
	return (error);
}
#endif
397 
/*
 * Tracing hooks used by the pru_* entry points below.
 *
 *	TCPDEBUG0	declares the local that holds the pre-request state.
 *	TCPDEBUG1()	stores tp's state (0 when tp is null) in that local.
 *	TCPDEBUG2(req)	calls tcp_trace() for request `req' when tp is
 *			non-null and SO_DEBUG is set on the socket.
 *
 * The macros expect `tp' and `so' to be in scope where they are used.
 * Without TCPDEBUG all three expand to nothing.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement; as a bare `if' it would capture an `else' that follows it.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do { \
				if (tp && (so->so_options & SO_DEBUG)) \
					tcp_trace(TA_USER, ostate, tp, 0, req); \
			} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
408 
409 /*
410  * TCP attaches to socket via pru_attach(), reserving space,
411  * and an internet control block.
412  */
413 static int
414 tcp_usr_attach(struct socket *so, int proto)
415 {
416 	int s = splnet();
417 	int error;
418 	struct inpcb *inp = sotoinpcb(so);
419 	struct tcpcb *tp = 0;
420 	TCPDEBUG0;
421 
422 	TCPDEBUG1();
423 	if (inp) {
424 		error = EISCONN;
425 		goto out;
426 	}
427 
428 	error = tcp_attach(so);
429 	if (error)
430 		goto out;
431 
432 	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
433 		so->so_linger = TCP_LINGERTIME * hz;
434 	tp = sototcpcb(so);
435 out:
436 	TCPDEBUG2(PRU_ATTACH);
437 	splx(s);
438 	return error;
439 }
440 
441 /*
442  * pru_detach() detaches the TCP protocol from the socket.
443  * If the protocol state is non-embryonic, then can't
444  * do this directly: have to initiate a pru_disconnect(),
445  * which may finish later; embryonic TCB's can just
446  * be discarded here.
447  */
448 static int
449 tcp_usr_detach(struct socket *so)
450 {
451 	int s = splnet();
452 	int error = 0;
453 	struct inpcb *inp = sotoinpcb(so);
454 	struct tcpcb *tp;
455 	TCPDEBUG0;
456 
457 	if (inp == 0) {
458 		splx(s);
459 		return EINVAL;	/* XXX */
460 	}
461 	tp = intotcpcb(inp);
462 	TCPDEBUG1();
463 	if (tp->t_state > TCPS_LISTEN)
464 		tp = tcp_disconnect(tp);
465 	else
466 		tp = tcp_close(tp);
467 
468 	TCPDEBUG2(PRU_DETACH);
469 	splx(s);
470 	return error;
471 }
472 
/*
 * Shared prologue and epilogue for the pru_* handlers below.
 *
 * COMMON_START() relies on the caller's locals `s' (saved priority level),
 * `inp' and `tp'.  It declares the trace state (TCPDEBUG0), returns EINVAL
 * after splx(s) when there is no PCB, and otherwise loads `tp' from `inp'
 * and records the pre-request state (TCPDEBUG1).  Because it begins with a
 * declaration it has to directly follow the handler's own declarations.
 *
 * COMMON_END(req) relies on `s' and `error'.  It defines the `out' label,
 * traces the request, restores the priority level and returns `error'.
 * The `goto out' after the return can never execute; presumably it exists
 * so that the label is referenced even in handlers that never jump to it.
 */
#define	COMMON_START()	TCPDEBUG0; \
			do { \
				     if (inp == 0) { \
					     splx(s); \
					     return EINVAL; \
				     } \
				     tp = intotcpcb(inp); \
				     TCPDEBUG1(); \
		     } while(0)

#define COMMON_END(req)	out: TCPDEBUG2(req); splx(s); return error; goto out
484 
485 
486 /*
487  * Give the socket an address.
488  */
489 static int
490 tcp_usr_bind(struct socket *so, struct mbuf *nam)
491 {
492 	int s = splnet();
493 	int error = 0;
494 	struct inpcb *inp = sotoinpcb(so);
495 	struct tcpcb *tp;
496 	struct sockaddr_in *sinp;
497 
498 	COMMON_START();
499 
500 	/*
501 	 * Must check for multicast addresses and disallow binding
502 	 * to them.
503 	 */
504 	sinp = mtod(nam, struct sockaddr_in *);
505 	if (sinp->sin_family == AF_INET &&
506 	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
507 		error = EAFNOSUPPORT;
508 		goto out;
509 	}
510 	error = in_pcbbind(inp, nam);
511 	if (error)
512 		goto out;
513 	COMMON_END(PRU_BIND);
514 
515 }
516 
517 /*
518  * Prepare to accept connections.
519  */
520 static int
521 tcp_usr_listen(struct socket *so)
522 {
523 	int s = splnet();
524 	int error = 0;
525 	struct inpcb *inp = sotoinpcb(so);
526 	struct tcpcb *tp;
527 
528 	COMMON_START();
529 	if (inp->inp_lport == 0)
530 		error = in_pcbbind(inp, NULL);
531 	if (error == 0)
532 		tp->t_state = TCPS_LISTEN;
533 	COMMON_END(PRU_LISTEN);
534 }
535 
536 /*
537  * Initiate connection to peer.
538  * Create a template for use in transmissions on this connection.
539  * Enter SYN_SENT state, and mark socket as connecting.
540  * Start keep-alive timer, and seed output sequence space.
541  * Send initial segment on connection.
542  */
543 static int
544 tcp_usr_connect(struct socket *so, struct mbuf *nam)
545 {
546 	int s = splnet();
547 	int error = 0;
548 	struct inpcb *inp = sotoinpcb(so);
549 	struct tcpcb *tp;
550 	struct sockaddr_in *sinp;
551 
552 	COMMON_START();
553 
554 	/*
555 	 * Must disallow TCP ``connections'' to multicast addresses.
556 	 */
557 	sinp = mtod(nam, struct sockaddr_in *);
558 	if (sinp->sin_family == AF_INET
559 	    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
560 		error = EAFNOSUPPORT;
561 		goto out;
562 	}
563 
564 	if ((error = tcp_connect(tp, nam)) != 0)
565 		goto out;
566 	error = tcp_output(tp);
567 	COMMON_END(PRU_CONNECT);
568 }
569 
570 /*
571  * Initiate disconnect from peer.
572  * If connection never passed embryonic stage, just drop;
573  * else if don't need to let data drain, then can just drop anyways,
574  * else have to begin TCP shutdown process: mark socket disconnecting,
575  * drain unread data, state switch to reflect user close, and
576  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
577  * when peer sends FIN and acks ours.
578  *
579  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
580  */
581 static int
582 tcp_usr_disconnect(struct socket *so)
583 {
584 	int s = splnet();
585 	int error = 0;
586 	struct inpcb *inp = sotoinpcb(so);
587 	struct tcpcb *tp;
588 
589 	COMMON_START();
590 	tp = tcp_disconnect(tp);
591 	COMMON_END(PRU_DISCONNECT);
592 }
593 
594 /*
595  * Accept a connection.  Essentially all the work is
596  * done at higher levels; just return the address
597  * of the peer, storing through addr.
598  */
599 static int
600 tcp_usr_accept(struct socket *so, struct mbuf *nam)
601 {
602 	int s = splnet();
603 	int error = 0;
604 	struct inpcb *inp = sotoinpcb(so);
605 	struct tcpcb *tp;
606 
607 	COMMON_START();
608 	in_setpeeraddr(inp, nam);
609 	COMMON_END(PRU_ACCEPT);
610 }
611 
612 /*
613  * Mark the connection as being incapable of further output.
614  */
615 static int
616 tcp_usr_shutdown(struct socket *so)
617 {
618 	int s = splnet();
619 	int error = 0;
620 	struct inpcb *inp = sotoinpcb(so);
621 	struct tcpcb *tp;
622 
623 	COMMON_START();
624 	socantsendmore(so);
625 	tp = tcp_usrclosed(tp);
626 	if (tp)
627 		error = tcp_output(tp);
628 	COMMON_END(PRU_SHUTDOWN);
629 }
630 
631 /*
632  * After a receive, possibly send window update to peer.
633  */
634 static int
635 tcp_usr_rcvd(struct socket *so, int flags)
636 {
637 	int s = splnet();
638 	int error = 0;
639 	struct inpcb *inp = sotoinpcb(so);
640 	struct tcpcb *tp;
641 
642 	COMMON_START();
643 	tcp_output(tp);
644 	COMMON_END(PRU_RCVD);
645 }
646 
647 /*
648  * Do a send by putting data in output queue and updating urgent
649  * marker if URG set.  Possibly send more data.
650  */
651 static int
652 tcp_usr_send(struct socket *so, int flags, struct mbuf *m, struct mbuf *nam,
653 	     struct mbuf *control)
654 {
655 	int s = splnet();
656 	int error = 0;
657 	struct inpcb *inp = sotoinpcb(so);
658 	struct tcpcb *tp;
659 
660 	COMMON_START();
661 	if (control && control->m_len) {
662 		m_freem(control); /* XXX shouldn't caller do this??? */
663 		if (m)
664 			m_freem(m);
665 		return EINVAL;
666 	}
667 
668 	if(!(flags & PRUS_OOB)) {
669 		sbappend(&so->so_snd, m);
670 		if (nam && tp->t_state < TCPS_SYN_SENT) {
671 			/*
672 			 * Do implied connect if not yet connected,
673 			 * initialize window to default value, and
674 			 * initialize maxseg/maxopd using peer's cached
675 			 * MSS.
676 			 */
677 			error = tcp_connect(tp, nam);
678 			if (error)
679 				goto out;
680 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
681 			tcp_mss(tp, -1);
682 		}
683 
684 		if (flags & PRUS_EOF) {
685 			/*
686 			 * Close the send side of the connection after
687 			 * the data is sent.
688 			 */
689 			socantsendmore(so);
690 			tp = tcp_usrclosed(tp);
691 		}
692 		if (tp != NULL)
693 			error = tcp_output(tp);
694 	} else {
695 		if (sbspace(&so->so_snd) < -512) {
696 			m_freem(m);
697 			error = ENOBUFS;
698 			goto out;
699 		}
700 		/*
701 		 * According to RFC961 (Assigned Protocols),
702 		 * the urgent pointer points to the last octet
703 		 * of urgent data.  We continue, however,
704 		 * to consider it to indicate the first octet
705 		 * of data past the urgent section.
706 		 * Otherwise, snd_up should be one lower.
707 		 */
708 		sbappend(&so->so_snd, m);
709 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
710 		tp->t_force = 1;
711 		error = tcp_output(tp);
712 		tp->t_force = 0;
713 	}
714 	COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
715 		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
716 }
717 
718 /*
719  * Abort the TCP.
720  */
721 static int
722 tcp_usr_abort(struct socket *so)
723 {
724 	int s = splnet();
725 	int error = 0;
726 	struct inpcb *inp = sotoinpcb(so);
727 	struct tcpcb *tp;
728 
729 	COMMON_START();
730 	tp = tcp_drop(tp, ECONNABORTED);
731 	COMMON_END(PRU_ABORT);
732 }
733 
734 /*
735  * Fill in st_bklsize for fstat() operations on a socket.
736  */
737 static int
738 tcp_usr_sense(struct socket *so, struct stat *sb)
739 {
740 	int s = splnet();
741 
742 	sb->st_blksize = so->so_snd.sb_hiwat;
743 	splx(s);
744 	return 0;
745 }
746 
747 /*
748  * Receive out-of-band data.
749  */
750 static int
751 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
752 {
753 	int s = splnet();
754 	int error = 0;
755 	struct inpcb *inp = sotoinpcb(so);
756 	struct tcpcb *tp;
757 
758 	COMMON_START();
759 	if ((so->so_oobmark == 0 &&
760 	     (so->so_state & SS_RCVATMARK) == 0) ||
761 	    so->so_options & SO_OOBINLINE ||
762 	    tp->t_oobflags & TCPOOB_HADDATA) {
763 		error = EINVAL;
764 		goto out;
765 	}
766 	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
767 		error = EWOULDBLOCK;
768 		goto out;
769 	}
770 	m->m_len = 1;
771 	*mtod(m, caddr_t) = tp->t_iobc;
772 	if ((flags & MSG_PEEK) == 0)
773 		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
774 	COMMON_END(PRU_RCVOOB);
775 }
776 
777 static int
778 tcp_usr_sockaddr(struct socket *so, struct mbuf *nam)
779 {
780 	int s = splnet();
781 	int error = 0;
782 	struct inpcb *inp = sotoinpcb(so);
783 	struct tcpcb *tp;
784 
785 	COMMON_START();
786 	in_setsockaddr(inp, nam);
787 	COMMON_END(PRU_SOCKADDR);
788 }
789 
790 static int
791 tcp_usr_peeraddr(struct socket *so, struct mbuf *nam)
792 {
793 	int s = splnet();
794 	int error = 0;
795 	struct inpcb *inp = sotoinpcb(so);
796 	struct tcpcb *tp;
797 
798 	COMMON_START();
799 	in_setpeeraddr(inp, nam);
800 	COMMON_END(PRU_PEERADDR);
801 }
802 
803 /*
804  * XXX - this should just be a call to in_control, but we need to get
805  * the types worked out.
806  */
807 static int
808 tcp_usr_control(struct socket *so, int cmd, caddr_t arg, struct ifnet *ifp)
809 {
810 	return in_control(so, cmd, arg, ifp);
811 }
812 
813 /* xxx - should be const */
814 struct pr_usrreqs tcp_usrreqs = {
815 	tcp_usr_abort, tcp_usr_accept, tcp_usr_attach, tcp_usr_bind,
816 	tcp_usr_connect, pru_connect2_notsupp, tcp_usr_control, tcp_usr_detach,
817 	tcp_usr_disconnect, tcp_usr_listen, tcp_usr_peeraddr, tcp_usr_rcvd,
818 	tcp_usr_rcvoob, tcp_usr_send, tcp_usr_sense, tcp_usr_shutdown,
819 	tcp_usr_sockaddr
820 };
821 
822 /*
823  * Common subroutine to open a TCP connection to remote host specified
824  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
825  * port number if needed.  Call in_pcbladdr to do the routing and to choose
826  * a local host address (interface).  If there is an existing incarnation
827  * of the same connection in TIME-WAIT state and if the remote host was
828  * sending CC options and if the connection duration was < MSL, then
829  * truncate the previous TIME-WAIT state and proceed.
830  * Initialize connection parameters and enter SYN-SENT state.
831  */
832 static int
833 tcp_connect(tp, nam)
834 	register struct tcpcb *tp;
835 	struct mbuf *nam;
836 {
837 	struct inpcb *inp = tp->t_inpcb, *oinp;
838 	struct socket *so = inp->inp_socket;
839 	struct tcpcb *otp;
840 	struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
841 	struct sockaddr_in *ifaddr;
842 	int error;
843 	struct rmxp_tao *taop;
844 	struct rmxp_tao tao_noncached;
845 
846 	if (inp->inp_lport == 0) {
847 		error = in_pcbbind(inp, NULL);
848 		if (error)
849 			return error;
850 	}
851 
852 	/*
853 	 * Cannot simply call in_pcbconnect, because there might be an
854 	 * earlier incarnation of this same connection still in
855 	 * TIME_WAIT state, creating an ADDRINUSE error.
856 	 */
857 	error = in_pcbladdr(inp, nam, &ifaddr);
858 	if (error)
859 		return error;
860 	oinp = in_pcblookup(inp->inp_pcbinfo->listhead,
861 	    sin->sin_addr, sin->sin_port,
862 	    inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
863 						: ifaddr->sin_addr,
864 	    inp->inp_lport,  0);
865 	if (oinp) {
866 		if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
867 		otp->t_state == TCPS_TIME_WAIT &&
868 		    otp->t_duration < TCPTV_MSL &&
869 		    (otp->t_flags & TF_RCVD_CC))
870 			otp = tcp_close(otp);
871 		else
872 			return EADDRINUSE;
873 	}
874 	if (inp->inp_laddr.s_addr == INADDR_ANY)
875 		inp->inp_laddr = ifaddr->sin_addr;
876 	inp->inp_faddr = sin->sin_addr;
877 	inp->inp_fport = sin->sin_port;
878 	in_pcbrehash(inp);
879 
880 	tp->t_template = tcp_template(tp);
881 	if (tp->t_template == 0) {
882 		in_pcbdisconnect(inp);
883 		return ENOBUFS;
884 	}
885 
886 	/* Compute window scaling to request.  */
887 	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
888 	    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
889 		tp->request_r_scale++;
890 
891 	soisconnecting(so);
892 	tcpstat.tcps_connattempt++;
893 	tp->t_state = TCPS_SYN_SENT;
894 	tp->t_timer[TCPT_KEEP] = tcp_keepinit;
895 	tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
896 	tcp_sendseqinit(tp);
897 
898 	/*
899 	 * Generate a CC value for this connection and
900 	 * check whether CC or CCnew should be used.
901 	 */
902 	if ((taop = tcp_gettaocache(tp->t_inpcb)) == NULL) {
903 		taop = &tao_noncached;
904 		bzero(taop, sizeof(*taop));
905 	}
906 
907 	tp->cc_send = CC_INC(tcp_ccgen);
908 	if (taop->tao_ccsent != 0 &&
909 	    CC_GEQ(tp->cc_send, taop->tao_ccsent)) {
910 		taop->tao_ccsent = tp->cc_send;
911 	} else {
912 		taop->tao_ccsent = 0;
913 		tp->t_flags |= TF_SENDCCNEW;
914 	}
915 
916 	return 0;
917 }
918 
919 int
920 tcp_ctloutput(op, so, level, optname, mp)
921 	int op;
922 	struct socket *so;
923 	int level, optname;
924 	struct mbuf **mp;
925 {
926 	int error = 0, s;
927 	struct inpcb *inp;
928 	register struct tcpcb *tp;
929 	register struct mbuf *m;
930 	register int i;
931 
932 	s = splnet();
933 	inp = sotoinpcb(so);
934 	if (inp == NULL) {
935 		splx(s);
936 		if (op == PRCO_SETOPT && *mp)
937 			(void) m_free(*mp);
938 		return (ECONNRESET);
939 	}
940 	if (level != IPPROTO_TCP) {
941 		error = ip_ctloutput(op, so, level, optname, mp);
942 		splx(s);
943 		return (error);
944 	}
945 	tp = intotcpcb(inp);
946 
947 	switch (op) {
948 
949 	case PRCO_SETOPT:
950 		m = *mp;
951 		switch (optname) {
952 
953 		case TCP_NODELAY:
954 			if (m == NULL || m->m_len < sizeof (int))
955 				error = EINVAL;
956 			else if (*mtod(m, int *))
957 				tp->t_flags |= TF_NODELAY;
958 			else
959 				tp->t_flags &= ~TF_NODELAY;
960 			break;
961 
962 		case TCP_MAXSEG:
963 			if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
964 				tp->t_maxseg = i;
965 			else
966 				error = EINVAL;
967 			break;
968 
969 		case TCP_NOOPT:
970 			if (m == NULL || m->m_len < sizeof (int))
971 				error = EINVAL;
972 			else if (*mtod(m, int *))
973 				tp->t_flags |= TF_NOOPT;
974 			else
975 				tp->t_flags &= ~TF_NOOPT;
976 			break;
977 
978 		case TCP_NOPUSH:
979 			if (m == NULL || m->m_len < sizeof (int))
980 				error = EINVAL;
981 			else if (*mtod(m, int *))
982 				tp->t_flags |= TF_NOPUSH;
983 			else
984 				tp->t_flags &= ~TF_NOPUSH;
985 			break;
986 
987 		default:
988 			error = ENOPROTOOPT;
989 			break;
990 		}
991 		if (m)
992 			(void) m_free(m);
993 		break;
994 
995 	case PRCO_GETOPT:
996 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
997 		m->m_len = sizeof(int);
998 
999 		switch (optname) {
1000 		case TCP_NODELAY:
1001 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
1002 			break;
1003 		case TCP_MAXSEG:
1004 			*mtod(m, int *) = tp->t_maxseg;
1005 			break;
1006 		case TCP_NOOPT:
1007 			*mtod(m, int *) = tp->t_flags & TF_NOOPT;
1008 			break;
1009 		case TCP_NOPUSH:
1010 			*mtod(m, int *) = tp->t_flags & TF_NOPUSH;
1011 			break;
1012 		default:
1013 			error = ENOPROTOOPT;
1014 			break;
1015 		}
1016 		break;
1017 	}
1018 	splx(s);
1019 	return (error);
1020 }
1021 
/*
 * tcp_sendspace and tcp_recvspace are the default send and receive window
 * sizes, respectively.  These are obsolescent (this information should
 * be set by the route).
 *
 * tcp_attach() passes both to soreserve() when a socket has no buffer
 * space reserved yet.  Each is exported read-write under net.inet.tcp.
 *
 * NOTE(review): the variables are declared u_long but exported with
 * SYSCTL_INT -- confirm that the two sizes agree on every supported
 * platform.
 */
u_long	tcp_sendspace = 1024*16;
SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
	CTLFLAG_RW, &tcp_sendspace , 0, "");
u_long	tcp_recvspace = 1024*16;
SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
	CTLFLAG_RW, &tcp_recvspace , 0, "");
1033 
1034 /*
1035  * Attach TCP protocol to socket, allocating
1036  * internet protocol control block, tcp control block,
1037  * bufer space, and entering LISTEN state if to accept connections.
1038  */
1039 static int
1040 tcp_attach(so)
1041 	struct socket *so;
1042 {
1043 	register struct tcpcb *tp;
1044 	struct inpcb *inp;
1045 	int error;
1046 
1047 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1048 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
1049 		if (error)
1050 			return (error);
1051 	}
1052 	error = in_pcballoc(so, &tcbinfo);
1053 	if (error)
1054 		return (error);
1055 	inp = sotoinpcb(so);
1056 	tp = tcp_newtcpcb(inp);
1057 	if (tp == 0) {
1058 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
1059 
1060 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
1061 		in_pcbdetach(inp);
1062 		so->so_state |= nofd;
1063 		return (ENOBUFS);
1064 	}
1065 	tp->t_state = TCPS_CLOSED;
1066 	return (0);
1067 }
1068 
1069 /*
1070  * Initiate (or continue) disconnect.
1071  * If embryonic state, just send reset (once).
1072  * If in ``let data drain'' option and linger null, just drop.
1073  * Otherwise (hard), mark socket disconnecting and drop
1074  * current input data; switch states based on user close, and
1075  * send segment to peer (with FIN).
1076  */
1077 static struct tcpcb *
1078 tcp_disconnect(tp)
1079 	register struct tcpcb *tp;
1080 {
1081 	struct socket *so = tp->t_inpcb->inp_socket;
1082 
1083 	if (tp->t_state < TCPS_ESTABLISHED)
1084 		tp = tcp_close(tp);
1085 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
1086 		tp = tcp_drop(tp, 0);
1087 	else {
1088 		soisdisconnecting(so);
1089 		sbflush(&so->so_rcv);
1090 		tp = tcp_usrclosed(tp);
1091 		if (tp)
1092 			(void) tcp_output(tp);
1093 	}
1094 	return (tp);
1095 }
1096 
1097 /*
1098  * User issued close, and wish to trail through shutdown states:
1099  * if never received SYN, just forget it.  If got a SYN from peer,
1100  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1101  * If already got a FIN from peer, then almost done; go to LAST_ACK
1102  * state.  In all other cases, have already sent FIN to peer (e.g.
1103  * after PRU_SHUTDOWN), and just have to play tedious game waiting
1104  * for peer to send FIN or not respond to keep-alives, etc.
1105  * We can let the user exit from the close as soon as the FIN is acked.
1106  */
1107 static struct tcpcb *
1108 tcp_usrclosed(tp)
1109 	register struct tcpcb *tp;
1110 {
1111 
1112 	switch (tp->t_state) {
1113 
1114 	case TCPS_CLOSED:
1115 	case TCPS_LISTEN:
1116 		tp->t_state = TCPS_CLOSED;
1117 		tp = tcp_close(tp);
1118 		break;
1119 
1120 	case TCPS_SYN_SENT:
1121 	case TCPS_SYN_RECEIVED:
1122 		tp->t_flags |= TF_NEEDFIN;
1123 		break;
1124 
1125 	case TCPS_ESTABLISHED:
1126 		tp->t_state = TCPS_FIN_WAIT_1;
1127 		break;
1128 
1129 	case TCPS_CLOSE_WAIT:
1130 		tp->t_state = TCPS_LAST_ACK;
1131 		break;
1132 	}
1133 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
1134 		soisdisconnected(tp->t_inpcb->inp_socket);
1135 		/* To prevent the connection hanging in FIN_WAIT_2 forever. */
1136 		if (tp->t_state == TCPS_FIN_WAIT_2)
1137 			tp->t_timer[TCPT_2MSL] = tcp_maxidle;
1138 	}
1139 	return (tp);
1140 }
1141 
1142