xref: /freebsd/sys/netinet/tcp_usrreq.c (revision a316b26e50bbed7cf655fbba726ab87d8ab7599d)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)tcp_usrreq.c	8.2 (Berkeley) 1/3/94
34  * $Id: tcp_usrreq.c,v 1.5 1994/09/15 10:36:56 davidg Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/socket.h>
42 #include <sys/socketvar.h>
43 #include <sys/protosw.h>
44 #include <sys/errno.h>
45 #include <sys/stat.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/in_pcb.h>
54 #include <netinet/ip_var.h>
55 #include <netinet/tcp.h>
56 #include <netinet/tcp_fsm.h>
57 #include <netinet/tcp_seq.h>
58 #include <netinet/tcp_timer.h>
59 #include <netinet/tcp_var.h>
60 #include <netinet/tcpip.h>
61 #ifdef TCPDEBUG
62 #include <netinet/tcp_debug.h>
63 #endif
64 
/*
 * TCP protocol interface to socket abstraction.
 *
 * tcpstates is defined elsewhere; it is only declared here and is not
 * referenced by the code visible in this file.
 * NOTE(review): presumably the table of printable TCP state names used
 * for debugging/tracing -- confirm against its definition.
 */
extern	char *tcpstates[];
69 
70 /*
71  * Process a TCP user request for TCP tb.  If this is a send request
72  * then m is the mbuf chain of send data.  If this is a timer expiration
73  * (called from the software clock routine), then timertype tells which timer.
74  */
75 /*ARGSUSED*/
76 int
77 tcp_usrreq(so, req, m, nam, control)
78 	struct socket *so;
79 	int req;
80 	struct mbuf *m, *nam, *control;
81 {
82 	register struct inpcb *inp;
83 	register struct tcpcb *tp = 0;
84 	struct sockaddr_in *sinp;
85 	int s;
86 	int error = 0;
87 	int ostate;
88 
89 	if (req == PRU_CONTROL)
90 		return (in_control(so, (int)m, (caddr_t)nam,
91 			(struct ifnet *)control));
92 	if (control && control->m_len) {
93 		m_freem(control);
94 		if (m)
95 			m_freem(m);
96 		return (EINVAL);
97 	}
98 
99 	s = splnet();
100 	inp = sotoinpcb(so);
101 	/*
102 	 * When a TCP is attached to a socket, then there will be
103 	 * a (struct inpcb) pointed at by the socket, and this
104 	 * structure will point at a subsidary (struct tcpcb).
105 	 */
106 	if (inp == 0 && req != PRU_ATTACH) {
107 		splx(s);
108 		return (EINVAL);		/* XXX */
109 	}
110 	if (inp) {
111 		tp = intotcpcb(inp);
112 		/* WHAT IF TP IS 0? */
113 #ifdef KPROF
114 		tcp_acounts[tp->t_state][req]++;
115 #endif
116 		ostate = tp->t_state;
117 	} else
118 		ostate = 0;
119 	switch (req) {
120 
121 	/*
122 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
123 	 * and an internet control block.
124 	 */
125 	case PRU_ATTACH:
126 		if (inp) {
127 			error = EISCONN;
128 			break;
129 		}
130 		error = tcp_attach(so);
131 		if (error)
132 			break;
133 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
134 			so->so_linger = TCP_LINGERTIME;
135 		tp = sototcpcb(so);
136 		break;
137 
138 	/*
139 	 * PRU_DETACH detaches the TCP protocol from the socket.
140 	 * If the protocol state is non-embryonic, then can't
141 	 * do this directly: have to initiate a PRU_DISCONNECT,
142 	 * which may finish later; embryonic TCB's can just
143 	 * be discarded here.
144 	 */
145 	case PRU_DETACH:
146 		if (tp->t_state > TCPS_LISTEN)
147 			tp = tcp_disconnect(tp);
148 		else
149 			tp = tcp_close(tp);
150 		break;
151 
152 	/*
153 	 * Give the socket an address.
154 	 */
155 	case PRU_BIND:
156 		/*
157 		 * Must check for multicast addresses and disallow binding
158 		 * to them.
159 		 */
160 		sinp = mtod(nam, struct sockaddr_in *);
161 		if (sinp->sin_family == AF_INET &&
162 		    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
163 			error = EAFNOSUPPORT;
164 			break;
165 		}
166 		error = in_pcbbind(inp, nam);
167 		if (error)
168 			break;
169 		break;
170 
171 	/*
172 	 * Prepare to accept connections.
173 	 */
174 	case PRU_LISTEN:
175 		if (inp->inp_lport == 0)
176 			error = in_pcbbind(inp, (struct mbuf *)0);
177 		if (error == 0)
178 			tp->t_state = TCPS_LISTEN;
179 		break;
180 
181 	/*
182 	 * Initiate connection to peer.
183 	 * Create a template for use in transmissions on this connection.
184 	 * Enter SYN_SENT state, and mark socket as connecting.
185 	 * Start keep-alive timer, and seed output sequence space.
186 	 * Send initial segment on connection.
187 	 */
188 	case PRU_CONNECT:
189 		/*
190 		 * Must disallow TCP ``connections'' to multicast addresses.
191 		 */
192 		sinp = mtod(nam, struct sockaddr_in *);
193 		if (sinp->sin_family == AF_INET
194 		    && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
195 			error = EAFNOSUPPORT;
196 			break;
197 		}
198 
199 		if (inp->inp_lport == 0) {
200 			error = in_pcbbind(inp, (struct mbuf *)0);
201 			if (error)
202 				break;
203 		}
204 		error = in_pcbconnect(inp, nam);
205 		if (error)
206 			break;
207 		tp->t_template = tcp_template(tp);
208 		if (tp->t_template == 0) {
209 			in_pcbdisconnect(inp);
210 			error = ENOBUFS;
211 			break;
212 		}
213 		/* Compute window scaling to request.  */
214 		while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
215 		    (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
216 			tp->request_r_scale++;
217 		soisconnecting(so);
218 		tcpstat.tcps_connattempt++;
219 		tp->t_state = TCPS_SYN_SENT;
220 		tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
221 		tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
222 		tcp_sendseqinit(tp);
223 		error = tcp_output(tp);
224 		break;
225 
226 	/*
227 	 * Create a TCP connection between two sockets.
228 	 */
229 	case PRU_CONNECT2:
230 		error = EOPNOTSUPP;
231 		break;
232 
233 	/*
234 	 * Initiate disconnect from peer.
235 	 * If connection never passed embryonic stage, just drop;
236 	 * else if don't need to let data drain, then can just drop anyways,
237 	 * else have to begin TCP shutdown process: mark socket disconnecting,
238 	 * drain unread data, state switch to reflect user close, and
239 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
240 	 * when peer sends FIN and acks ours.
241 	 *
242 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
243 	 */
244 	case PRU_DISCONNECT:
245 		tp = tcp_disconnect(tp);
246 		break;
247 
248 	/*
249 	 * Accept a connection.  Essentially all the work is
250 	 * done at higher levels; just return the address
251 	 * of the peer, storing through addr.
252 	 */
253 	case PRU_ACCEPT:
254 		in_setpeeraddr(inp, nam);
255 		break;
256 
257 	/*
258 	 * Mark the connection as being incapable of further output.
259 	 */
260 	case PRU_SHUTDOWN:
261 		socantsendmore(so);
262 		tp = tcp_usrclosed(tp);
263 		if (tp)
264 			error = tcp_output(tp);
265 		break;
266 
267 	/*
268 	 * After a receive, possibly send window update to peer.
269 	 */
270 	case PRU_RCVD:
271 		(void) tcp_output(tp);
272 		break;
273 
274 	/*
275 	 * Do a send by putting data in output queue and updating urgent
276 	 * marker if URG set.  Possibly send more data.
277 	 */
278 	case PRU_SEND:
279 		sbappend(&so->so_snd, m);
280 		error = tcp_output(tp);
281 		break;
282 
283 	/*
284 	 * Abort the TCP.
285 	 */
286 	case PRU_ABORT:
287 		tp = tcp_drop(tp, ECONNABORTED);
288 		break;
289 
290 	case PRU_SENSE:
291 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
292 		(void) splx(s);
293 		return (0);
294 
295 	case PRU_RCVOOB:
296 		if ((so->so_oobmark == 0 &&
297 		    (so->so_state & SS_RCVATMARK) == 0) ||
298 		    so->so_options & SO_OOBINLINE ||
299 		    tp->t_oobflags & TCPOOB_HADDATA) {
300 			error = EINVAL;
301 			break;
302 		}
303 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
304 			error = EWOULDBLOCK;
305 			break;
306 		}
307 		m->m_len = 1;
308 		*mtod(m, caddr_t) = tp->t_iobc;
309 		if (((int)nam & MSG_PEEK) == 0)
310 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
311 		break;
312 
313 	case PRU_SENDOOB:
314 		if (sbspace(&so->so_snd) < -512) {
315 			m_freem(m);
316 			error = ENOBUFS;
317 			break;
318 		}
319 		/*
320 		 * According to RFC961 (Assigned Protocols),
321 		 * the urgent pointer points to the last octet
322 		 * of urgent data.  We continue, however,
323 		 * to consider it to indicate the first octet
324 		 * of data past the urgent section.
325 		 * Otherwise, snd_up should be one lower.
326 		 */
327 		sbappend(&so->so_snd, m);
328 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
329 		tp->t_force = 1;
330 		error = tcp_output(tp);
331 		tp->t_force = 0;
332 		break;
333 
334 	case PRU_SOCKADDR:
335 		in_setsockaddr(inp, nam);
336 		break;
337 
338 	case PRU_PEERADDR:
339 		in_setpeeraddr(inp, nam);
340 		break;
341 
342 	/*
343 	 * TCP slow timer went off; going through this
344 	 * routine for tracing's sake.
345 	 */
346 	case PRU_SLOWTIMO:
347 		tp = tcp_timers(tp, (int)nam);
348 		req |= (int)nam << 8;		/* for debug's sake */
349 		break;
350 
351 	default:
352 		panic("tcp_usrreq");
353 	}
354 #ifdef TCPDEBUG
355 	if (tp && (so->so_options & SO_DEBUG))
356 		tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
357 #endif
358 	splx(s);
359 	return (error);
360 }
361 
362 int
363 tcp_ctloutput(op, so, level, optname, mp)
364 	int op;
365 	struct socket *so;
366 	int level, optname;
367 	struct mbuf **mp;
368 {
369 	int error = 0, s;
370 	struct inpcb *inp;
371 	register struct tcpcb *tp;
372 	register struct mbuf *m;
373 	register int i;
374 
375 	s = splnet();
376 	inp = sotoinpcb(so);
377 	if (inp == NULL) {
378 		splx(s);
379 		if (op == PRCO_SETOPT && *mp)
380 			(void) m_free(*mp);
381 		return (ECONNRESET);
382 	}
383 	if (level != IPPROTO_TCP) {
384 		error = ip_ctloutput(op, so, level, optname, mp);
385 		splx(s);
386 		return (error);
387 	}
388 	tp = intotcpcb(inp);
389 
390 	switch (op) {
391 
392 	case PRCO_SETOPT:
393 		m = *mp;
394 		switch (optname) {
395 
396 		case TCP_NODELAY:
397 			if (m == NULL || m->m_len < sizeof (int))
398 				error = EINVAL;
399 			else if (*mtod(m, int *))
400 				tp->t_flags |= TF_NODELAY;
401 			else
402 				tp->t_flags &= ~TF_NODELAY;
403 			break;
404 
405 		case TCP_MAXSEG:
406 			if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
407 				tp->t_maxseg = i;
408 			else
409 				error = EINVAL;
410 			break;
411 
412 		default:
413 			error = ENOPROTOOPT;
414 			break;
415 		}
416 		if (m)
417 			(void) m_free(m);
418 		break;
419 
420 	case PRCO_GETOPT:
421 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
422 		m->m_len = sizeof(int);
423 
424 		switch (optname) {
425 		case TCP_NODELAY:
426 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
427 			break;
428 		case TCP_MAXSEG:
429 			*mtod(m, int *) = tp->t_maxseg;
430 			break;
431 		default:
432 			error = ENOPROTOOPT;
433 			break;
434 		}
435 		break;
436 	}
437 	splx(s);
438 	return (error);
439 }
440 
/*
 * tcp_sendspace and tcp_recvspace are the default send and receive window
 * sizes, respectively.  These are obsolescent (this information should
 * be set by the route).
 *
 * tcp_attach() passes them to soreserve() when a socket arrives with
 * either buffer high-water mark still zero.  Building with
 * TCP_SMALLSPACE selects 4K defaults instead of 16K.
 */
#ifdef TCP_SMALLSPACE
u_long	tcp_sendspace = 1024*4;
u_long	tcp_recvspace = 1024*4;
#else
u_long	tcp_sendspace = 1024*16;
u_long	tcp_recvspace = 1024*16;
#endif
453 
454 /*
455  * Attach TCP protocol to socket, allocating
456  * internet protocol control block, tcp control block,
457  * bufer space, and entering LISTEN state if to accept connections.
458  */
459 int
460 tcp_attach(so)
461 	struct socket *so;
462 {
463 	register struct tcpcb *tp;
464 	struct inpcb *inp;
465 	int error;
466 
467 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
468 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
469 		if (error)
470 			return (error);
471 	}
472 	error = in_pcballoc(so, &tcb);
473 	if (error)
474 		return (error);
475 	inp = sotoinpcb(so);
476 	tp = tcp_newtcpcb(inp);
477 	if (tp == 0) {
478 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
479 
480 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
481 		in_pcbdetach(inp);
482 		so->so_state |= nofd;
483 		return (ENOBUFS);
484 	}
485 	tp->t_state = TCPS_CLOSED;
486 	return (0);
487 }
488 
489 /*
490  * Initiate (or continue) disconnect.
491  * If embryonic state, just send reset (once).
492  * If in ``let data drain'' option and linger null, just drop.
493  * Otherwise (hard), mark socket disconnecting and drop
494  * current input data; switch states based on user close, and
495  * send segment to peer (with FIN).
496  */
497 struct tcpcb *
498 tcp_disconnect(tp)
499 	register struct tcpcb *tp;
500 {
501 	struct socket *so = tp->t_inpcb->inp_socket;
502 
503 	if (tp->t_state < TCPS_ESTABLISHED)
504 		tp = tcp_close(tp);
505 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
506 		tp = tcp_drop(tp, 0);
507 	else {
508 		soisdisconnecting(so);
509 		sbflush(&so->so_rcv);
510 		tp = tcp_usrclosed(tp);
511 		if (tp)
512 			(void) tcp_output(tp);
513 	}
514 	return (tp);
515 }
516 
517 /*
518  * User issued close, and wish to trail through shutdown states:
519  * if never received SYN, just forget it.  If got a SYN from peer,
520  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
521  * If already got a FIN from peer, then almost done; go to LAST_ACK
522  * state.  In all other cases, have already sent FIN to peer (e.g.
523  * after PRU_SHUTDOWN), and just have to play tedious game waiting
524  * for peer to send FIN or not respond to keep-alives, etc.
525  * We can let the user exit from the close as soon as the FIN is acked.
526  */
527 struct tcpcb *
528 tcp_usrclosed(tp)
529 	register struct tcpcb *tp;
530 {
531 
532 	switch (tp->t_state) {
533 
534 	case TCPS_CLOSED:
535 	case TCPS_LISTEN:
536 	case TCPS_SYN_SENT:
537 		tp->t_state = TCPS_CLOSED;
538 		tp = tcp_close(tp);
539 		break;
540 
541 	case TCPS_SYN_RECEIVED:
542 	case TCPS_ESTABLISHED:
543 		tp->t_state = TCPS_FIN_WAIT_1;
544 		break;
545 
546 	case TCPS_CLOSE_WAIT:
547 		tp->t_state = TCPS_LAST_ACK;
548 		break;
549 	}
550 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
551 		soisdisconnected(tp->t_inpcb->inp_socket);
552 	return (tp);
553 }
554