/*
 *  net/dccp/output.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

#include <net/inet_sock.h>
#include <net/sock.h>

#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"

static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	skb_set_owner_w(skb, sk);
	WARN_ON(sk->sk_send_head);
	sk->sk_send_head = skb;
}

/*
 * All SKBs seen here are completely headerless. It is our
 * job to build the DCCP header and pass the packet down to
 * IP, which does the same and then hands the packet off to
 * the device.
 */
static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		const struct inet_sock *inet = inet_sk(sk);
		const struct inet_connection_sock *icsk = inet_csk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48-bit sequence numbers */
		const u32 dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					  dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;

		dccp_inc_seqno(&dp->dccps_gss);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			set_ack = 0;
			/* fall through */
		case DCCP_PKT_DATAACK:
			break;

		case DCCP_PKT_REQUEST:
			set_ack = 0;
			/* fall through */

		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			ackno = dcb->dccpd_seq;
			/* fall through */
		default:
			/*
			 * Only data packets should come through with skb->sk
			 * set.
			 */
			WARN_ON(skb->sk);
			skb_set_owner_w(skb, sk);
			break;
		}

		dcb->dccpd_seq = dp->dccps_gss;

		if (dccp_insert_options(sk, skb)) {
			kfree_skb(skb);
			return -EPROTO;
		}

		skb->h.raw = skb_push(skb, dccp_header_size);
		dh = dccp_hdr(skb);

		/* Build the DCCP header; it is checksummed below. */
		memset(dh, 0, dccp_header_size);
		dh->dccph_type	= dcb->dccpd_type;
		dh->dccph_sport	= inet->sport;
		dh->dccph_dport	= inet->dport;
		dh->dccph_doff	= (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval	= dcb->dccpd_ccval;
		/* XXX For now we're using only 48-bit sequence numbers */
		dh->dccph_x	= 1;

		dp->dccps_awh = dp->dccps_gss;
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service =
							dp->dccps_service;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code =
							dcb->dccpd_reset_code;
			break;
		}

		icsk->icsk_af_ops->send_check(sk, skb->len, skb);

		if (set_ack)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
		err = icsk->icsk_af_ops->queue_xmit(skb, 0);
		if (err <= 0)
			return err;

		/* NET_XMIT_CN is special: it does not guarantee that this
		 * packet was lost. It tells us that the device is about to
		 * start dropping packets, or is already dropping some
		 * packets of the same priority, and is asking us to send
		 * less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
}
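
/*
 * Worked example for the header arithmetic above (illustrative, not part
 * of the original file): for a DCCP_PKT_DATAACK with 48-bit sequence
 * numbers (dccph_x == 1), the pushed header is
 *
 *	sizeof(struct dccp_hdr)		12 bytes (generic header)
 *	sizeof(struct dccp_hdr_ext)	 4 bytes (low 24 bits of the seqno)
 *	dccp_packet_hdr_len(type)	 8 bytes (acknowledgement number)
 *
 * so dccp_header_size == 24 and, with e.g. 4 bytes of options,
 * dccph_doff = (24 + 4) / 4 = 7 32-bit words.
 */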

unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/*
	 * FIXME: this should come from the CCID infrastructure, where, say,
	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc. For now let's
	 * put in a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO +
	 * ELAPSED TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE,
	 * padded to a multiple of 4: (5+6+10+6+6+6+3)/4 rounds down to 10
	 * words, i.e. 40 bytes.
	 */

	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;

	/* And store cached results */
	icsk->icsk_pmtu_cookie = pmtu;
	dp->dccps_mss_cache = mss_now;

	return mss_now;
}

EXPORT_SYMBOL_GPL(dccp_sync_mss);
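
/*
 * Worked example (illustrative numbers): for IPv4 with no IP options
 * (net_header_len == 20, icsk_ext_hdr_len == 0) and pmtu == 1500:
 *
 *	1500 - 20 - (12 + 4)	= 1464	after network and DCCP headers
 *	1464 - 40		= 1424	after the option budget above
 *
 * so dccps_mss_cache ends up as 1424 bytes of payload per packet.
 */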

void dccp_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	/* Should agree with poll, otherwise some programs break */
	if (sock_writeable(sk))
		sk_wake_async(sk, 2, POLL_OUT);

	read_unlock(&sk->sk_callback_lock);
}

/**
 * dccp_wait_for_ccid - Wait for the CCID to tell us we can send a packet
 * @sk: socket to wait for
 * @skb: the packet we want to pass to the CCID
 * @timeo: for how long
 */
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
			      long *timeo)
{
	struct dccp_sock *dp = dccp_sk(sk);
	DEFINE_WAIT(wait);
	long delay;
	int rc;

	while (1) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;

		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					    skb->len);
		if (rc <= 0)
			break;
		delay = msecs_to_jiffies(rc);
		if (delay > *timeo || delay < 0)
			goto do_nonblock;

		sk->sk_write_pending++;
		release_sock(sk);
		*timeo -= schedule_timeout(delay);
		lock_sock(sk);
		sk->sk_write_pending--;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return rc;

do_error:
	rc = -EPIPE;
	goto out;
do_nonblock:
	rc = -EAGAIN;
	goto out;
do_interrupted:
	rc = sock_intr_errno(*timeo);
	goto out;
}
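
/*
 * A sketch of the ccid_hc_tx_send_packet() contract relied on above (as
 * inferred from this call site): 0 means the CCID lets the packet out
 * right away, a negative value is an error, and a positive value is the
 * number of milliseconds the CCID asks us to back off -- which is why it
 * is fed through msecs_to_jiffies() before schedule_timeout().
 */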

int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					 skb->len);

	if (err > 0)
		err = dccp_wait_for_ccid(sk, skb, timeo);

	if (err == 0) {
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		const int len = skb->len;

		if (sk->sk_state == DCCP_PARTOPEN) {
			/*
			 * See 8.1.5. Handshake Completion: in PARTOPEN every
			 * packet the client sends must carry an Ack, hence
			 * DATAACK, and the delayed-Ack timer is kept armed.
			 */
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  inet_csk(sk)->icsk_rto,
						  DCCP_RTO_MAX);
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		} else if (dccp_ack_pending(sk))
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		else
			dcb->dccpd_type = DCCP_PKT_DATA;

		err = dccp_transmit_skb(sk, skb);
		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
	} else
		kfree_skb(skb);

	return err;
}
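
/*
 * Hedged sketch of the usual caller (cf. dccp_sendmsg() in proto.c): the
 * payload skb is allocated against the send buffer, filled from user
 * space and then passed in together with the send-timeout budget:
 *
 *	long timeo = sock_sndtimeo(sk, noblock);
 *	...
 *	err = dccp_write_xmit(sk, skb, &timeo);
 *
 * Note that on failure the skb has already been freed here, so the
 * caller must not free it again.
 */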

int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
		return -EHOSTUNREACH; /* Routing failure or similar. */

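	/*
	 * Retransmit from a private copy so the original stays queued for
	 * further retransmissions: if the skb is already cloned its data
	 * area is shared, so pskb_copy() duplicates the header portion we
	 * are about to rewrite; otherwise a cheap skb_clone() suffices.
	 */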
	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
				      pskb_copy(skb, GFP_ATOMIC) :
				      skb_clone(skb, GFP_ATOMIC)));
}

struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	struct dccp_request_sock *dreq;
	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	dreq = dccp_rsk(req);
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq  = dreq->dreq_iss;

	if (dccp_insert_options(sk, skb)) {
		kfree_skb(skb);
		return NULL;
	}

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport	= inet_sk(sk)->sport;
	dh->dccph_dport	= inet_rsk(req)->rmt_port;
	dh->dccph_doff	= (dccp_header_size +
			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type	= DCCP_PKT_RESPONSE;
	dh->dccph_x	= 1;
	dccp_hdr_set_seq(dh, dreq->dreq_iss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

EXPORT_SYMBOL_GPL(dccp_make_response);
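
/*
 * Hedged usage sketch: the address-family code (e.g.
 * dccp_v4_send_response()) builds the RESPONSE with this helper while
 * answering a REQUEST on a request_sock, roughly:
 *
 *	struct sk_buff *skb = dccp_make_response(sk, dst, req);
 *
 *	if (skb != NULL)
 *		... checksum it and push it out via the AF transmit path;
 */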

static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
				       const enum dccp_reset_codes code)
{
	struct dccp_hdr *dh;
	struct dccp_sock *dp = dccp_sk(sk);
	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	dccp_inc_seqno(&dp->dccps_gss);

	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
	DCCP_SKB_CB(skb)->dccpd_type	   = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_seq	   = dp->dccps_gss;

	if (dccp_insert_options(sk, skb)) {
		kfree_skb(skb);
		return NULL;
	}

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport	= inet_sk(sk)->sport;
	dh->dccph_dport	= inet_sk(sk)->dport;
	dh->dccph_doff	= (dccp_header_size +
			   DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type	= DCCP_PKT_RESET;
	dh->dccph_x	= 1;
	dccp_hdr_set_seq(dh, dp->dccps_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);

	dccp_hdr_reset(skb)->dccph_reset_code = code;
	inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_sk_rebuild_header(sk);

	if (err == 0) {
		struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache,
						      code);
		if (skb != NULL) {
			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
			err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
			if (err == NET_XMIT_CN)
				err = 0;
		}
	}

	return err;
}

/*
 * Do all connect socket setups that can be done address-family
 * independently.
 */
static inline void dccp_connect_init(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);

	dccp_sync_mss(sk, dst_mtu(dst));

	dccp_update_gss(sk, dp->dccps_iss);
	/*
	 * SWL and AWL are initially adjusted so that they are not less than
	 * the initial Sequence Numbers received and sent, respectively:
	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
	 *	AWL := max(GSS - W' + 1, ISS).
	 * These adjustments MUST be applied only at the beginning of the
	 * connection.
	 */
	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));

	icsk->icsk_retransmits = 0;
}
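
/*
 * Worked example for the AWL clamp above (illustrative numbers): right
 * after dccp_update_gss() we have GSS == ISS, so with a send window W'
 * greater than 1 the raw AWL = GSS - W' + 1 would fall below ISS; max48()
 * pins it back to ISS, so acknowledgements below the initial sequence
 * number, which cannot refer to anything we sent, stay invalid.
 */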

int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_connect_init(sk);

	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
	skb->csum = 0;

	dccp_skb_entail(sk, skb);
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer arrives. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, DCCP_RTO_MAX);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_connect);
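
/*
 * Hedged usage sketch: the address-family connect (e.g. dccp_v4_connect())
 * resolves the route, picks the initial sequence number dccps_iss and the
 * ports, and only then calls
 *
 *	err = dccp_connect(sk);
 *
 * which entails and transmits the initial DCCP_PKT_REQUEST and arms the
 * retransmit timer above.
 */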

void dccp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->sk_state != DCCP_CLOSED) {
		struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header,
						GFP_ATOMIC);

		if (skb == NULL) {
			inet_csk_schedule_ack(sk);
			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX,
						  DCCP_RTO_MAX);
			return;
		}

		/* Reserve space for headers */
		skb_reserve(skb, sk->sk_prot->max_header);
		skb->csum = 0;
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
		dccp_transmit_skb(sk, skb);
	}
}

EXPORT_SYMBOL_GPL(dccp_send_ack);
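
/*
 * A note on the allocation-failure branch above (sketch): dccp_send_ack()
 * is also what the delayed-ACK timer falls back to when a scheduled ACK
 * comes due, so instead of giving up when no memory is available it
 * re-arms ICSK_TIME_DACK and lets the timer retry the ACK later.
 */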

void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use the new timeout only if there wasn't an older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If the delack timer was blocked or is about to expire,
		 * send the ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

void dccp_send_sync(struct sock *sk, const u64 seq,
		    const enum dccp_pkt_type pkt_type)
{
	/*
	 * We are not putting this on the write queue, so
	 * dccp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);

	if (skb == NULL)
		/* FIXME: how to make sure the sync is sent? */
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
	DCCP_SKB_CB(skb)->dccpd_seq = seq;

	dccp_transmit_skb(sk, skb);
}

EXPORT_SYMBOL_GPL(dccp_send_sync);
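
/*
 * Hedged usage sketch: the input path calls this when a packet fails the
 * sequence-validity checks, e.g. (cf. dccp_check_seqno() in input.c)
 *
 *	dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
 *
 * and a valid DCCP_PKT_SYNC is answered the same way with
 * DCCP_PKT_SYNCACK.
 */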

/*
 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us.
 * This function must not fail to queue a DCCP_PKT_CLOSE/CLOSEREQ frame
 * under any circumstances.
 */
void dccp_send_close(struct sock *sk, const int active)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;

	skb = alloc_skb(sk->sk_prot->max_header, prio);
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;

	if (active) {
		dccp_skb_entail(sk, skb);
		dccp_transmit_skb(sk, skb_clone(skb, prio));
	} else
		dccp_transmit_skb(sk, skb);
}
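
/*
 * Hedged usage sketch: dccp_close() uses the active form when user space
 * closes an established socket, while the passive form answers a received
 * CLOSEREQ from the state-processing code:
 *
 *	dccp_send_close(sk, 1);		active close; the skb stays entailed
 *					for retransmission
 *	dccp_send_close(sk, 0);		passive reply; fire and forget
 */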
569