/*
 *  net/dccp/output.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>

#include <net/inet_sock.h>
#include <net/sock.h>

#include "ackvec.h"
#include "ccid.h"
#include "dccp.h"

static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

static void dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
{
	skb_set_owner_w(skb, sk);
	WARN_ON(sk->sk_send_head);
	sk->sk_send_head = skb;
}

/*
 * All SKBs seen here are completely headerless. It is our job to build the
 * DCCP header and pass the packet down to IP, which adds its own header and
 * hands the packet off to the device.
 */
static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		const struct inet_sock *inet = inet_sk(sk);
		const struct inet_connection_sock *icsk = inet_csk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48-bit sequence numbers */
		const u32 dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					     dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;

		dccp_inc_seqno(&dp->dccps_gss);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			set_ack = 0;
			/* fall through */
		case DCCP_PKT_DATAACK:
			break;

		case DCCP_PKT_REQUEST:
			set_ack = 0;
			/* fall through */

		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			ackno = dcb->dccpd_seq;
			/* fall through */
		default:
			/*
			 * Only data packets should come through with skb->sk
			 * set.
			 */
			WARN_ON(skb->sk);
			skb_set_owner_w(skb, sk);
			break;
		}

		dcb->dccpd_seq = dp->dccps_gss;

		if (dccp_insert_options(sk, skb)) {
			kfree_skb(skb);
			return -EPROTO;
		}

		skb->h.raw = skb_push(skb, dccp_header_size);
		dh = dccp_hdr(skb);

		/* Build DCCP header and checksum it. */
		memset(dh, 0, dccp_header_size);
		dh->dccph_type = dcb->dccpd_type;
		dh->dccph_sport = inet->sport;
		dh->dccph_dport = inet->dport;
		dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval = dcb->dccpd_ccval;
		/* XXX For now we're using only 48-bit sequence numbers */
		dh->dccph_x = 1;

		dp->dccps_awh = dp->dccps_gss;
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service =
							dp->dccps_service;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code =
							dcb->dccpd_reset_code;
			break;
		}

		icsk->icsk_af_ops->send_check(sk, skb->len, skb);

		if (set_ack)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
		err = icsk->icsk_af_ops->queue_xmit(skb, 0);
		if (err <= 0)
			return err;

		/*
		 * NET_XMIT_CN is special. It does not guarantee that this
		 * packet was lost; it tells us that the device is about to
		 * start dropping packets, or already drops some packets of
		 * the same priority, and asks us to send less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
}

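/*
 * dccp_sync_mss - Derive and cache the maximum DCCP payload for @pmtu
 *
 * Strips the network-layer header, the generic DCCP header plus the 48-bit
 * sequence number extension, any transport extension headers and a rough
 * per-packet option estimate from the path MTU, then caches the result in
 * icsk_pmtu_cookie and dccps_mss_cache.
 */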
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));

	/* Now subtract optional transport overhead */
	mss_now -= icsk->icsk_ext_hdr_len;

	/*
	 * FIXME: this should come from the CCID infrastructure, where, say,
	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc. For now let's
	 * use a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding
	 * to make it a multiple of 4.
	 */
	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;

	/* And store cached results */
	icsk->icsk_pmtu_cookie = pmtu;
	dp->dccps_mss_cache = mss_now;

	return mss_now;
}

EXPORT_SYMBOL_GPL(dccp_sync_mss);

void dccp_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
		wake_up_interruptible(sk->sk_sleep);
	/* Should agree with poll, otherwise some programs break */
	if (sock_writeable(sk))
		sk_wake_async(sk, 2, POLL_OUT);

	read_unlock(&sk->sk_callback_lock);
}

/**
 * dccp_wait_for_ccid - Wait for ccid to tell us we can send a packet
 * @sk:    socket to wait for
 * @skb:   packet we are waiting to be allowed to send
 * @timeo: for how long
 */
static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb,
			      long *timeo)
{
	struct dccp_sock *dp = dccp_sk(sk);
	DEFINE_WAIT(wait);
	long delay;
	int rc;

	while (1) {
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
			goto do_error;
		if (!*timeo)
			goto do_nonblock;
		if (signal_pending(current))
			goto do_interrupted;

		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					    skb->len);
		if (rc <= 0)
			break;
		delay = msecs_to_jiffies(rc);
		if (delay > *timeo || delay < 0)
			goto do_nonblock;

		sk->sk_write_pending++;
		release_sock(sk);
		*timeo -= schedule_timeout(delay);
		lock_sock(sk);
		sk->sk_write_pending--;
	}
out:
	finish_wait(sk->sk_sleep, &wait);
	return rc;

do_error:
	rc = -EPIPE;
	goto out;
do_nonblock:
	rc = -EAGAIN;
	goto out;
do_interrupted:
	rc = sock_intr_errno(*timeo);
	goto out;
}

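/*
 * dccp_write_xmit - Ask the CCID whether @skb may go out now, then send it
 *
 * As used here, ccid_hc_tx_send_packet() returns 0 when the packet may be
 * sent immediately and a positive value when the sender has to wait that many
 * milliseconds, in which case we block in dccp_wait_for_ccid() for up to
 * @timeo; anything else is treated as an error and the packet is dropped.
 * While the handshake is still completing (PARTOPEN), or when an Ack is
 * pending, the payload is piggybacked on a DCCP-DataAck instead of a plain
 * DCCP-Data packet.
 */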
int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb,
					 skb->len);

	if (err > 0)
		err = dccp_wait_for_ccid(sk, skb, timeo);

	if (err == 0) {
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		const int len = skb->len;

		if (sk->sk_state == DCCP_PARTOPEN) {
			/* See 8.1.5. Handshake Completion */
			inet_csk_schedule_ack(sk);
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  inet_csk(sk)->icsk_rto,
						  DCCP_RTO_MAX);
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		} else if (dccp_ack_pending(sk))
			dcb->dccpd_type = DCCP_PKT_DATAACK;
		else
			dcb->dccpd_type = DCCP_PKT_DATA;

		err = dccp_transmit_skb(sk, skb);
		ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
	} else
		kfree_skb(skb);

	return err;
}

int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
		return -EHOSTUNREACH; /* Routing failure or similar. */

	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
				      pskb_copy(skb, GFP_ATOMIC) :
				      skb_clone(skb, GFP_ATOMIC)));
}

struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	struct dccp_request_sock *dreq;
	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	dreq = dccp_rsk(req);
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;

	if (dccp_insert_options(sk, skb)) {
		kfree_skb(skb);
		return NULL;
	}

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_rsk(req)->rmt_port;
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESPONSE;
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dreq->dreq_iss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
	dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

EXPORT_SYMBOL_GPL(dccp_make_response);

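/*
 * dccp_make_reset - Build a DCCP-Reset packet carrying @code
 *
 * Only constructs and checksums the packet on the current dst; queueing it
 * for transmission is left to the caller (see dccp_send_reset() below).
 */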
static struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
				       const enum dccp_reset_codes code)
{
	struct dccp_hdr *dh;
	struct dccp_sock *dp = dccp_sk(sk);
	const u32 dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	dccp_inc_seqno(&dp->dccps_gss);

	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;

	if (dccp_insert_options(sk, skb)) {
		kfree_skb(skb);
		return NULL;
	}

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_sk(sk)->dport;
	dh->dccph_doff = (dccp_header_size +
			  DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESET;
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dp->dccps_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);

	dccp_hdr_reset(skb)->dccph_reset_code = code;
	inet_csk(sk)->icsk_af_ops->send_check(sk, skb->len, skb);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_sk_rebuild_header(sk);

	if (err == 0) {
		struct sk_buff *skb = dccp_make_reset(sk, sk->sk_dst_cache,
						      code);
		if (skb != NULL) {
			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
			err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0);
			if (err == NET_XMIT_CN)
				err = 0;
		}
	}

	return err;
}

/*
 * Do all connect socket setups that can be done AF independent.
 */
static inline void dccp_connect_init(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);

	dccp_sync_mss(sk, dst_mtu(dst));

	dccp_update_gss(sk, dp->dccps_iss);
	/*
	 * SWL and AWL are initially adjusted so that they are not less than
	 * the initial Sequence Numbers received and sent, respectively:
	 *	SWL := max(GSR + 1 - floor(W/4), ISR),
	 *	AWL := max(GSS - W' + 1, ISS).
	 * These adjustments MUST be applied only at the beginning of the
	 * connection.
	 */
	dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss));

	icsk->icsk_retransmits = 0;
}

int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_connect_init(sk);

	skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, sk->sk_prot->max_header);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
	skb->csum = 0;

	dccp_skb_entail(sk, skb);
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  icsk->icsk_rto, DCCP_RTO_MAX);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_connect);

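/*
 * dccp_send_ack - Transmit a pure DCCP-Ack right away
 *
 * If no skb can be allocated, fall back to (re)arming the delayed-Ack timer
 * so the Ack is retried later.
 */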
void dccp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->sk_state != DCCP_CLOSED) {
		struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header,
						GFP_ATOMIC);

		if (skb == NULL) {
			inet_csk_schedule_ack(sk);
			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
						  TCP_DELACK_MAX,
						  DCCP_RTO_MAX);
			return;
		}

		/* Reserve space for headers */
		skb_reserve(skb, sk->sk_prot->max_header);
		skb->csum = 0;
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
		dccp_transmit_skb(sk, skb);
	}
}

EXPORT_SYMBOL_GPL(dccp_send_ack);

void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use the new timeout only if there wasn't an older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

void dccp_send_sync(struct sock *sk, const u64 seq,
		    const enum dccp_pkt_type pkt_type)
{
	/*
	 * We are not putting this on the write queue, so
	 * dccp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);

	if (skb == NULL)
		/* FIXME: how to make sure the sync is sent? */
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
	DCCP_SKB_CB(skb)->dccpd_seq = seq;

	dccp_transmit_skb(sk, skb);
}

EXPORT_SYMBOL_GPL(dccp_send_sync);

/*
 * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
 * any circumstances.
 */
void dccp_send_close(struct sock *sk, const int active)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;

	skb = alloc_skb(sk->sk_prot->max_header, prio);
	if (skb == NULL)
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ?
					DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;

	if (active) {
		dccp_skb_entail(sk, skb);
		dccp_transmit_skb(sk, skb_clone(skb, prio));
	} else
		dccp_transmit_skb(sk, skb);
}