1 /* 2 * net/dccp/ipv4.c 3 * 4 * An implementation of the DCCP protocol 5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/dccp.h> 14 #include <linux/icmp.h> 15 #include <linux/module.h> 16 #include <linux/skbuff.h> 17 #include <linux/random.h> 18 19 #include <net/icmp.h> 20 #include <net/inet_common.h> 21 #include <net/inet_hashtables.h> 22 #include <net/inet_sock.h> 23 #include <net/protocol.h> 24 #include <net/sock.h> 25 #include <net/timewait_sock.h> 26 #include <net/tcp_states.h> 27 #include <net/xfrm.h> 28 29 #include "ackvec.h" 30 #include "ccid.h" 31 #include "dccp.h" 32 #include "feat.h" 33 34 /* 35 * This is the global socket data structure used for responding to 36 * the Out-of-the-blue (OOTB) packets. A control sock will be created 37 * for this socket at the initialization time. 38 */ 39 static struct socket *dccp_v4_ctl_socket; 40 41 static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) 42 { 43 return inet_csk_get_port(&dccp_hashinfo, sk, snum, 44 inet_csk_bind_conflict); 45 } 46 47 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 48 { 49 struct inet_sock *inet = inet_sk(sk); 50 struct dccp_sock *dp = dccp_sk(sk); 51 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 52 struct rtable *rt; 53 u32 daddr, nexthop; 54 int tmp; 55 int err; 56 57 dp->dccps_role = DCCP_ROLE_CLIENT; 58 59 if (dccp_service_not_initialized(sk)) 60 return -EPROTO; 61 62 if (addr_len < sizeof(struct sockaddr_in)) 63 return -EINVAL; 64 65 if (usin->sin_family != AF_INET) 66 return -EAFNOSUPPORT; 67 68 nexthop = daddr = usin->sin_addr.s_addr; 69 if (inet->opt != NULL && inet->opt->srr) { 70 if (daddr == 0) 71 return -EINVAL; 72 nexthop = inet->opt->faddr; 73 } 74 75 tmp = ip_route_connect(&rt, nexthop, inet->saddr, 76 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 77 IPPROTO_DCCP, 78 inet->sport, usin->sin_port, sk); 79 if (tmp < 0) 80 return tmp; 81 82 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 83 ip_rt_put(rt); 84 return -ENETUNREACH; 85 } 86 87 if (inet->opt == NULL || !inet->opt->srr) 88 daddr = rt->rt_dst; 89 90 if (inet->saddr == 0) 91 inet->saddr = rt->rt_src; 92 inet->rcv_saddr = inet->saddr; 93 94 inet->dport = usin->sin_port; 95 inet->daddr = daddr; 96 97 inet_csk(sk)->icsk_ext_hdr_len = 0; 98 if (inet->opt != NULL) 99 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 100 /* 101 * Socket identity is still unknown (sport may be zero). 102 * However we set state to DCCP_REQUESTING and not releasing socket 103 * lock select source port, enter ourselves into the hash tables and 104 * complete initialization after this. 105 */ 106 dccp_set_state(sk, DCCP_REQUESTING); 107 err = inet_hash_connect(&dccp_death_row, sk); 108 if (err != 0) 109 goto failure; 110 111 err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, 112 sk); 113 if (err != 0) 114 goto failure; 115 116 /* OK, now commit destination to socket. */ 117 sk_setup_caps(sk, &rt->u.dst); 118 119 dp->dccps_gar = 120 dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, 121 inet->daddr, 122 inet->sport, 123 usin->sin_port); 124 dccp_update_gss(sk, dp->dccps_iss); 125 126 inet->id = dp->dccps_iss ^ jiffies; 127 128 err = dccp_connect(sk); 129 rt = NULL; 130 if (err != 0) 131 goto failure; 132 out: 133 return err; 134 failure: 135 /* 136 * This unhashes the socket and releases the local port, if necessary. 137 */ 138 dccp_set_state(sk, DCCP_CLOSED); 139 ip_rt_put(rt); 140 sk->sk_route_caps = 0; 141 inet->dport = 0; 142 goto out; 143 } 144 145 EXPORT_SYMBOL_GPL(dccp_v4_connect); 146 147 /* 148 * This routine does path mtu discovery as defined in RFC1191. 149 */ 150 static inline void dccp_do_pmtu_discovery(struct sock *sk, 151 const struct iphdr *iph, 152 u32 mtu) 153 { 154 struct dst_entry *dst; 155 const struct inet_sock *inet = inet_sk(sk); 156 const struct dccp_sock *dp = dccp_sk(sk); 157 158 /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs 159 * send out by Linux are always < 576bytes so they should go through 160 * unfragmented). 161 */ 162 if (sk->sk_state == DCCP_LISTEN) 163 return; 164 165 /* We don't check in the destentry if pmtu discovery is forbidden 166 * on this route. We just assume that no packet_to_big packets 167 * are send back when pmtu discovery is not active. 168 * There is a small race when the user changes this flag in the 169 * route, but I think that's acceptable. 170 */ 171 if ((dst = __sk_dst_check(sk, 0)) == NULL) 172 return; 173 174 dst->ops->update_pmtu(dst, mtu); 175 176 /* Something is about to be wrong... Remember soft error 177 * for the case, if this connection will not able to recover. 178 */ 179 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 180 sk->sk_err_soft = EMSGSIZE; 181 182 mtu = dst_mtu(dst); 183 184 if (inet->pmtudisc != IP_PMTUDISC_DONT && 185 inet_csk(sk)->icsk_pmtu_cookie > mtu) { 186 dccp_sync_mss(sk, mtu); 187 188 /* 189 * From: draft-ietf-dccp-spec-11.txt 190 * 191 * DCCP-Sync packets are the best choice for upward 192 * probing, since DCCP-Sync probes do not risk application 193 * data loss. 194 */ 195 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); 196 } /* else let the usual retransmit timer handle it */ 197 } 198 199 static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, 200 struct request_sock *req) 201 { 202 int err; 203 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; 204 const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + 205 sizeof(struct dccp_hdr_ext) + 206 sizeof(struct dccp_hdr_ack_bits); 207 struct sk_buff *skb; 208 209 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) 210 return; 211 212 skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); 213 if (skb == NULL) 214 return; 215 216 /* Reserve space for headers. */ 217 skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); 218 219 skb->dst = dst_clone(rxskb->dst); 220 221 skb->h.raw = skb_push(skb, dccp_hdr_ack_len); 222 dh = dccp_hdr(skb); 223 memset(dh, 0, dccp_hdr_ack_len); 224 225 /* Build DCCP header and checksum it. */ 226 dh->dccph_type = DCCP_PKT_ACK; 227 dh->dccph_sport = rxdh->dccph_dport; 228 dh->dccph_dport = rxdh->dccph_sport; 229 dh->dccph_doff = dccp_hdr_ack_len / 4; 230 dh->dccph_x = 1; 231 232 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); 233 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), 234 DCCP_SKB_CB(rxskb)->dccpd_seq); 235 236 bh_lock_sock(dccp_v4_ctl_socket->sk); 237 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, 238 rxskb->nh.iph->daddr, 239 rxskb->nh.iph->saddr, NULL); 240 bh_unlock_sock(dccp_v4_ctl_socket->sk); 241 242 if (err == NET_XMIT_CN || err == 0) { 243 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 244 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 245 } 246 } 247 248 static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 249 struct dst_entry *dst) 250 { 251 int err = -1; 252 struct sk_buff *skb; 253 254 /* First, grab a route. */ 255 256 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) 257 goto out; 258 259 skb = dccp_make_response(sk, dst, req); 260 if (skb != NULL) { 261 const struct inet_request_sock *ireq = inet_rsk(req); 262 struct dccp_hdr *dh = dccp_hdr(skb); 263 264 dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, 265 ireq->rmt_addr); 266 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 267 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 268 ireq->rmt_addr, 269 ireq->opt); 270 if (err == NET_XMIT_CN) 271 err = 0; 272 } 273 274 out: 275 dst_release(dst); 276 return err; 277 } 278 279 /* 280 * This routine is called by the ICMP module when it gets some sort of error 281 * condition. If err < 0 then the socket should be closed and the error 282 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. 283 * After adjustment header points to the first 8 bytes of the tcp header. We 284 * need to find the appropriate port. 285 * 286 * The locking strategy used here is very "optimistic". When someone else 287 * accesses the socket the ICMP is just dropped and for some paths there is no 288 * check at all. A more general error queue to queue errors for later handling 289 * is probably better. 290 */ 291 static void dccp_v4_err(struct sk_buff *skb, u32 info) 292 { 293 const struct iphdr *iph = (struct iphdr *)skb->data; 294 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + 295 (iph->ihl << 2)); 296 struct dccp_sock *dp; 297 struct inet_sock *inet; 298 const int type = skb->h.icmph->type; 299 const int code = skb->h.icmph->code; 300 struct sock *sk; 301 __u64 seq; 302 int err; 303 304 if (skb->len < (iph->ihl << 2) + 8) { 305 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 306 return; 307 } 308 309 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, 310 iph->saddr, dh->dccph_sport, inet_iif(skb)); 311 if (sk == NULL) { 312 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 313 return; 314 } 315 316 if (sk->sk_state == DCCP_TIME_WAIT) { 317 inet_twsk_put((struct inet_timewait_sock *)sk); 318 return; 319 } 320 321 bh_lock_sock(sk); 322 /* If too many ICMPs get dropped on busy 323 * servers this needs to be solved differently. 324 */ 325 if (sock_owned_by_user(sk)) 326 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 327 328 if (sk->sk_state == DCCP_CLOSED) 329 goto out; 330 331 dp = dccp_sk(sk); 332 seq = dccp_hdr_seq(skb); 333 if (sk->sk_state != DCCP_LISTEN && 334 !between48(seq, dp->dccps_swl, dp->dccps_swh)) { 335 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); 336 goto out; 337 } 338 339 switch (type) { 340 case ICMP_SOURCE_QUENCH: 341 /* Just silently ignore these. */ 342 goto out; 343 case ICMP_PARAMETERPROB: 344 err = EPROTO; 345 break; 346 case ICMP_DEST_UNREACH: 347 if (code > NR_ICMP_UNREACH) 348 goto out; 349 350 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 351 if (!sock_owned_by_user(sk)) 352 dccp_do_pmtu_discovery(sk, iph, info); 353 goto out; 354 } 355 356 err = icmp_err_convert[code].errno; 357 break; 358 case ICMP_TIME_EXCEEDED: 359 err = EHOSTUNREACH; 360 break; 361 default: 362 goto out; 363 } 364 365 switch (sk->sk_state) { 366 struct request_sock *req , **prev; 367 case DCCP_LISTEN: 368 if (sock_owned_by_user(sk)) 369 goto out; 370 req = inet_csk_search_req(sk, &prev, dh->dccph_dport, 371 iph->daddr, iph->saddr); 372 if (!req) 373 goto out; 374 375 /* 376 * ICMPs are not backlogged, hence we cannot get an established 377 * socket here. 378 */ 379 BUG_TRAP(!req->sk); 380 381 if (seq != dccp_rsk(req)->dreq_iss) { 382 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 383 goto out; 384 } 385 /* 386 * Still in RESPOND, just remove it silently. 387 * There is no good way to pass the error to the newly 388 * created socket, and POSIX does not want network 389 * errors returned from accept(). 390 */ 391 inet_csk_reqsk_queue_drop(sk, req, prev); 392 goto out; 393 394 case DCCP_REQUESTING: 395 case DCCP_RESPOND: 396 if (!sock_owned_by_user(sk)) { 397 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 398 sk->sk_err = err; 399 400 sk->sk_error_report(sk); 401 402 dccp_done(sk); 403 } else 404 sk->sk_err_soft = err; 405 goto out; 406 } 407 408 /* If we've already connected we will keep trying 409 * until we time out, or the user gives up. 410 * 411 * rfc1122 4.2.3.9 allows to consider as hard errors 412 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 413 * but it is obsoleted by pmtu discovery). 414 * 415 * Note, that in modern internet, where routing is unreliable 416 * and in each dark corner broken firewalls sit, sending random 417 * errors ordered by their masters even this two messages finally lose 418 * their original sense (even Linux sends invalid PORT_UNREACHs) 419 * 420 * Now we are in compliance with RFCs. 421 * --ANK (980905) 422 */ 423 424 inet = inet_sk(sk); 425 if (!sock_owned_by_user(sk) && inet->recverr) { 426 sk->sk_err = err; 427 sk->sk_error_report(sk); 428 } else /* Only an error on timeout */ 429 sk->sk_err_soft = err; 430 out: 431 bh_unlock_sock(sk); 432 sock_put(sk); 433 } 434 435 /* This routine computes an IPv4 DCCP checksum. */ 436 void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 437 { 438 const struct inet_sock *inet = inet_sk(sk); 439 struct dccp_hdr *dh = dccp_hdr(skb); 440 441 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); 442 } 443 444 EXPORT_SYMBOL_GPL(dccp_v4_send_check); 445 446 static inline u64 dccp_v4_init_sequence(const struct sock *sk, 447 const struct sk_buff *skb) 448 { 449 return secure_dccp_sequence_number(skb->nh.iph->daddr, 450 skb->nh.iph->saddr, 451 dccp_hdr(skb)->dccph_dport, 452 dccp_hdr(skb)->dccph_sport); 453 } 454 455 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 456 { 457 struct inet_request_sock *ireq; 458 struct dccp_sock dp; 459 struct request_sock *req; 460 struct dccp_request_sock *dreq; 461 const __be32 saddr = skb->nh.iph->saddr; 462 const __be32 daddr = skb->nh.iph->daddr; 463 const __be32 service = dccp_hdr_request(skb)->dccph_req_service; 464 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 465 __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; 466 467 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ 468 if (((struct rtable *)skb->dst)->rt_flags & 469 (RTCF_BROADCAST | RTCF_MULTICAST)) { 470 reset_code = DCCP_RESET_CODE_NO_CONNECTION; 471 goto drop; 472 } 473 474 if (dccp_bad_service_code(sk, service)) { 475 reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; 476 goto drop; 477 } 478 /* 479 * TW buckets are converted to open requests without 480 * limitations, they conserve resources and peer is 481 * evidently real one. 482 */ 483 if (inet_csk_reqsk_queue_is_full(sk)) 484 goto drop; 485 486 /* 487 * Accept backlog is full. If we have already queued enough 488 * of warm entries in syn queue, drop request. It is better than 489 * clogging syn queue with openreqs with exponentially increasing 490 * timeout. 491 */ 492 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 493 goto drop; 494 495 req = reqsk_alloc(sk->sk_prot->rsk_prot); 496 if (req == NULL) 497 goto drop; 498 499 if (dccp_parse_options(sk, skb)) 500 goto drop_and_free; 501 502 dccp_openreq_init(req, &dp, skb); 503 504 ireq = inet_rsk(req); 505 ireq->loc_addr = daddr; 506 ireq->rmt_addr = saddr; 507 req->rcv_wnd = 100; /* Fake, option parsing will get the 508 right value */ 509 ireq->opt = NULL; 510 511 /* 512 * Step 3: Process LISTEN state 513 * 514 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie 515 * 516 * In fact we defer setting S.GSR, S.SWL, S.SWH to 517 * dccp_create_openreq_child. 518 */ 519 dreq = dccp_rsk(req); 520 dreq->dreq_isr = dcb->dccpd_seq; 521 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); 522 dreq->dreq_service = service; 523 524 if (dccp_v4_send_response(sk, req, NULL)) 525 goto drop_and_free; 526 527 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 528 return 0; 529 530 drop_and_free: 531 reqsk_free(req); 532 drop: 533 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 534 dcb->dccpd_reset_code = reset_code; 535 return -1; 536 } 537 538 EXPORT_SYMBOL_GPL(dccp_v4_conn_request); 539 540 /* 541 * The three way handshake has completed - we got a valid ACK or DATAACK - 542 * now create the new socket. 543 * 544 * This is the equivalent of TCP's tcp_v4_syn_recv_sock 545 */ 546 struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, 547 struct request_sock *req, 548 struct dst_entry *dst) 549 { 550 struct inet_request_sock *ireq; 551 struct inet_sock *newinet; 552 struct dccp_sock *newdp; 553 struct sock *newsk; 554 555 if (sk_acceptq_is_full(sk)) 556 goto exit_overflow; 557 558 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) 559 goto exit; 560 561 newsk = dccp_create_openreq_child(sk, req, skb); 562 if (newsk == NULL) 563 goto exit; 564 565 sk_setup_caps(newsk, dst); 566 567 newdp = dccp_sk(newsk); 568 newinet = inet_sk(newsk); 569 ireq = inet_rsk(req); 570 newinet->daddr = ireq->rmt_addr; 571 newinet->rcv_saddr = ireq->loc_addr; 572 newinet->saddr = ireq->loc_addr; 573 newinet->opt = ireq->opt; 574 ireq->opt = NULL; 575 newinet->mc_index = inet_iif(skb); 576 newinet->mc_ttl = skb->nh.iph->ttl; 577 newinet->id = jiffies; 578 579 dccp_sync_mss(newsk, dst_mtu(dst)); 580 581 __inet_hash(&dccp_hashinfo, newsk, 0); 582 __inet_inherit_port(&dccp_hashinfo, sk, newsk); 583 584 return newsk; 585 586 exit_overflow: 587 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); 588 exit: 589 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); 590 dst_release(dst); 591 return NULL; 592 } 593 594 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); 595 596 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 597 { 598 const struct dccp_hdr *dh = dccp_hdr(skb); 599 const struct iphdr *iph = skb->nh.iph; 600 struct sock *nsk; 601 struct request_sock **prev; 602 /* Find possible connection requests. */ 603 struct request_sock *req = inet_csk_search_req(sk, &prev, 604 dh->dccph_sport, 605 iph->saddr, iph->daddr); 606 if (req != NULL) 607 return dccp_check_req(sk, skb, req, prev); 608 609 nsk = __inet_lookup_established(&dccp_hashinfo, 610 iph->saddr, dh->dccph_sport, 611 iph->daddr, ntohs(dh->dccph_dport), 612 inet_iif(skb)); 613 if (nsk != NULL) { 614 if (nsk->sk_state != DCCP_TIME_WAIT) { 615 bh_lock_sock(nsk); 616 return nsk; 617 } 618 inet_twsk_put((struct inet_timewait_sock *)nsk); 619 return NULL; 620 } 621 622 return sk; 623 } 624 625 int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr, 626 const __be32 daddr) 627 { 628 const struct dccp_hdr* dh = dccp_hdr(skb); 629 int checksum_len; 630 u32 tmp; 631 632 if (dh->dccph_cscov == 0) 633 checksum_len = skb->len; 634 else { 635 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); 636 checksum_len = checksum_len < skb->len ? checksum_len : 637 skb->len; 638 } 639 640 tmp = csum_partial((unsigned char *)dh, checksum_len, 0); 641 return csum_tcpudp_magic(saddr, daddr, checksum_len, 642 IPPROTO_DCCP, tmp); 643 } 644 645 EXPORT_SYMBOL_GPL(dccp_v4_checksum); 646 647 static int dccp_v4_verify_checksum(struct sk_buff *skb, 648 const __be32 saddr, const __be32 daddr) 649 { 650 struct dccp_hdr *dh = dccp_hdr(skb); 651 int checksum_len; 652 u32 tmp; 653 654 if (dh->dccph_cscov == 0) 655 checksum_len = skb->len; 656 else { 657 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); 658 checksum_len = checksum_len < skb->len ? checksum_len : 659 skb->len; 660 } 661 tmp = csum_partial((unsigned char *)dh, checksum_len, 0); 662 return csum_tcpudp_magic(saddr, daddr, checksum_len, 663 IPPROTO_DCCP, tmp) == 0 ? 0 : -1; 664 } 665 666 static struct dst_entry* dccp_v4_route_skb(struct sock *sk, 667 struct sk_buff *skb) 668 { 669 struct rtable *rt; 670 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, 671 .nl_u = { .ip4_u = 672 { .daddr = skb->nh.iph->saddr, 673 .saddr = skb->nh.iph->daddr, 674 .tos = RT_CONN_FLAGS(sk) } }, 675 .proto = sk->sk_protocol, 676 .uli_u = { .ports = 677 { .sport = dccp_hdr(skb)->dccph_dport, 678 .dport = dccp_hdr(skb)->dccph_sport } 679 } 680 }; 681 682 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 683 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 684 return NULL; 685 } 686 687 return &rt->u.dst; 688 } 689 690 static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) 691 { 692 int err; 693 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; 694 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + 695 sizeof(struct dccp_hdr_ext) + 696 sizeof(struct dccp_hdr_reset); 697 struct sk_buff *skb; 698 struct dst_entry *dst; 699 u64 seqno; 700 701 /* Never send a reset in response to a reset. */ 702 if (rxdh->dccph_type == DCCP_PKT_RESET) 703 return; 704 705 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) 706 return; 707 708 dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb); 709 if (dst == NULL) 710 return; 711 712 skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, 713 GFP_ATOMIC); 714 if (skb == NULL) 715 goto out; 716 717 /* Reserve space for headers. */ 718 skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); 719 skb->dst = dst_clone(dst); 720 721 skb->h.raw = skb_push(skb, dccp_hdr_reset_len); 722 dh = dccp_hdr(skb); 723 memset(dh, 0, dccp_hdr_reset_len); 724 725 /* Build DCCP header and checksum it. */ 726 dh->dccph_type = DCCP_PKT_RESET; 727 dh->dccph_sport = rxdh->dccph_dport; 728 dh->dccph_dport = rxdh->dccph_sport; 729 dh->dccph_doff = dccp_hdr_reset_len / 4; 730 dh->dccph_x = 1; 731 dccp_hdr_reset(skb)->dccph_reset_code = 732 DCCP_SKB_CB(rxskb)->dccpd_reset_code; 733 734 /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ 735 seqno = 0; 736 if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 737 dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); 738 739 dccp_hdr_set_seq(dh, seqno); 740 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), 741 DCCP_SKB_CB(rxskb)->dccpd_seq); 742 743 dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, 744 rxskb->nh.iph->daddr); 745 746 bh_lock_sock(dccp_v4_ctl_socket->sk); 747 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, 748 rxskb->nh.iph->daddr, 749 rxskb->nh.iph->saddr, NULL); 750 bh_unlock_sock(dccp_v4_ctl_socket->sk); 751 752 if (err == NET_XMIT_CN || err == 0) { 753 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 754 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 755 } 756 out: 757 dst_release(dst); 758 } 759 760 int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 761 { 762 struct dccp_hdr *dh = dccp_hdr(skb); 763 764 if (sk->sk_state == DCCP_OPEN) { /* Fast path */ 765 if (dccp_rcv_established(sk, skb, dh, skb->len)) 766 goto reset; 767 return 0; 768 } 769 770 /* 771 * Step 3: Process LISTEN state 772 * If S.state == LISTEN, 773 * If P.type == Request or P contains a valid Init Cookie 774 * option, 775 * * Must scan the packet's options to check for an Init 776 * Cookie. Only the Init Cookie is processed here, 777 * however; other options are processed in Step 8. This 778 * scan need only be performed if the endpoint uses Init 779 * Cookies * 780 * * Generate a new socket and switch to that socket * 781 * Set S := new socket for this port pair 782 * S.state = RESPOND 783 * Choose S.ISS (initial seqno) or set from Init Cookie 784 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie 785 * Continue with S.state == RESPOND 786 * * A Response packet will be generated in Step 11 * 787 * Otherwise, 788 * Generate Reset(No Connection) unless P.type == Reset 789 * Drop packet and return 790 * 791 * NOTE: the check for the packet types is done in 792 * dccp_rcv_state_process 793 */ 794 if (sk->sk_state == DCCP_LISTEN) { 795 struct sock *nsk = dccp_v4_hnd_req(sk, skb); 796 797 if (nsk == NULL) 798 goto discard; 799 800 if (nsk != sk) { 801 if (dccp_child_process(sk, nsk, skb)) 802 goto reset; 803 return 0; 804 } 805 } 806 807 if (dccp_rcv_state_process(sk, skb, dh, skb->len)) 808 goto reset; 809 return 0; 810 811 reset: 812 dccp_v4_ctl_send_reset(skb); 813 discard: 814 kfree_skb(skb); 815 return 0; 816 } 817 818 EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); 819 820 int dccp_invalid_packet(struct sk_buff *skb) 821 { 822 const struct dccp_hdr *dh; 823 824 if (skb->pkt_type != PACKET_HOST) 825 return 1; 826 827 if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { 828 LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); 829 return 1; 830 } 831 832 dh = dccp_hdr(skb); 833 834 /* If the packet type is not understood, drop packet and return */ 835 if (dh->dccph_type >= DCCP_PKT_INVALID) { 836 LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); 837 return 1; 838 } 839 840 /* 841 * If P.Data Offset is too small for packet type, or too large for 842 * packet, drop packet and return 843 */ 844 if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { 845 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " 846 "too small 1\n", 847 dh->dccph_doff); 848 return 1; 849 } 850 851 if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { 852 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " 853 "too small 2\n", 854 dh->dccph_doff); 855 return 1; 856 } 857 858 dh = dccp_hdr(skb); 859 860 /* 861 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet 862 * has short sequence numbers), drop packet and return 863 */ 864 if (dh->dccph_x == 0 && 865 dh->dccph_type != DCCP_PKT_DATA && 866 dh->dccph_type != DCCP_PKT_ACK && 867 dh->dccph_type != DCCP_PKT_DATAACK) { 868 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " 869 "nor DataAck and P.X == 0\n", 870 dccp_packet_name(dh->dccph_type)); 871 return 1; 872 } 873 874 return 0; 875 } 876 877 EXPORT_SYMBOL_GPL(dccp_invalid_packet); 878 879 /* this is called when real data arrives */ 880 static int dccp_v4_rcv(struct sk_buff *skb) 881 { 882 const struct dccp_hdr *dh; 883 struct sock *sk; 884 885 /* Step 1: Check header basics: */ 886 887 if (dccp_invalid_packet(skb)) 888 goto discard_it; 889 890 /* If the header checksum is incorrect, drop packet and return */ 891 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, 892 skb->nh.iph->daddr) < 0) { 893 LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", 894 __FUNCTION__); 895 goto discard_it; 896 } 897 898 dh = dccp_hdr(skb); 899 900 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); 901 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; 902 903 dccp_pr_debug("%8.8s " 904 "src=%u.%u.%u.%u@%-5d " 905 "dst=%u.%u.%u.%u@%-5d seq=%llu", 906 dccp_packet_name(dh->dccph_type), 907 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), 908 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), 909 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); 910 911 if (dccp_packet_without_ack(skb)) { 912 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; 913 dccp_pr_debug_cat("\n"); 914 } else { 915 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 916 dccp_pr_debug_cat(", ack=%llu\n", 917 (unsigned long long) 918 DCCP_SKB_CB(skb)->dccpd_ack_seq); 919 } 920 921 /* Step 2: 922 * Look up flow ID in table and get corresponding socket */ 923 sk = __inet_lookup(&dccp_hashinfo, 924 skb->nh.iph->saddr, dh->dccph_sport, 925 skb->nh.iph->daddr, ntohs(dh->dccph_dport), 926 inet_iif(skb)); 927 928 /* 929 * Step 2: 930 * If no socket ... 931 * Generate Reset(No Connection) unless P.type == Reset 932 * Drop packet and return 933 */ 934 if (sk == NULL) { 935 dccp_pr_debug("failed to look up flow ID in table and " 936 "get corresponding socket\n"); 937 goto no_dccp_socket; 938 } 939 940 /* 941 * Step 2: 942 * ... or S.state == TIMEWAIT, 943 * Generate Reset(No Connection) unless P.type == Reset 944 * Drop packet and return 945 */ 946 947 if (sk->sk_state == DCCP_TIME_WAIT) { 948 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " 949 "do_time_wait\n"); 950 goto do_time_wait; 951 } 952 953 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 954 goto discard_and_relse; 955 nf_reset(skb); 956 957 return sk_receive_skb(sk, skb); 958 959 no_dccp_socket: 960 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 961 goto discard_it; 962 /* 963 * Step 2: 964 * Generate Reset(No Connection) unless P.type == Reset 965 * Drop packet and return 966 */ 967 if (dh->dccph_type != DCCP_PKT_RESET) { 968 DCCP_SKB_CB(skb)->dccpd_reset_code = 969 DCCP_RESET_CODE_NO_CONNECTION; 970 dccp_v4_ctl_send_reset(skb); 971 } 972 973 discard_it: 974 /* Discard frame. */ 975 kfree_skb(skb); 976 return 0; 977 978 discard_and_relse: 979 sock_put(sk); 980 goto discard_it; 981 982 do_time_wait: 983 inet_twsk_put((struct inet_timewait_sock *)sk); 984 goto no_dccp_socket; 985 } 986 987 static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { 988 .queue_xmit = ip_queue_xmit, 989 .send_check = dccp_v4_send_check, 990 .rebuild_header = inet_sk_rebuild_header, 991 .conn_request = dccp_v4_conn_request, 992 .syn_recv_sock = dccp_v4_request_recv_sock, 993 .net_header_len = sizeof(struct iphdr), 994 .setsockopt = ip_setsockopt, 995 .getsockopt = ip_getsockopt, 996 .addr2sockaddr = inet_csk_addr2sockaddr, 997 .sockaddr_len = sizeof(struct sockaddr_in), 998 #ifdef CONFIG_COMPAT 999 .compat_setsockopt = compat_ip_setsockopt, 1000 .compat_getsockopt = compat_ip_getsockopt, 1001 #endif 1002 }; 1003 1004 static int dccp_v4_init_sock(struct sock *sk) 1005 { 1006 static __u8 dccp_v4_ctl_sock_initialized; 1007 int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); 1008 1009 if (err == 0) { 1010 if (unlikely(!dccp_v4_ctl_sock_initialized)) 1011 dccp_v4_ctl_sock_initialized = 1; 1012 inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; 1013 } 1014 1015 return err; 1016 } 1017 1018 static void dccp_v4_reqsk_destructor(struct request_sock *req) 1019 { 1020 kfree(inet_rsk(req)->opt); 1021 } 1022 1023 static struct request_sock_ops dccp_request_sock_ops = { 1024 .family = PF_INET, 1025 .obj_size = sizeof(struct dccp_request_sock), 1026 .rtx_syn_ack = dccp_v4_send_response, 1027 .send_ack = dccp_v4_reqsk_send_ack, 1028 .destructor = dccp_v4_reqsk_destructor, 1029 .send_reset = dccp_v4_ctl_send_reset, 1030 }; 1031 1032 static struct timewait_sock_ops dccp_timewait_sock_ops = { 1033 .twsk_obj_size = sizeof(struct inet_timewait_sock), 1034 }; 1035 1036 static struct proto dccp_v4_prot = { 1037 .name = "DCCP", 1038 .owner = THIS_MODULE, 1039 .close = dccp_close, 1040 .connect = dccp_v4_connect, 1041 .disconnect = dccp_disconnect, 1042 .ioctl = dccp_ioctl, 1043 .init = dccp_v4_init_sock, 1044 .setsockopt = dccp_setsockopt, 1045 .getsockopt = dccp_getsockopt, 1046 .sendmsg = dccp_sendmsg, 1047 .recvmsg = dccp_recvmsg, 1048 .backlog_rcv = dccp_v4_do_rcv, 1049 .hash = dccp_hash, 1050 .unhash = dccp_unhash, 1051 .accept = inet_csk_accept, 1052 .get_port = dccp_v4_get_port, 1053 .shutdown = dccp_shutdown, 1054 .destroy = dccp_destroy_sock, 1055 .orphan_count = &dccp_orphan_count, 1056 .max_header = MAX_DCCP_HEADER, 1057 .obj_size = sizeof(struct dccp_sock), 1058 .rsk_prot = &dccp_request_sock_ops, 1059 .twsk_prot = &dccp_timewait_sock_ops, 1060 #ifdef CONFIG_COMPAT 1061 .compat_setsockopt = compat_dccp_setsockopt, 1062 .compat_getsockopt = compat_dccp_getsockopt, 1063 #endif 1064 }; 1065 1066 static struct net_protocol dccp_v4_protocol = { 1067 .handler = dccp_v4_rcv, 1068 .err_handler = dccp_v4_err, 1069 .no_policy = 1, 1070 }; 1071 1072 static const struct proto_ops inet_dccp_ops = { 1073 .family = PF_INET, 1074 .owner = THIS_MODULE, 1075 .release = inet_release, 1076 .bind = inet_bind, 1077 .connect = inet_stream_connect, 1078 .socketpair = sock_no_socketpair, 1079 .accept = inet_accept, 1080 .getname = inet_getname, 1081 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ 1082 .poll = dccp_poll, 1083 .ioctl = inet_ioctl, 1084 /* FIXME: work on inet_listen to rename it to sock_common_listen */ 1085 .listen = inet_dccp_listen, 1086 .shutdown = inet_shutdown, 1087 .setsockopt = sock_common_setsockopt, 1088 .getsockopt = sock_common_getsockopt, 1089 .sendmsg = inet_sendmsg, 1090 .recvmsg = sock_common_recvmsg, 1091 .mmap = sock_no_mmap, 1092 .sendpage = sock_no_sendpage, 1093 #ifdef CONFIG_COMPAT 1094 .compat_setsockopt = compat_sock_common_setsockopt, 1095 .compat_getsockopt = compat_sock_common_getsockopt, 1096 #endif 1097 }; 1098 1099 static struct inet_protosw dccp_v4_protosw = { 1100 .type = SOCK_DCCP, 1101 .protocol = IPPROTO_DCCP, 1102 .prot = &dccp_v4_prot, 1103 .ops = &inet_dccp_ops, 1104 .capability = -1, 1105 .no_check = 0, 1106 .flags = INET_PROTOSW_ICSK, 1107 }; 1108 1109 static int __init dccp_v4_init(void) 1110 { 1111 int err = proto_register(&dccp_v4_prot, 1); 1112 1113 if (err != 0) 1114 goto out; 1115 1116 err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1117 if (err != 0) 1118 goto out_proto_unregister; 1119 1120 inet_register_protosw(&dccp_v4_protosw); 1121 1122 err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET, 1123 SOCK_DCCP, IPPROTO_DCCP); 1124 if (err) 1125 goto out_unregister_protosw; 1126 out: 1127 return err; 1128 out_unregister_protosw: 1129 inet_unregister_protosw(&dccp_v4_protosw); 1130 inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1131 out_proto_unregister: 1132 proto_unregister(&dccp_v4_prot); 1133 goto out; 1134 } 1135 1136 static void __exit dccp_v4_exit(void) 1137 { 1138 inet_unregister_protosw(&dccp_v4_protosw); 1139 inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1140 proto_unregister(&dccp_v4_prot); 1141 } 1142 1143 module_init(dccp_v4_init); 1144 module_exit(dccp_v4_exit); 1145 1146 /* 1147 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) 1148 * values directly, Also cover the case where the protocol is not specified, 1149 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP 1150 */ 1151 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); 1152 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); 1153 MODULE_LICENSE("GPL"); 1154 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); 1155 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); 1156