1 /* 2 * net/dccp/ipv4.c 3 * 4 * An implementation of the DCCP protocol 5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 */ 12 13 #include <linux/dccp.h> 14 #include <linux/icmp.h> 15 #include <linux/module.h> 16 #include <linux/skbuff.h> 17 #include <linux/random.h> 18 19 #include <net/icmp.h> 20 #include <net/inet_common.h> 21 #include <net/inet_hashtables.h> 22 #include <net/inet_sock.h> 23 #include <net/protocol.h> 24 #include <net/sock.h> 25 #include <net/timewait_sock.h> 26 #include <net/tcp_states.h> 27 #include <net/xfrm.h> 28 29 #include "ackvec.h" 30 #include "ccid.h" 31 #include "dccp.h" 32 #include "feat.h" 33 34 /* 35 * This is the global socket data structure used for responding to 36 * the Out-of-the-blue (OOTB) packets. A control sock will be created 37 * for this socket at the initialization time. 38 */ 39 static struct socket *dccp_v4_ctl_socket; 40 41 static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) 42 { 43 return inet_csk_get_port(&dccp_hashinfo, sk, snum, 44 inet_csk_bind_conflict); 45 } 46 47 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 48 { 49 struct inet_sock *inet = inet_sk(sk); 50 struct dccp_sock *dp = dccp_sk(sk); 51 const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; 52 struct rtable *rt; 53 u32 daddr, nexthop; 54 int tmp; 55 int err; 56 57 dp->dccps_role = DCCP_ROLE_CLIENT; 58 59 if (dccp_service_not_initialized(sk)) 60 return -EPROTO; 61 62 if (addr_len < sizeof(struct sockaddr_in)) 63 return -EINVAL; 64 65 if (usin->sin_family != AF_INET) 66 return -EAFNOSUPPORT; 67 68 nexthop = daddr = usin->sin_addr.s_addr; 69 if (inet->opt != NULL && inet->opt->srr) { 70 if (daddr == 0) 71 return -EINVAL; 72 nexthop = inet->opt->faddr; 73 } 74 75 tmp = ip_route_connect(&rt, nexthop, inet->saddr, 76 RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, 77 IPPROTO_DCCP, 78 inet->sport, usin->sin_port, sk); 79 if (tmp < 0) 80 return tmp; 81 82 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { 83 ip_rt_put(rt); 84 return -ENETUNREACH; 85 } 86 87 if (inet->opt == NULL || !inet->opt->srr) 88 daddr = rt->rt_dst; 89 90 if (inet->saddr == 0) 91 inet->saddr = rt->rt_src; 92 inet->rcv_saddr = inet->saddr; 93 94 inet->dport = usin->sin_port; 95 inet->daddr = daddr; 96 97 inet_csk(sk)->icsk_ext_hdr_len = 0; 98 if (inet->opt != NULL) 99 inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; 100 /* 101 * Socket identity is still unknown (sport may be zero). 102 * However we set state to DCCP_REQUESTING and not releasing socket 103 * lock select source port, enter ourselves into the hash tables and 104 * complete initialization after this. 105 */ 106 dccp_set_state(sk, DCCP_REQUESTING); 107 err = inet_hash_connect(&dccp_death_row, sk); 108 if (err != 0) 109 goto failure; 110 111 err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, 112 sk); 113 if (err != 0) 114 goto failure; 115 116 /* OK, now commit destination to socket. */ 117 sk_setup_caps(sk, &rt->u.dst); 118 119 dp->dccps_gar = 120 dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, 121 inet->daddr, 122 inet->sport, 123 usin->sin_port); 124 dccp_update_gss(sk, dp->dccps_iss); 125 126 inet->id = dp->dccps_iss ^ jiffies; 127 128 err = dccp_connect(sk); 129 rt = NULL; 130 if (err != 0) 131 goto failure; 132 out: 133 return err; 134 failure: 135 /* 136 * This unhashes the socket and releases the local port, if necessary. 137 */ 138 dccp_set_state(sk, DCCP_CLOSED); 139 ip_rt_put(rt); 140 sk->sk_route_caps = 0; 141 inet->dport = 0; 142 goto out; 143 } 144 145 EXPORT_SYMBOL_GPL(dccp_v4_connect); 146 147 /* 148 * This routine does path mtu discovery as defined in RFC1191. 149 */ 150 static inline void dccp_do_pmtu_discovery(struct sock *sk, 151 const struct iphdr *iph, 152 u32 mtu) 153 { 154 struct dst_entry *dst; 155 const struct inet_sock *inet = inet_sk(sk); 156 const struct dccp_sock *dp = dccp_sk(sk); 157 158 /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs 159 * send out by Linux are always < 576bytes so they should go through 160 * unfragmented). 161 */ 162 if (sk->sk_state == DCCP_LISTEN) 163 return; 164 165 /* We don't check in the destentry if pmtu discovery is forbidden 166 * on this route. We just assume that no packet_to_big packets 167 * are send back when pmtu discovery is not active. 168 * There is a small race when the user changes this flag in the 169 * route, but I think that's acceptable. 170 */ 171 if ((dst = __sk_dst_check(sk, 0)) == NULL) 172 return; 173 174 dst->ops->update_pmtu(dst, mtu); 175 176 /* Something is about to be wrong... Remember soft error 177 * for the case, if this connection will not able to recover. 178 */ 179 if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) 180 sk->sk_err_soft = EMSGSIZE; 181 182 mtu = dst_mtu(dst); 183 184 if (inet->pmtudisc != IP_PMTUDISC_DONT && 185 inet_csk(sk)->icsk_pmtu_cookie > mtu) { 186 dccp_sync_mss(sk, mtu); 187 188 /* 189 * From: draft-ietf-dccp-spec-11.txt 190 * 191 * DCCP-Sync packets are the best choice for upward 192 * probing, since DCCP-Sync probes do not risk application 193 * data loss. 194 */ 195 dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); 196 } /* else let the usual retransmit timer handle it */ 197 } 198 199 static void dccp_v4_reqsk_send_ack(struct sk_buff *rxskb, 200 struct request_sock *req) 201 { 202 int err; 203 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; 204 const u32 dccp_hdr_ack_len = sizeof(struct dccp_hdr) + 205 sizeof(struct dccp_hdr_ext) + 206 sizeof(struct dccp_hdr_ack_bits); 207 struct sk_buff *skb; 208 209 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) 210 return; 211 212 skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, GFP_ATOMIC); 213 if (skb == NULL) 214 return; 215 216 /* Reserve space for headers. */ 217 skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); 218 219 skb->dst = dst_clone(rxskb->dst); 220 221 skb->h.raw = skb_push(skb, dccp_hdr_ack_len); 222 dh = dccp_hdr(skb); 223 memset(dh, 0, dccp_hdr_ack_len); 224 225 /* Build DCCP header and checksum it. */ 226 dh->dccph_type = DCCP_PKT_ACK; 227 dh->dccph_sport = rxdh->dccph_dport; 228 dh->dccph_dport = rxdh->dccph_sport; 229 dh->dccph_doff = dccp_hdr_ack_len / 4; 230 dh->dccph_x = 1; 231 232 dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); 233 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), 234 DCCP_SKB_CB(rxskb)->dccpd_seq); 235 236 bh_lock_sock(dccp_v4_ctl_socket->sk); 237 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, 238 rxskb->nh.iph->daddr, 239 rxskb->nh.iph->saddr, NULL); 240 bh_unlock_sock(dccp_v4_ctl_socket->sk); 241 242 if (err == NET_XMIT_CN || err == 0) { 243 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 244 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 245 } 246 } 247 248 static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 249 struct dst_entry *dst) 250 { 251 int err = -1; 252 struct sk_buff *skb; 253 254 /* First, grab a route. */ 255 256 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) 257 goto out; 258 259 skb = dccp_make_response(sk, dst, req); 260 if (skb != NULL) { 261 const struct inet_request_sock *ireq = inet_rsk(req); 262 struct dccp_hdr *dh = dccp_hdr(skb); 263 264 dh->dccph_checksum = dccp_v4_checksum(skb, ireq->loc_addr, 265 ireq->rmt_addr); 266 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 267 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 268 ireq->rmt_addr, 269 ireq->opt); 270 if (err == NET_XMIT_CN) 271 err = 0; 272 } 273 274 out: 275 dst_release(dst); 276 return err; 277 } 278 279 /* 280 * This routine is called by the ICMP module when it gets some sort of error 281 * condition. If err < 0 then the socket should be closed and the error 282 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. 283 * After adjustment header points to the first 8 bytes of the tcp header. We 284 * need to find the appropriate port. 285 * 286 * The locking strategy used here is very "optimistic". When someone else 287 * accesses the socket the ICMP is just dropped and for some paths there is no 288 * check at all. A more general error queue to queue errors for later handling 289 * is probably better. 290 */ 291 static void dccp_v4_err(struct sk_buff *skb, u32 info) 292 { 293 const struct iphdr *iph = (struct iphdr *)skb->data; 294 const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + 295 (iph->ihl << 2)); 296 struct dccp_sock *dp; 297 struct inet_sock *inet; 298 const int type = skb->h.icmph->type; 299 const int code = skb->h.icmph->code; 300 struct sock *sk; 301 __u64 seq; 302 int err; 303 304 if (skb->len < (iph->ihl << 2) + 8) { 305 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 306 return; 307 } 308 309 sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, 310 iph->saddr, dh->dccph_sport, inet_iif(skb)); 311 if (sk == NULL) { 312 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); 313 return; 314 } 315 316 if (sk->sk_state == DCCP_TIME_WAIT) { 317 inet_twsk_put((struct inet_timewait_sock *)sk); 318 return; 319 } 320 321 bh_lock_sock(sk); 322 /* If too many ICMPs get dropped on busy 323 * servers this needs to be solved differently. 324 */ 325 if (sock_owned_by_user(sk)) 326 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); 327 328 if (sk->sk_state == DCCP_CLOSED) 329 goto out; 330 331 dp = dccp_sk(sk); 332 seq = dccp_hdr_seq(skb); 333 if (sk->sk_state != DCCP_LISTEN && 334 !between48(seq, dp->dccps_swl, dp->dccps_swh)) { 335 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); 336 goto out; 337 } 338 339 switch (type) { 340 case ICMP_SOURCE_QUENCH: 341 /* Just silently ignore these. */ 342 goto out; 343 case ICMP_PARAMETERPROB: 344 err = EPROTO; 345 break; 346 case ICMP_DEST_UNREACH: 347 if (code > NR_ICMP_UNREACH) 348 goto out; 349 350 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ 351 if (!sock_owned_by_user(sk)) 352 dccp_do_pmtu_discovery(sk, iph, info); 353 goto out; 354 } 355 356 err = icmp_err_convert[code].errno; 357 break; 358 case ICMP_TIME_EXCEEDED: 359 err = EHOSTUNREACH; 360 break; 361 default: 362 goto out; 363 } 364 365 switch (sk->sk_state) { 366 struct request_sock *req , **prev; 367 case DCCP_LISTEN: 368 if (sock_owned_by_user(sk)) 369 goto out; 370 req = inet_csk_search_req(sk, &prev, dh->dccph_dport, 371 iph->daddr, iph->saddr); 372 if (!req) 373 goto out; 374 375 /* 376 * ICMPs are not backlogged, hence we cannot get an established 377 * socket here. 378 */ 379 BUG_TRAP(!req->sk); 380 381 if (seq != dccp_rsk(req)->dreq_iss) { 382 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); 383 goto out; 384 } 385 /* 386 * Still in RESPOND, just remove it silently. 387 * There is no good way to pass the error to the newly 388 * created socket, and POSIX does not want network 389 * errors returned from accept(). 390 */ 391 inet_csk_reqsk_queue_drop(sk, req, prev); 392 goto out; 393 394 case DCCP_REQUESTING: 395 case DCCP_RESPOND: 396 if (!sock_owned_by_user(sk)) { 397 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 398 sk->sk_err = err; 399 400 sk->sk_error_report(sk); 401 402 dccp_done(sk); 403 } else 404 sk->sk_err_soft = err; 405 goto out; 406 } 407 408 /* If we've already connected we will keep trying 409 * until we time out, or the user gives up. 410 * 411 * rfc1122 4.2.3.9 allows to consider as hard errors 412 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, 413 * but it is obsoleted by pmtu discovery). 414 * 415 * Note, that in modern internet, where routing is unreliable 416 * and in each dark corner broken firewalls sit, sending random 417 * errors ordered by their masters even this two messages finally lose 418 * their original sense (even Linux sends invalid PORT_UNREACHs) 419 * 420 * Now we are in compliance with RFCs. 421 * --ANK (980905) 422 */ 423 424 inet = inet_sk(sk); 425 if (!sock_owned_by_user(sk) && inet->recverr) { 426 sk->sk_err = err; 427 sk->sk_error_report(sk); 428 } else /* Only an error on timeout */ 429 sk->sk_err_soft = err; 430 out: 431 bh_unlock_sock(sk); 432 sock_put(sk); 433 } 434 435 /* This routine computes an IPv4 DCCP checksum. */ 436 void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 437 { 438 const struct inet_sock *inet = inet_sk(sk); 439 struct dccp_hdr *dh = dccp_hdr(skb); 440 441 dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); 442 } 443 444 EXPORT_SYMBOL_GPL(dccp_v4_send_check); 445 446 static inline u64 dccp_v4_init_sequence(const struct sock *sk, 447 const struct sk_buff *skb) 448 { 449 return secure_dccp_sequence_number(skb->nh.iph->daddr, 450 skb->nh.iph->saddr, 451 dccp_hdr(skb)->dccph_dport, 452 dccp_hdr(skb)->dccph_sport); 453 } 454 455 int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 456 { 457 struct inet_request_sock *ireq; 458 struct dccp_sock dp; 459 struct request_sock *req; 460 struct dccp_request_sock *dreq; 461 const __be32 saddr = skb->nh.iph->saddr; 462 const __be32 daddr = skb->nh.iph->daddr; 463 const __be32 service = dccp_hdr_request(skb)->dccph_req_service; 464 struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 465 __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; 466 467 /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ 468 if (((struct rtable *)skb->dst)->rt_flags & 469 (RTCF_BROADCAST | RTCF_MULTICAST)) { 470 reset_code = DCCP_RESET_CODE_NO_CONNECTION; 471 goto drop; 472 } 473 474 if (dccp_bad_service_code(sk, service)) { 475 reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; 476 goto drop; 477 } 478 /* 479 * TW buckets are converted to open requests without 480 * limitations, they conserve resources and peer is 481 * evidently real one. 482 */ 483 if (inet_csk_reqsk_queue_is_full(sk)) 484 goto drop; 485 486 /* 487 * Accept backlog is full. If we have already queued enough 488 * of warm entries in syn queue, drop request. It is better than 489 * clogging syn queue with openreqs with exponentially increasing 490 * timeout. 491 */ 492 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) 493 goto drop; 494 495 req = reqsk_alloc(sk->sk_prot->rsk_prot); 496 if (req == NULL) 497 goto drop; 498 499 if (dccp_parse_options(sk, skb)) 500 goto drop_and_free; 501 502 dccp_openreq_init(req, &dp, skb); 503 504 ireq = inet_rsk(req); 505 ireq->loc_addr = daddr; 506 ireq->rmt_addr = saddr; 507 req->rcv_wnd = dccp_feat_default_sequence_window; 508 ireq->opt = NULL; 509 510 /* 511 * Step 3: Process LISTEN state 512 * 513 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie 514 * 515 * In fact we defer setting S.GSR, S.SWL, S.SWH to 516 * dccp_create_openreq_child. 517 */ 518 dreq = dccp_rsk(req); 519 dreq->dreq_isr = dcb->dccpd_seq; 520 dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); 521 dreq->dreq_service = service; 522 523 if (dccp_v4_send_response(sk, req, NULL)) 524 goto drop_and_free; 525 526 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 527 return 0; 528 529 drop_and_free: 530 reqsk_free(req); 531 drop: 532 DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); 533 dcb->dccpd_reset_code = reset_code; 534 return -1; 535 } 536 537 EXPORT_SYMBOL_GPL(dccp_v4_conn_request); 538 539 /* 540 * The three way handshake has completed - we got a valid ACK or DATAACK - 541 * now create the new socket. 542 * 543 * This is the equivalent of TCP's tcp_v4_syn_recv_sock 544 */ 545 struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, 546 struct request_sock *req, 547 struct dst_entry *dst) 548 { 549 struct inet_request_sock *ireq; 550 struct inet_sock *newinet; 551 struct dccp_sock *newdp; 552 struct sock *newsk; 553 554 if (sk_acceptq_is_full(sk)) 555 goto exit_overflow; 556 557 if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) 558 goto exit; 559 560 newsk = dccp_create_openreq_child(sk, req, skb); 561 if (newsk == NULL) 562 goto exit; 563 564 sk_setup_caps(newsk, dst); 565 566 newdp = dccp_sk(newsk); 567 newinet = inet_sk(newsk); 568 ireq = inet_rsk(req); 569 newinet->daddr = ireq->rmt_addr; 570 newinet->rcv_saddr = ireq->loc_addr; 571 newinet->saddr = ireq->loc_addr; 572 newinet->opt = ireq->opt; 573 ireq->opt = NULL; 574 newinet->mc_index = inet_iif(skb); 575 newinet->mc_ttl = skb->nh.iph->ttl; 576 newinet->id = jiffies; 577 578 dccp_sync_mss(newsk, dst_mtu(dst)); 579 580 __inet_hash(&dccp_hashinfo, newsk, 0); 581 __inet_inherit_port(&dccp_hashinfo, sk, newsk); 582 583 return newsk; 584 585 exit_overflow: 586 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); 587 exit: 588 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); 589 dst_release(dst); 590 return NULL; 591 } 592 593 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); 594 595 static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 596 { 597 const struct dccp_hdr *dh = dccp_hdr(skb); 598 const struct iphdr *iph = skb->nh.iph; 599 struct sock *nsk; 600 struct request_sock **prev; 601 /* Find possible connection requests. */ 602 struct request_sock *req = inet_csk_search_req(sk, &prev, 603 dh->dccph_sport, 604 iph->saddr, iph->daddr); 605 if (req != NULL) 606 return dccp_check_req(sk, skb, req, prev); 607 608 nsk = __inet_lookup_established(&dccp_hashinfo, 609 iph->saddr, dh->dccph_sport, 610 iph->daddr, ntohs(dh->dccph_dport), 611 inet_iif(skb)); 612 if (nsk != NULL) { 613 if (nsk->sk_state != DCCP_TIME_WAIT) { 614 bh_lock_sock(nsk); 615 return nsk; 616 } 617 inet_twsk_put((struct inet_timewait_sock *)nsk); 618 return NULL; 619 } 620 621 return sk; 622 } 623 624 int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr, 625 const __be32 daddr) 626 { 627 const struct dccp_hdr* dh = dccp_hdr(skb); 628 int checksum_len; 629 u32 tmp; 630 631 if (dh->dccph_cscov == 0) 632 checksum_len = skb->len; 633 else { 634 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); 635 checksum_len = checksum_len < skb->len ? checksum_len : 636 skb->len; 637 } 638 639 tmp = csum_partial((unsigned char *)dh, checksum_len, 0); 640 return csum_tcpudp_magic(saddr, daddr, checksum_len, 641 IPPROTO_DCCP, tmp); 642 } 643 644 EXPORT_SYMBOL_GPL(dccp_v4_checksum); 645 646 static int dccp_v4_verify_checksum(struct sk_buff *skb, 647 const __be32 saddr, const __be32 daddr) 648 { 649 struct dccp_hdr *dh = dccp_hdr(skb); 650 int checksum_len; 651 u32 tmp; 652 653 if (dh->dccph_cscov == 0) 654 checksum_len = skb->len; 655 else { 656 checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); 657 checksum_len = checksum_len < skb->len ? checksum_len : 658 skb->len; 659 } 660 tmp = csum_partial((unsigned char *)dh, checksum_len, 0); 661 return csum_tcpudp_magic(saddr, daddr, checksum_len, 662 IPPROTO_DCCP, tmp) == 0 ? 0 : -1; 663 } 664 665 static struct dst_entry* dccp_v4_route_skb(struct sock *sk, 666 struct sk_buff *skb) 667 { 668 struct rtable *rt; 669 struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, 670 .nl_u = { .ip4_u = 671 { .daddr = skb->nh.iph->saddr, 672 .saddr = skb->nh.iph->daddr, 673 .tos = RT_CONN_FLAGS(sk) } }, 674 .proto = sk->sk_protocol, 675 .uli_u = { .ports = 676 { .sport = dccp_hdr(skb)->dccph_dport, 677 .dport = dccp_hdr(skb)->dccph_sport } 678 } 679 }; 680 681 if (ip_route_output_flow(&rt, &fl, sk, 0)) { 682 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); 683 return NULL; 684 } 685 686 return &rt->u.dst; 687 } 688 689 static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) 690 { 691 int err; 692 struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; 693 const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + 694 sizeof(struct dccp_hdr_ext) + 695 sizeof(struct dccp_hdr_reset); 696 struct sk_buff *skb; 697 struct dst_entry *dst; 698 u64 seqno; 699 700 /* Never send a reset in response to a reset. */ 701 if (rxdh->dccph_type == DCCP_PKT_RESET) 702 return; 703 704 if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) 705 return; 706 707 dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb); 708 if (dst == NULL) 709 return; 710 711 skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header, 712 GFP_ATOMIC); 713 if (skb == NULL) 714 goto out; 715 716 /* Reserve space for headers. */ 717 skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header); 718 skb->dst = dst_clone(dst); 719 720 skb->h.raw = skb_push(skb, dccp_hdr_reset_len); 721 dh = dccp_hdr(skb); 722 memset(dh, 0, dccp_hdr_reset_len); 723 724 /* Build DCCP header and checksum it. */ 725 dh->dccph_type = DCCP_PKT_RESET; 726 dh->dccph_sport = rxdh->dccph_dport; 727 dh->dccph_dport = rxdh->dccph_sport; 728 dh->dccph_doff = dccp_hdr_reset_len / 4; 729 dh->dccph_x = 1; 730 dccp_hdr_reset(skb)->dccph_reset_code = 731 DCCP_SKB_CB(rxskb)->dccpd_reset_code; 732 733 /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ 734 seqno = 0; 735 if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 736 dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); 737 738 dccp_hdr_set_seq(dh, seqno); 739 dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), 740 DCCP_SKB_CB(rxskb)->dccpd_seq); 741 742 dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, 743 rxskb->nh.iph->daddr); 744 745 bh_lock_sock(dccp_v4_ctl_socket->sk); 746 err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk, 747 rxskb->nh.iph->daddr, 748 rxskb->nh.iph->saddr, NULL); 749 bh_unlock_sock(dccp_v4_ctl_socket->sk); 750 751 if (err == NET_XMIT_CN || err == 0) { 752 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 753 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 754 } 755 out: 756 dst_release(dst); 757 } 758 759 int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) 760 { 761 struct dccp_hdr *dh = dccp_hdr(skb); 762 763 if (sk->sk_state == DCCP_OPEN) { /* Fast path */ 764 if (dccp_rcv_established(sk, skb, dh, skb->len)) 765 goto reset; 766 return 0; 767 } 768 769 /* 770 * Step 3: Process LISTEN state 771 * If S.state == LISTEN, 772 * If P.type == Request or P contains a valid Init Cookie 773 * option, 774 * * Must scan the packet's options to check for an Init 775 * Cookie. Only the Init Cookie is processed here, 776 * however; other options are processed in Step 8. This 777 * scan need only be performed if the endpoint uses Init 778 * Cookies * 779 * * Generate a new socket and switch to that socket * 780 * Set S := new socket for this port pair 781 * S.state = RESPOND 782 * Choose S.ISS (initial seqno) or set from Init Cookie 783 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie 784 * Continue with S.state == RESPOND 785 * * A Response packet will be generated in Step 11 * 786 * Otherwise, 787 * Generate Reset(No Connection) unless P.type == Reset 788 * Drop packet and return 789 * 790 * NOTE: the check for the packet types is done in 791 * dccp_rcv_state_process 792 */ 793 if (sk->sk_state == DCCP_LISTEN) { 794 struct sock *nsk = dccp_v4_hnd_req(sk, skb); 795 796 if (nsk == NULL) 797 goto discard; 798 799 if (nsk != sk) { 800 if (dccp_child_process(sk, nsk, skb)) 801 goto reset; 802 return 0; 803 } 804 } 805 806 if (dccp_rcv_state_process(sk, skb, dh, skb->len)) 807 goto reset; 808 return 0; 809 810 reset: 811 dccp_v4_ctl_send_reset(skb); 812 discard: 813 kfree_skb(skb); 814 return 0; 815 } 816 817 EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); 818 819 int dccp_invalid_packet(struct sk_buff *skb) 820 { 821 const struct dccp_hdr *dh; 822 823 if (skb->pkt_type != PACKET_HOST) 824 return 1; 825 826 if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { 827 LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); 828 return 1; 829 } 830 831 dh = dccp_hdr(skb); 832 833 /* If the packet type is not understood, drop packet and return */ 834 if (dh->dccph_type >= DCCP_PKT_INVALID) { 835 LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); 836 return 1; 837 } 838 839 /* 840 * If P.Data Offset is too small for packet type, or too large for 841 * packet, drop packet and return 842 */ 843 if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { 844 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " 845 "too small 1\n", 846 dh->dccph_doff); 847 return 1; 848 } 849 850 if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { 851 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " 852 "too small 2\n", 853 dh->dccph_doff); 854 return 1; 855 } 856 857 dh = dccp_hdr(skb); 858 859 /* 860 * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet 861 * has short sequence numbers), drop packet and return 862 */ 863 if (dh->dccph_x == 0 && 864 dh->dccph_type != DCCP_PKT_DATA && 865 dh->dccph_type != DCCP_PKT_ACK && 866 dh->dccph_type != DCCP_PKT_DATAACK) { 867 LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " 868 "nor DataAck and P.X == 0\n", 869 dccp_packet_name(dh->dccph_type)); 870 return 1; 871 } 872 873 return 0; 874 } 875 876 EXPORT_SYMBOL_GPL(dccp_invalid_packet); 877 878 /* this is called when real data arrives */ 879 static int dccp_v4_rcv(struct sk_buff *skb) 880 { 881 const struct dccp_hdr *dh; 882 struct sock *sk; 883 884 /* Step 1: Check header basics: */ 885 886 if (dccp_invalid_packet(skb)) 887 goto discard_it; 888 889 /* If the header checksum is incorrect, drop packet and return */ 890 if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, 891 skb->nh.iph->daddr) < 0) { 892 LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", 893 __FUNCTION__); 894 goto discard_it; 895 } 896 897 dh = dccp_hdr(skb); 898 899 DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); 900 DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; 901 902 dccp_pr_debug("%8.8s " 903 "src=%u.%u.%u.%u@%-5d " 904 "dst=%u.%u.%u.%u@%-5d seq=%llu", 905 dccp_packet_name(dh->dccph_type), 906 NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), 907 NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), 908 (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); 909 910 if (dccp_packet_without_ack(skb)) { 911 DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; 912 dccp_pr_debug_cat("\n"); 913 } else { 914 DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 915 dccp_pr_debug_cat(", ack=%llu\n", 916 (unsigned long long) 917 DCCP_SKB_CB(skb)->dccpd_ack_seq); 918 } 919 920 /* Step 2: 921 * Look up flow ID in table and get corresponding socket */ 922 sk = __inet_lookup(&dccp_hashinfo, 923 skb->nh.iph->saddr, dh->dccph_sport, 924 skb->nh.iph->daddr, ntohs(dh->dccph_dport), 925 inet_iif(skb)); 926 927 /* 928 * Step 2: 929 * If no socket ... 930 * Generate Reset(No Connection) unless P.type == Reset 931 * Drop packet and return 932 */ 933 if (sk == NULL) { 934 dccp_pr_debug("failed to look up flow ID in table and " 935 "get corresponding socket\n"); 936 goto no_dccp_socket; 937 } 938 939 /* 940 * Step 2: 941 * ... or S.state == TIMEWAIT, 942 * Generate Reset(No Connection) unless P.type == Reset 943 * Drop packet and return 944 */ 945 946 if (sk->sk_state == DCCP_TIME_WAIT) { 947 dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " 948 "do_time_wait\n"); 949 goto do_time_wait; 950 } 951 952 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 953 goto discard_and_relse; 954 nf_reset(skb); 955 956 return sk_receive_skb(sk, skb); 957 958 no_dccp_socket: 959 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 960 goto discard_it; 961 /* 962 * Step 2: 963 * Generate Reset(No Connection) unless P.type == Reset 964 * Drop packet and return 965 */ 966 if (dh->dccph_type != DCCP_PKT_RESET) { 967 DCCP_SKB_CB(skb)->dccpd_reset_code = 968 DCCP_RESET_CODE_NO_CONNECTION; 969 dccp_v4_ctl_send_reset(skb); 970 } 971 972 discard_it: 973 /* Discard frame. */ 974 kfree_skb(skb); 975 return 0; 976 977 discard_and_relse: 978 sock_put(sk); 979 goto discard_it; 980 981 do_time_wait: 982 inet_twsk_put((struct inet_timewait_sock *)sk); 983 goto no_dccp_socket; 984 } 985 986 static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { 987 .queue_xmit = ip_queue_xmit, 988 .send_check = dccp_v4_send_check, 989 .rebuild_header = inet_sk_rebuild_header, 990 .conn_request = dccp_v4_conn_request, 991 .syn_recv_sock = dccp_v4_request_recv_sock, 992 .net_header_len = sizeof(struct iphdr), 993 .setsockopt = ip_setsockopt, 994 .getsockopt = ip_getsockopt, 995 .addr2sockaddr = inet_csk_addr2sockaddr, 996 .sockaddr_len = sizeof(struct sockaddr_in), 997 #ifdef CONFIG_COMPAT 998 .compat_setsockopt = compat_ip_setsockopt, 999 .compat_getsockopt = compat_ip_getsockopt, 1000 #endif 1001 }; 1002 1003 static int dccp_v4_init_sock(struct sock *sk) 1004 { 1005 static __u8 dccp_v4_ctl_sock_initialized; 1006 int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); 1007 1008 if (err == 0) { 1009 if (unlikely(!dccp_v4_ctl_sock_initialized)) 1010 dccp_v4_ctl_sock_initialized = 1; 1011 inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; 1012 } 1013 1014 return err; 1015 } 1016 1017 static void dccp_v4_reqsk_destructor(struct request_sock *req) 1018 { 1019 kfree(inet_rsk(req)->opt); 1020 } 1021 1022 static struct request_sock_ops dccp_request_sock_ops = { 1023 .family = PF_INET, 1024 .obj_size = sizeof(struct dccp_request_sock), 1025 .rtx_syn_ack = dccp_v4_send_response, 1026 .send_ack = dccp_v4_reqsk_send_ack, 1027 .destructor = dccp_v4_reqsk_destructor, 1028 .send_reset = dccp_v4_ctl_send_reset, 1029 }; 1030 1031 static struct timewait_sock_ops dccp_timewait_sock_ops = { 1032 .twsk_obj_size = sizeof(struct inet_timewait_sock), 1033 }; 1034 1035 static struct proto dccp_v4_prot = { 1036 .name = "DCCP", 1037 .owner = THIS_MODULE, 1038 .close = dccp_close, 1039 .connect = dccp_v4_connect, 1040 .disconnect = dccp_disconnect, 1041 .ioctl = dccp_ioctl, 1042 .init = dccp_v4_init_sock, 1043 .setsockopt = dccp_setsockopt, 1044 .getsockopt = dccp_getsockopt, 1045 .sendmsg = dccp_sendmsg, 1046 .recvmsg = dccp_recvmsg, 1047 .backlog_rcv = dccp_v4_do_rcv, 1048 .hash = dccp_hash, 1049 .unhash = dccp_unhash, 1050 .accept = inet_csk_accept, 1051 .get_port = dccp_v4_get_port, 1052 .shutdown = dccp_shutdown, 1053 .destroy = dccp_destroy_sock, 1054 .orphan_count = &dccp_orphan_count, 1055 .max_header = MAX_DCCP_HEADER, 1056 .obj_size = sizeof(struct dccp_sock), 1057 .rsk_prot = &dccp_request_sock_ops, 1058 .twsk_prot = &dccp_timewait_sock_ops, 1059 #ifdef CONFIG_COMPAT 1060 .compat_setsockopt = compat_dccp_setsockopt, 1061 .compat_getsockopt = compat_dccp_getsockopt, 1062 #endif 1063 }; 1064 1065 static struct net_protocol dccp_v4_protocol = { 1066 .handler = dccp_v4_rcv, 1067 .err_handler = dccp_v4_err, 1068 .no_policy = 1, 1069 }; 1070 1071 static const struct proto_ops inet_dccp_ops = { 1072 .family = PF_INET, 1073 .owner = THIS_MODULE, 1074 .release = inet_release, 1075 .bind = inet_bind, 1076 .connect = inet_stream_connect, 1077 .socketpair = sock_no_socketpair, 1078 .accept = inet_accept, 1079 .getname = inet_getname, 1080 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ 1081 .poll = dccp_poll, 1082 .ioctl = inet_ioctl, 1083 /* FIXME: work on inet_listen to rename it to sock_common_listen */ 1084 .listen = inet_dccp_listen, 1085 .shutdown = inet_shutdown, 1086 .setsockopt = sock_common_setsockopt, 1087 .getsockopt = sock_common_getsockopt, 1088 .sendmsg = inet_sendmsg, 1089 .recvmsg = sock_common_recvmsg, 1090 .mmap = sock_no_mmap, 1091 .sendpage = sock_no_sendpage, 1092 #ifdef CONFIG_COMPAT 1093 .compat_setsockopt = compat_sock_common_setsockopt, 1094 .compat_getsockopt = compat_sock_common_getsockopt, 1095 #endif 1096 }; 1097 1098 static struct inet_protosw dccp_v4_protosw = { 1099 .type = SOCK_DCCP, 1100 .protocol = IPPROTO_DCCP, 1101 .prot = &dccp_v4_prot, 1102 .ops = &inet_dccp_ops, 1103 .capability = -1, 1104 .no_check = 0, 1105 .flags = INET_PROTOSW_ICSK, 1106 }; 1107 1108 static int __init dccp_v4_init(void) 1109 { 1110 int err = proto_register(&dccp_v4_prot, 1); 1111 1112 if (err != 0) 1113 goto out; 1114 1115 err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1116 if (err != 0) 1117 goto out_proto_unregister; 1118 1119 inet_register_protosw(&dccp_v4_protosw); 1120 1121 err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET, 1122 SOCK_DCCP, IPPROTO_DCCP); 1123 if (err) 1124 goto out_unregister_protosw; 1125 out: 1126 return err; 1127 out_unregister_protosw: 1128 inet_unregister_protosw(&dccp_v4_protosw); 1129 inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1130 out_proto_unregister: 1131 proto_unregister(&dccp_v4_prot); 1132 goto out; 1133 } 1134 1135 static void __exit dccp_v4_exit(void) 1136 { 1137 inet_unregister_protosw(&dccp_v4_protosw); 1138 inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); 1139 proto_unregister(&dccp_v4_prot); 1140 } 1141 1142 module_init(dccp_v4_init); 1143 module_exit(dccp_v4_exit); 1144 1145 /* 1146 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) 1147 * values directly, Also cover the case where the protocol is not specified, 1148 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP 1149 */ 1150 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6"); 1151 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6"); 1152 MODULE_LICENSE("GPL"); 1153 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); 1154 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); 1155