1 /* 2 * net/dccp/proto.c 3 * 4 * An implementation of the DCCP protocol 5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as 9 * published by the Free Software Foundation. 10 */ 11 12 #include <linux/dccp.h> 13 #include <linux/module.h> 14 #include <linux/types.h> 15 #include <linux/sched.h> 16 #include <linux/kernel.h> 17 #include <linux/skbuff.h> 18 #include <linux/netdevice.h> 19 #include <linux/in.h> 20 #include <linux/if_arp.h> 21 #include <linux/init.h> 22 #include <linux/random.h> 23 #include <linux/slab.h> 24 #include <net/checksum.h> 25 26 #include <net/inet_sock.h> 27 #include <net/inet_common.h> 28 #include <net/sock.h> 29 #include <net/xfrm.h> 30 31 #include <asm/ioctls.h> 32 #include <linux/spinlock.h> 33 #include <linux/timer.h> 34 #include <linux/delay.h> 35 #include <linux/poll.h> 36 37 #include "ccid.h" 38 #include "dccp.h" 39 #include "feat.h" 40 41 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; 42 43 EXPORT_SYMBOL_GPL(dccp_statistics); 44 45 struct percpu_counter dccp_orphan_count; 46 EXPORT_SYMBOL_GPL(dccp_orphan_count); 47 48 struct inet_hashinfo dccp_hashinfo; 49 EXPORT_SYMBOL_GPL(dccp_hashinfo); 50 51 /* the maximum queue length for tx in packets. 0 is no limit */ 52 int sysctl_dccp_tx_qlen __read_mostly = 5; 53 54 #ifdef CONFIG_IP_DCCP_DEBUG 55 static const char *dccp_state_name(const int state) 56 { 57 static const char *const dccp_state_names[] = { 58 [DCCP_OPEN] = "OPEN", 59 [DCCP_REQUESTING] = "REQUESTING", 60 [DCCP_PARTOPEN] = "PARTOPEN", 61 [DCCP_LISTEN] = "LISTEN", 62 [DCCP_RESPOND] = "RESPOND", 63 [DCCP_CLOSING] = "CLOSING", 64 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", 65 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", 66 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", 67 [DCCP_TIME_WAIT] = "TIME_WAIT", 68 [DCCP_CLOSED] = "CLOSED", 69 }; 70 71 if (state >= DCCP_MAX_STATES) 72 return "INVALID STATE!"; 73 else 74 return dccp_state_names[state]; 75 } 76 #endif 77 78 void dccp_set_state(struct sock *sk, const int state) 79 { 80 const int oldstate = sk->sk_state; 81 82 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk, 83 dccp_state_name(oldstate), dccp_state_name(state)); 84 WARN_ON(state == oldstate); 85 86 switch (state) { 87 case DCCP_OPEN: 88 if (oldstate != DCCP_OPEN) 89 DCCP_INC_STATS(DCCP_MIB_CURRESTAB); 90 /* Client retransmits all Confirm options until entering OPEN */ 91 if (oldstate == DCCP_PARTOPEN) 92 dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); 93 break; 94 95 case DCCP_CLOSED: 96 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ || 97 oldstate == DCCP_CLOSING) 98 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); 99 100 sk->sk_prot->unhash(sk); 101 if (inet_csk(sk)->icsk_bind_hash != NULL && 102 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 103 inet_put_port(sk); 104 /* fall through */ 105 default: 106 if (oldstate == DCCP_OPEN) 107 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); 108 } 109 110 /* Change state AFTER socket is unhashed to avoid closed 111 * socket sitting in hash tables. 112 */ 113 sk->sk_state = state; 114 } 115 116 EXPORT_SYMBOL_GPL(dccp_set_state); 117 118 static void dccp_finish_passive_close(struct sock *sk) 119 { 120 switch (sk->sk_state) { 121 case DCCP_PASSIVE_CLOSE: 122 /* Node (client or server) has received Close packet. */ 123 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); 124 dccp_set_state(sk, DCCP_CLOSED); 125 break; 126 case DCCP_PASSIVE_CLOSEREQ: 127 /* 128 * Client received CloseReq. We set the `active' flag so that 129 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3. 130 */ 131 dccp_send_close(sk, 1); 132 dccp_set_state(sk, DCCP_CLOSING); 133 } 134 } 135 136 void dccp_done(struct sock *sk) 137 { 138 dccp_set_state(sk, DCCP_CLOSED); 139 dccp_clear_xmit_timers(sk); 140 141 sk->sk_shutdown = SHUTDOWN_MASK; 142 143 if (!sock_flag(sk, SOCK_DEAD)) 144 sk->sk_state_change(sk); 145 else 146 inet_csk_destroy_sock(sk); 147 } 148 149 EXPORT_SYMBOL_GPL(dccp_done); 150 151 const char *dccp_packet_name(const int type) 152 { 153 static const char *const dccp_packet_names[] = { 154 [DCCP_PKT_REQUEST] = "REQUEST", 155 [DCCP_PKT_RESPONSE] = "RESPONSE", 156 [DCCP_PKT_DATA] = "DATA", 157 [DCCP_PKT_ACK] = "ACK", 158 [DCCP_PKT_DATAACK] = "DATAACK", 159 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", 160 [DCCP_PKT_CLOSE] = "CLOSE", 161 [DCCP_PKT_RESET] = "RESET", 162 [DCCP_PKT_SYNC] = "SYNC", 163 [DCCP_PKT_SYNCACK] = "SYNCACK", 164 }; 165 166 if (type >= DCCP_NR_PKT_TYPES) 167 return "INVALID"; 168 else 169 return dccp_packet_names[type]; 170 } 171 172 EXPORT_SYMBOL_GPL(dccp_packet_name); 173 174 static void dccp_sk_destruct(struct sock *sk) 175 { 176 struct dccp_sock *dp = dccp_sk(sk); 177 178 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); 179 dp->dccps_hc_tx_ccid = NULL; 180 inet_sock_destruct(sk); 181 } 182 183 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) 184 { 185 struct dccp_sock *dp = dccp_sk(sk); 186 struct inet_connection_sock *icsk = inet_csk(sk); 187 188 icsk->icsk_rto = DCCP_TIMEOUT_INIT; 189 icsk->icsk_syn_retries = sysctl_dccp_request_retries; 190 sk->sk_state = DCCP_CLOSED; 191 sk->sk_write_space = dccp_write_space; 192 sk->sk_destruct = dccp_sk_destruct; 193 icsk->icsk_sync_mss = dccp_sync_mss; 194 dp->dccps_mss_cache = 536; 195 dp->dccps_rate_last = jiffies; 196 dp->dccps_role = DCCP_ROLE_UNDEFINED; 197 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; 198 dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; 199 200 dccp_init_xmit_timers(sk); 201 202 INIT_LIST_HEAD(&dp->dccps_featneg); 203 /* control socket doesn't need feat nego */ 204 if (likely(ctl_sock_initialized)) 205 return dccp_feat_init(sk); 206 return 0; 207 } 208 209 EXPORT_SYMBOL_GPL(dccp_init_sock); 210 211 void dccp_destroy_sock(struct sock *sk) 212 { 213 struct dccp_sock *dp = dccp_sk(sk); 214 215 __skb_queue_purge(&sk->sk_write_queue); 216 if (sk->sk_send_head != NULL) { 217 kfree_skb(sk->sk_send_head); 218 sk->sk_send_head = NULL; 219 } 220 221 /* Clean up a referenced DCCP bind bucket. */ 222 if (inet_csk(sk)->icsk_bind_hash != NULL) 223 inet_put_port(sk); 224 225 kfree(dp->dccps_service_list); 226 dp->dccps_service_list = NULL; 227 228 if (dp->dccps_hc_rx_ackvec != NULL) { 229 dccp_ackvec_free(dp->dccps_hc_rx_ackvec); 230 dp->dccps_hc_rx_ackvec = NULL; 231 } 232 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); 233 dp->dccps_hc_rx_ccid = NULL; 234 235 /* clean up feature negotiation state */ 236 dccp_feat_list_purge(&dp->dccps_featneg); 237 } 238 239 EXPORT_SYMBOL_GPL(dccp_destroy_sock); 240 241 static inline int dccp_listen_start(struct sock *sk, int backlog) 242 { 243 struct dccp_sock *dp = dccp_sk(sk); 244 245 dp->dccps_role = DCCP_ROLE_LISTEN; 246 /* do not start to listen if feature negotiation setup fails */ 247 if (dccp_feat_finalise_settings(dp)) 248 return -EPROTO; 249 return inet_csk_listen_start(sk, backlog); 250 } 251 252 static inline int dccp_need_reset(int state) 253 { 254 return state != DCCP_CLOSED && state != DCCP_LISTEN && 255 state != DCCP_REQUESTING; 256 } 257 258 int dccp_disconnect(struct sock *sk, int flags) 259 { 260 struct inet_connection_sock *icsk = inet_csk(sk); 261 struct inet_sock *inet = inet_sk(sk); 262 int err = 0; 263 const int old_state = sk->sk_state; 264 265 if (old_state != DCCP_CLOSED) 266 dccp_set_state(sk, DCCP_CLOSED); 267 268 /* 269 * This corresponds to the ABORT function of RFC793, sec. 3.8 270 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted". 271 */ 272 if (old_state == DCCP_LISTEN) { 273 inet_csk_listen_stop(sk); 274 } else if (dccp_need_reset(old_state)) { 275 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); 276 sk->sk_err = ECONNRESET; 277 } else if (old_state == DCCP_REQUESTING) 278 sk->sk_err = ECONNRESET; 279 280 dccp_clear_xmit_timers(sk); 281 282 __skb_queue_purge(&sk->sk_receive_queue); 283 __skb_queue_purge(&sk->sk_write_queue); 284 if (sk->sk_send_head != NULL) { 285 __kfree_skb(sk->sk_send_head); 286 sk->sk_send_head = NULL; 287 } 288 289 inet->inet_dport = 0; 290 291 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 292 inet_reset_saddr(sk); 293 294 sk->sk_shutdown = 0; 295 sock_reset_flag(sk, SOCK_DONE); 296 297 icsk->icsk_backoff = 0; 298 inet_csk_delack_init(sk); 299 __sk_dst_reset(sk); 300 301 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); 302 303 sk->sk_error_report(sk); 304 return err; 305 } 306 307 EXPORT_SYMBOL_GPL(dccp_disconnect); 308 309 /* 310 * Wait for a DCCP event. 311 * 312 * Note that we don't need to lock the socket, as the upper poll layers 313 * take care of normal races (between the test and the event) and we don't 314 * go look at any of the socket buffers directly. 315 */ 316 unsigned int dccp_poll(struct file *file, struct socket *sock, 317 poll_table *wait) 318 { 319 unsigned int mask; 320 struct sock *sk = sock->sk; 321 322 sock_poll_wait(file, sk_sleep(sk), wait); 323 if (sk->sk_state == DCCP_LISTEN) 324 return inet_csk_listen_poll(sk); 325 326 /* Socket is not locked. We are protected from async events 327 by poll logic and correct handling of state changes 328 made by another threads is impossible in any case. 329 */ 330 331 mask = 0; 332 if (sk->sk_err) 333 mask = POLLERR; 334 335 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED) 336 mask |= POLLHUP; 337 if (sk->sk_shutdown & RCV_SHUTDOWN) 338 mask |= POLLIN | POLLRDNORM | POLLRDHUP; 339 340 /* Connected? */ 341 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) { 342 if (atomic_read(&sk->sk_rmem_alloc) > 0) 343 mask |= POLLIN | POLLRDNORM; 344 345 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { 346 if (sk_stream_is_writeable(sk)) { 347 mask |= POLLOUT | POLLWRNORM; 348 } else { /* send SIGIO later */ 349 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 350 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 351 352 /* Race breaker. If space is freed after 353 * wspace test but before the flags are set, 354 * IO signal will be lost. 355 */ 356 if (sk_stream_is_writeable(sk)) 357 mask |= POLLOUT | POLLWRNORM; 358 } 359 } 360 } 361 return mask; 362 } 363 364 EXPORT_SYMBOL_GPL(dccp_poll); 365 366 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 367 { 368 int rc = -ENOTCONN; 369 370 lock_sock(sk); 371 372 if (sk->sk_state == DCCP_LISTEN) 373 goto out; 374 375 switch (cmd) { 376 case SIOCINQ: { 377 struct sk_buff *skb; 378 unsigned long amount = 0; 379 380 skb = skb_peek(&sk->sk_receive_queue); 381 if (skb != NULL) { 382 /* 383 * We will only return the amount of this packet since 384 * that is all that will be read. 385 */ 386 amount = skb->len; 387 } 388 rc = put_user(amount, (int __user *)arg); 389 } 390 break; 391 default: 392 rc = -ENOIOCTLCMD; 393 break; 394 } 395 out: 396 release_sock(sk); 397 return rc; 398 } 399 400 EXPORT_SYMBOL_GPL(dccp_ioctl); 401 402 static int dccp_setsockopt_service(struct sock *sk, const __be32 service, 403 char __user *optval, unsigned int optlen) 404 { 405 struct dccp_sock *dp = dccp_sk(sk); 406 struct dccp_service_list *sl = NULL; 407 408 if (service == DCCP_SERVICE_INVALID_VALUE || 409 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) 410 return -EINVAL; 411 412 if (optlen > sizeof(service)) { 413 sl = kmalloc(optlen, GFP_KERNEL); 414 if (sl == NULL) 415 return -ENOMEM; 416 417 sl->dccpsl_nr = optlen / sizeof(u32) - 1; 418 if (copy_from_user(sl->dccpsl_list, 419 optval + sizeof(service), 420 optlen - sizeof(service)) || 421 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { 422 kfree(sl); 423 return -EFAULT; 424 } 425 } 426 427 lock_sock(sk); 428 dp->dccps_service = service; 429 430 kfree(dp->dccps_service_list); 431 432 dp->dccps_service_list = sl; 433 release_sock(sk); 434 return 0; 435 } 436 437 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) 438 { 439 u8 *list, len; 440 int i, rc; 441 442 if (cscov < 0 || cscov > 15) 443 return -EINVAL; 444 /* 445 * Populate a list of permissible values, in the range cscov...15. This 446 * is necessary since feature negotiation of single values only works if 447 * both sides incidentally choose the same value. Since the list starts 448 * lowest-value first, negotiation will pick the smallest shared value. 449 */ 450 if (cscov == 0) 451 return 0; 452 len = 16 - cscov; 453 454 list = kmalloc(len, GFP_KERNEL); 455 if (list == NULL) 456 return -ENOBUFS; 457 458 for (i = 0; i < len; i++) 459 list[i] = cscov++; 460 461 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); 462 463 if (rc == 0) { 464 if (rx) 465 dccp_sk(sk)->dccps_pcrlen = cscov; 466 else 467 dccp_sk(sk)->dccps_pcslen = cscov; 468 } 469 kfree(list); 470 return rc; 471 } 472 473 static int dccp_setsockopt_ccid(struct sock *sk, int type, 474 char __user *optval, unsigned int optlen) 475 { 476 u8 *val; 477 int rc = 0; 478 479 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) 480 return -EINVAL; 481 482 val = memdup_user(optval, optlen); 483 if (IS_ERR(val)) 484 return PTR_ERR(val); 485 486 lock_sock(sk); 487 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) 488 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); 489 490 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) 491 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); 492 release_sock(sk); 493 494 kfree(val); 495 return rc; 496 } 497 498 static int do_dccp_setsockopt(struct sock *sk, int level, int optname, 499 char __user *optval, unsigned int optlen) 500 { 501 struct dccp_sock *dp = dccp_sk(sk); 502 int val, err = 0; 503 504 switch (optname) { 505 case DCCP_SOCKOPT_PACKET_SIZE: 506 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); 507 return 0; 508 case DCCP_SOCKOPT_CHANGE_L: 509 case DCCP_SOCKOPT_CHANGE_R: 510 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); 511 return 0; 512 case DCCP_SOCKOPT_CCID: 513 case DCCP_SOCKOPT_RX_CCID: 514 case DCCP_SOCKOPT_TX_CCID: 515 return dccp_setsockopt_ccid(sk, optname, optval, optlen); 516 } 517 518 if (optlen < (int)sizeof(int)) 519 return -EINVAL; 520 521 if (get_user(val, (int __user *)optval)) 522 return -EFAULT; 523 524 if (optname == DCCP_SOCKOPT_SERVICE) 525 return dccp_setsockopt_service(sk, val, optval, optlen); 526 527 lock_sock(sk); 528 switch (optname) { 529 case DCCP_SOCKOPT_SERVER_TIMEWAIT: 530 if (dp->dccps_role != DCCP_ROLE_SERVER) 531 err = -EOPNOTSUPP; 532 else 533 dp->dccps_server_timewait = (val != 0); 534 break; 535 case DCCP_SOCKOPT_SEND_CSCOV: 536 err = dccp_setsockopt_cscov(sk, val, false); 537 break; 538 case DCCP_SOCKOPT_RECV_CSCOV: 539 err = dccp_setsockopt_cscov(sk, val, true); 540 break; 541 case DCCP_SOCKOPT_QPOLICY_ID: 542 if (sk->sk_state != DCCP_CLOSED) 543 err = -EISCONN; 544 else if (val < 0 || val >= DCCPQ_POLICY_MAX) 545 err = -EINVAL; 546 else 547 dp->dccps_qpolicy = val; 548 break; 549 case DCCP_SOCKOPT_QPOLICY_TXQLEN: 550 if (val < 0) 551 err = -EINVAL; 552 else 553 dp->dccps_tx_qlen = val; 554 break; 555 default: 556 err = -ENOPROTOOPT; 557 break; 558 } 559 release_sock(sk); 560 561 return err; 562 } 563 564 int dccp_setsockopt(struct sock *sk, int level, int optname, 565 char __user *optval, unsigned int optlen) 566 { 567 if (level != SOL_DCCP) 568 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, 569 optname, optval, 570 optlen); 571 return do_dccp_setsockopt(sk, level, optname, optval, optlen); 572 } 573 574 EXPORT_SYMBOL_GPL(dccp_setsockopt); 575 576 #ifdef CONFIG_COMPAT 577 int compat_dccp_setsockopt(struct sock *sk, int level, int optname, 578 char __user *optval, unsigned int optlen) 579 { 580 if (level != SOL_DCCP) 581 return inet_csk_compat_setsockopt(sk, level, optname, 582 optval, optlen); 583 return do_dccp_setsockopt(sk, level, optname, optval, optlen); 584 } 585 586 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt); 587 #endif 588 589 static int dccp_getsockopt_service(struct sock *sk, int len, 590 __be32 __user *optval, 591 int __user *optlen) 592 { 593 const struct dccp_sock *dp = dccp_sk(sk); 594 const struct dccp_service_list *sl; 595 int err = -ENOENT, slen = 0, total_len = sizeof(u32); 596 597 lock_sock(sk); 598 if ((sl = dp->dccps_service_list) != NULL) { 599 slen = sl->dccpsl_nr * sizeof(u32); 600 total_len += slen; 601 } 602 603 err = -EINVAL; 604 if (total_len > len) 605 goto out; 606 607 err = 0; 608 if (put_user(total_len, optlen) || 609 put_user(dp->dccps_service, optval) || 610 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) 611 err = -EFAULT; 612 out: 613 release_sock(sk); 614 return err; 615 } 616 617 static int do_dccp_getsockopt(struct sock *sk, int level, int optname, 618 char __user *optval, int __user *optlen) 619 { 620 struct dccp_sock *dp; 621 int val, len; 622 623 if (get_user(len, optlen)) 624 return -EFAULT; 625 626 if (len < (int)sizeof(int)) 627 return -EINVAL; 628 629 dp = dccp_sk(sk); 630 631 switch (optname) { 632 case DCCP_SOCKOPT_PACKET_SIZE: 633 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); 634 return 0; 635 case DCCP_SOCKOPT_SERVICE: 636 return dccp_getsockopt_service(sk, len, 637 (__be32 __user *)optval, optlen); 638 case DCCP_SOCKOPT_GET_CUR_MPS: 639 val = dp->dccps_mss_cache; 640 break; 641 case DCCP_SOCKOPT_AVAILABLE_CCIDS: 642 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); 643 case DCCP_SOCKOPT_TX_CCID: 644 val = ccid_get_current_tx_ccid(dp); 645 if (val < 0) 646 return -ENOPROTOOPT; 647 break; 648 case DCCP_SOCKOPT_RX_CCID: 649 val = ccid_get_current_rx_ccid(dp); 650 if (val < 0) 651 return -ENOPROTOOPT; 652 break; 653 case DCCP_SOCKOPT_SERVER_TIMEWAIT: 654 val = dp->dccps_server_timewait; 655 break; 656 case DCCP_SOCKOPT_SEND_CSCOV: 657 val = dp->dccps_pcslen; 658 break; 659 case DCCP_SOCKOPT_RECV_CSCOV: 660 val = dp->dccps_pcrlen; 661 break; 662 case DCCP_SOCKOPT_QPOLICY_ID: 663 val = dp->dccps_qpolicy; 664 break; 665 case DCCP_SOCKOPT_QPOLICY_TXQLEN: 666 val = dp->dccps_tx_qlen; 667 break; 668 case 128 ... 191: 669 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, 670 len, (u32 __user *)optval, optlen); 671 case 192 ... 255: 672 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, 673 len, (u32 __user *)optval, optlen); 674 default: 675 return -ENOPROTOOPT; 676 } 677 678 len = sizeof(val); 679 if (put_user(len, optlen) || copy_to_user(optval, &val, len)) 680 return -EFAULT; 681 682 return 0; 683 } 684 685 int dccp_getsockopt(struct sock *sk, int level, int optname, 686 char __user *optval, int __user *optlen) 687 { 688 if (level != SOL_DCCP) 689 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level, 690 optname, optval, 691 optlen); 692 return do_dccp_getsockopt(sk, level, optname, optval, optlen); 693 } 694 695 EXPORT_SYMBOL_GPL(dccp_getsockopt); 696 697 #ifdef CONFIG_COMPAT 698 int compat_dccp_getsockopt(struct sock *sk, int level, int optname, 699 char __user *optval, int __user *optlen) 700 { 701 if (level != SOL_DCCP) 702 return inet_csk_compat_getsockopt(sk, level, optname, 703 optval, optlen); 704 return do_dccp_getsockopt(sk, level, optname, optval, optlen); 705 } 706 707 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); 708 #endif 709 710 static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) 711 { 712 struct cmsghdr *cmsg; 713 714 /* 715 * Assign an (opaque) qpolicy priority value to skb->priority. 716 * 717 * We are overloading this skb field for use with the qpolicy subystem. 718 * The skb->priority is normally used for the SO_PRIORITY option, which 719 * is initialised from sk_priority. Since the assignment of sk_priority 720 * to skb->priority happens later (on layer 3), we overload this field 721 * for use with queueing priorities as long as the skb is on layer 4. 722 * The default priority value (if nothing is set) is 0. 723 */ 724 skb->priority = 0; 725 726 for_each_cmsghdr(cmsg, msg) { 727 if (!CMSG_OK(msg, cmsg)) 728 return -EINVAL; 729 730 if (cmsg->cmsg_level != SOL_DCCP) 731 continue; 732 733 if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && 734 !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) 735 return -EINVAL; 736 737 switch (cmsg->cmsg_type) { 738 case DCCP_SCM_PRIORITY: 739 if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) 740 return -EINVAL; 741 skb->priority = *(__u32 *)CMSG_DATA(cmsg); 742 break; 743 default: 744 return -EINVAL; 745 } 746 } 747 return 0; 748 } 749 750 int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 751 { 752 const struct dccp_sock *dp = dccp_sk(sk); 753 const int flags = msg->msg_flags; 754 const int noblock = flags & MSG_DONTWAIT; 755 struct sk_buff *skb; 756 int rc, size; 757 long timeo; 758 759 if (len > dp->dccps_mss_cache) 760 return -EMSGSIZE; 761 762 lock_sock(sk); 763 764 if (dccp_qpolicy_full(sk)) { 765 rc = -EAGAIN; 766 goto out_release; 767 } 768 769 timeo = sock_sndtimeo(sk, noblock); 770 771 /* 772 * We have to use sk_stream_wait_connect here to set sk_write_pending, 773 * so that the trick in dccp_rcv_request_sent_state_process. 774 */ 775 /* Wait for a connection to finish. */ 776 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN)) 777 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) 778 goto out_release; 779 780 size = sk->sk_prot->max_header + len; 781 release_sock(sk); 782 skb = sock_alloc_send_skb(sk, size, noblock, &rc); 783 lock_sock(sk); 784 if (skb == NULL) 785 goto out_release; 786 787 skb_reserve(skb, sk->sk_prot->max_header); 788 rc = memcpy_from_msg(skb_put(skb, len), msg, len); 789 if (rc != 0) 790 goto out_discard; 791 792 rc = dccp_msghdr_parse(msg, skb); 793 if (rc != 0) 794 goto out_discard; 795 796 dccp_qpolicy_push(sk, skb); 797 /* 798 * The xmit_timer is set if the TX CCID is rate-based and will expire 799 * when congestion control permits to release further packets into the 800 * network. Window-based CCIDs do not use this timer. 801 */ 802 if (!timer_pending(&dp->dccps_xmit_timer)) 803 dccp_write_xmit(sk); 804 out_release: 805 release_sock(sk); 806 return rc ? : len; 807 out_discard: 808 kfree_skb(skb); 809 goto out_release; 810 } 811 812 EXPORT_SYMBOL_GPL(dccp_sendmsg); 813 814 int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, 815 int flags, int *addr_len) 816 { 817 const struct dccp_hdr *dh; 818 long timeo; 819 820 lock_sock(sk); 821 822 if (sk->sk_state == DCCP_LISTEN) { 823 len = -ENOTCONN; 824 goto out; 825 } 826 827 timeo = sock_rcvtimeo(sk, nonblock); 828 829 do { 830 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); 831 832 if (skb == NULL) 833 goto verify_sock_status; 834 835 dh = dccp_hdr(skb); 836 837 switch (dh->dccph_type) { 838 case DCCP_PKT_DATA: 839 case DCCP_PKT_DATAACK: 840 goto found_ok_skb; 841 842 case DCCP_PKT_CLOSE: 843 case DCCP_PKT_CLOSEREQ: 844 if (!(flags & MSG_PEEK)) 845 dccp_finish_passive_close(sk); 846 /* fall through */ 847 case DCCP_PKT_RESET: 848 dccp_pr_debug("found fin (%s) ok!\n", 849 dccp_packet_name(dh->dccph_type)); 850 len = 0; 851 goto found_fin_ok; 852 default: 853 dccp_pr_debug("packet_type=%s\n", 854 dccp_packet_name(dh->dccph_type)); 855 sk_eat_skb(sk, skb); 856 } 857 verify_sock_status: 858 if (sock_flag(sk, SOCK_DONE)) { 859 len = 0; 860 break; 861 } 862 863 if (sk->sk_err) { 864 len = sock_error(sk); 865 break; 866 } 867 868 if (sk->sk_shutdown & RCV_SHUTDOWN) { 869 len = 0; 870 break; 871 } 872 873 if (sk->sk_state == DCCP_CLOSED) { 874 if (!sock_flag(sk, SOCK_DONE)) { 875 /* This occurs when user tries to read 876 * from never connected socket. 877 */ 878 len = -ENOTCONN; 879 break; 880 } 881 len = 0; 882 break; 883 } 884 885 if (!timeo) { 886 len = -EAGAIN; 887 break; 888 } 889 890 if (signal_pending(current)) { 891 len = sock_intr_errno(timeo); 892 break; 893 } 894 895 sk_wait_data(sk, &timeo, NULL); 896 continue; 897 found_ok_skb: 898 if (len > skb->len) 899 len = skb->len; 900 else if (len < skb->len) 901 msg->msg_flags |= MSG_TRUNC; 902 903 if (skb_copy_datagram_msg(skb, 0, msg, len)) { 904 /* Exception. Bailout! */ 905 len = -EFAULT; 906 break; 907 } 908 if (flags & MSG_TRUNC) 909 len = skb->len; 910 found_fin_ok: 911 if (!(flags & MSG_PEEK)) 912 sk_eat_skb(sk, skb); 913 break; 914 } while (1); 915 out: 916 release_sock(sk); 917 return len; 918 } 919 920 EXPORT_SYMBOL_GPL(dccp_recvmsg); 921 922 int inet_dccp_listen(struct socket *sock, int backlog) 923 { 924 struct sock *sk = sock->sk; 925 unsigned char old_state; 926 int err; 927 928 lock_sock(sk); 929 930 err = -EINVAL; 931 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) 932 goto out; 933 934 old_state = sk->sk_state; 935 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) 936 goto out; 937 938 /* Really, if the socket is already in listen state 939 * we can only allow the backlog to be adjusted. 940 */ 941 if (old_state != DCCP_LISTEN) { 942 /* 943 * FIXME: here it probably should be sk->sk_prot->listen_start 944 * see tcp_listen_start 945 */ 946 err = dccp_listen_start(sk, backlog); 947 if (err) 948 goto out; 949 } 950 sk->sk_max_ack_backlog = backlog; 951 err = 0; 952 953 out: 954 release_sock(sk); 955 return err; 956 } 957 958 EXPORT_SYMBOL_GPL(inet_dccp_listen); 959 960 static void dccp_terminate_connection(struct sock *sk) 961 { 962 u8 next_state = DCCP_CLOSED; 963 964 switch (sk->sk_state) { 965 case DCCP_PASSIVE_CLOSE: 966 case DCCP_PASSIVE_CLOSEREQ: 967 dccp_finish_passive_close(sk); 968 break; 969 case DCCP_PARTOPEN: 970 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk); 971 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 972 /* fall through */ 973 case DCCP_OPEN: 974 dccp_send_close(sk, 1); 975 976 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER && 977 !dccp_sk(sk)->dccps_server_timewait) 978 next_state = DCCP_ACTIVE_CLOSEREQ; 979 else 980 next_state = DCCP_CLOSING; 981 /* fall through */ 982 default: 983 dccp_set_state(sk, next_state); 984 } 985 } 986 987 void dccp_close(struct sock *sk, long timeout) 988 { 989 struct dccp_sock *dp = dccp_sk(sk); 990 struct sk_buff *skb; 991 u32 data_was_unread = 0; 992 int state; 993 994 lock_sock(sk); 995 996 sk->sk_shutdown = SHUTDOWN_MASK; 997 998 if (sk->sk_state == DCCP_LISTEN) { 999 dccp_set_state(sk, DCCP_CLOSED); 1000 1001 /* Special case. */ 1002 inet_csk_listen_stop(sk); 1003 1004 goto adjudge_to_death; 1005 } 1006 1007 sk_stop_timer(sk, &dp->dccps_xmit_timer); 1008 1009 /* 1010 * We need to flush the recv. buffs. We do this only on the 1011 * descriptor close, not protocol-sourced closes, because the 1012 *reader process may not have drained the data yet! 1013 */ 1014 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 1015 data_was_unread += skb->len; 1016 __kfree_skb(skb); 1017 } 1018 1019 /* If socket has been already reset kill it. */ 1020 if (sk->sk_state == DCCP_CLOSED) 1021 goto adjudge_to_death; 1022 1023 if (data_was_unread) { 1024 /* Unread data was tossed, send an appropriate Reset Code */ 1025 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); 1026 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); 1027 dccp_set_state(sk, DCCP_CLOSED); 1028 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 1029 /* Check zero linger _after_ checking for unread data. */ 1030 sk->sk_prot->disconnect(sk, 0); 1031 } else if (sk->sk_state != DCCP_CLOSED) { 1032 /* 1033 * Normal connection termination. May need to wait if there are 1034 * still packets in the TX queue that are delayed by the CCID. 1035 */ 1036 dccp_flush_write_queue(sk, &timeout); 1037 dccp_terminate_connection(sk); 1038 } 1039 1040 /* 1041 * Flush write queue. This may be necessary in several cases: 1042 * - we have been closed by the peer but still have application data; 1043 * - abortive termination (unread data or zero linger time), 1044 * - normal termination but queue could not be flushed within time limit 1045 */ 1046 __skb_queue_purge(&sk->sk_write_queue); 1047 1048 sk_stream_wait_close(sk, timeout); 1049 1050 adjudge_to_death: 1051 state = sk->sk_state; 1052 sock_hold(sk); 1053 sock_orphan(sk); 1054 1055 /* 1056 * It is the last release_sock in its life. It will remove backlog. 1057 */ 1058 release_sock(sk); 1059 /* 1060 * Now socket is owned by kernel and we acquire BH lock 1061 * to finish close. No need to check for user refs. 1062 */ 1063 local_bh_disable(); 1064 bh_lock_sock(sk); 1065 WARN_ON(sock_owned_by_user(sk)); 1066 1067 percpu_counter_inc(sk->sk_prot->orphan_count); 1068 1069 /* Have we already been destroyed by a softirq or backlog? */ 1070 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) 1071 goto out; 1072 1073 if (sk->sk_state == DCCP_CLOSED) 1074 inet_csk_destroy_sock(sk); 1075 1076 /* Otherwise, socket is reprieved until protocol close. */ 1077 1078 out: 1079 bh_unlock_sock(sk); 1080 local_bh_enable(); 1081 sock_put(sk); 1082 } 1083 1084 EXPORT_SYMBOL_GPL(dccp_close); 1085 1086 void dccp_shutdown(struct sock *sk, int how) 1087 { 1088 dccp_pr_debug("called shutdown(%x)\n", how); 1089 } 1090 1091 EXPORT_SYMBOL_GPL(dccp_shutdown); 1092 1093 static inline int __init dccp_mib_init(void) 1094 { 1095 dccp_statistics = alloc_percpu(struct dccp_mib); 1096 if (!dccp_statistics) 1097 return -ENOMEM; 1098 return 0; 1099 } 1100 1101 static inline void dccp_mib_exit(void) 1102 { 1103 free_percpu(dccp_statistics); 1104 } 1105 1106 static int thash_entries; 1107 module_param(thash_entries, int, 0444); 1108 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); 1109 1110 #ifdef CONFIG_IP_DCCP_DEBUG 1111 bool dccp_debug; 1112 module_param(dccp_debug, bool, 0644); 1113 MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); 1114 1115 EXPORT_SYMBOL_GPL(dccp_debug); 1116 #endif 1117 1118 static int __init dccp_init(void) 1119 { 1120 unsigned long goal; 1121 int ehash_order, bhash_order, i; 1122 int rc; 1123 1124 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > 1125 FIELD_SIZEOF(struct sk_buff, cb)); 1126 rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); 1127 if (rc) 1128 goto out_fail; 1129 rc = -ENOBUFS; 1130 inet_hashinfo_init(&dccp_hashinfo); 1131 dccp_hashinfo.bind_bucket_cachep = 1132 kmem_cache_create("dccp_bind_bucket", 1133 sizeof(struct inet_bind_bucket), 0, 1134 SLAB_HWCACHE_ALIGN, NULL); 1135 if (!dccp_hashinfo.bind_bucket_cachep) 1136 goto out_free_percpu; 1137 1138 /* 1139 * Size and allocate the main established and bind bucket 1140 * hash tables. 1141 * 1142 * The methodology is similar to that of the buffer cache. 1143 */ 1144 if (totalram_pages >= (128 * 1024)) 1145 goal = totalram_pages >> (21 - PAGE_SHIFT); 1146 else 1147 goal = totalram_pages >> (23 - PAGE_SHIFT); 1148 1149 if (thash_entries) 1150 goal = (thash_entries * 1151 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; 1152 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) 1153 ; 1154 do { 1155 unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / 1156 sizeof(struct inet_ehash_bucket); 1157 1158 while (hash_size & (hash_size - 1)) 1159 hash_size--; 1160 dccp_hashinfo.ehash_mask = hash_size - 1; 1161 dccp_hashinfo.ehash = (struct inet_ehash_bucket *) 1162 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); 1163 } while (!dccp_hashinfo.ehash && --ehash_order > 0); 1164 1165 if (!dccp_hashinfo.ehash) { 1166 DCCP_CRIT("Failed to allocate DCCP established hash table"); 1167 goto out_free_bind_bucket_cachep; 1168 } 1169 1170 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) 1171 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); 1172 1173 if (inet_ehash_locks_alloc(&dccp_hashinfo)) 1174 goto out_free_dccp_ehash; 1175 1176 bhash_order = ehash_order; 1177 1178 do { 1179 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / 1180 sizeof(struct inet_bind_hashbucket); 1181 if ((dccp_hashinfo.bhash_size > (64 * 1024)) && 1182 bhash_order > 0) 1183 continue; 1184 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) 1185 __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); 1186 } while (!dccp_hashinfo.bhash && --bhash_order >= 0); 1187 1188 if (!dccp_hashinfo.bhash) { 1189 DCCP_CRIT("Failed to allocate DCCP bind hash table"); 1190 goto out_free_dccp_locks; 1191 } 1192 1193 for (i = 0; i < dccp_hashinfo.bhash_size; i++) { 1194 spin_lock_init(&dccp_hashinfo.bhash[i].lock); 1195 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); 1196 } 1197 1198 rc = dccp_mib_init(); 1199 if (rc) 1200 goto out_free_dccp_bhash; 1201 1202 rc = dccp_ackvec_init(); 1203 if (rc) 1204 goto out_free_dccp_mib; 1205 1206 rc = dccp_sysctl_init(); 1207 if (rc) 1208 goto out_ackvec_exit; 1209 1210 rc = ccid_initialize_builtins(); 1211 if (rc) 1212 goto out_sysctl_exit; 1213 1214 dccp_timestamping_init(); 1215 1216 return 0; 1217 1218 out_sysctl_exit: 1219 dccp_sysctl_exit(); 1220 out_ackvec_exit: 1221 dccp_ackvec_exit(); 1222 out_free_dccp_mib: 1223 dccp_mib_exit(); 1224 out_free_dccp_bhash: 1225 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 1226 out_free_dccp_locks: 1227 inet_ehash_locks_free(&dccp_hashinfo); 1228 out_free_dccp_ehash: 1229 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); 1230 out_free_bind_bucket_cachep: 1231 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1232 out_free_percpu: 1233 percpu_counter_destroy(&dccp_orphan_count); 1234 out_fail: 1235 dccp_hashinfo.bhash = NULL; 1236 dccp_hashinfo.ehash = NULL; 1237 dccp_hashinfo.bind_bucket_cachep = NULL; 1238 return rc; 1239 } 1240 1241 static void __exit dccp_fini(void) 1242 { 1243 ccid_cleanup_builtins(); 1244 dccp_mib_exit(); 1245 free_pages((unsigned long)dccp_hashinfo.bhash, 1246 get_order(dccp_hashinfo.bhash_size * 1247 sizeof(struct inet_bind_hashbucket))); 1248 free_pages((unsigned long)dccp_hashinfo.ehash, 1249 get_order((dccp_hashinfo.ehash_mask + 1) * 1250 sizeof(struct inet_ehash_bucket))); 1251 inet_ehash_locks_free(&dccp_hashinfo); 1252 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1253 dccp_ackvec_exit(); 1254 dccp_sysctl_exit(); 1255 percpu_counter_destroy(&dccp_orphan_count); 1256 } 1257 1258 module_init(dccp_init); 1259 module_exit(dccp_fini); 1260 1261 MODULE_LICENSE("GPL"); 1262 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); 1263 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); 1264