11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * The User Datagram Protocol (UDP). 71da177e4SLinus Torvalds * 802c30a84SJesper Juhl * Authors: Ross Biro 91da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 101da177e4SLinus Torvalds * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 11113aa838SAlan Cox * Alan Cox, <alan@lxorguk.ukuu.org.uk> 121da177e4SLinus Torvalds * Hirokazu Takahashi, <taka@valinux.co.jp> 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * Fixes: 151da177e4SLinus Torvalds * Alan Cox : verify_area() calls 161da177e4SLinus Torvalds * Alan Cox : stopped close while in use off icmp 171da177e4SLinus Torvalds * messages. Not a fix but a botch that 181da177e4SLinus Torvalds * for udp at least is 'valid'. 191da177e4SLinus Torvalds * Alan Cox : Fixed icmp handling properly 201da177e4SLinus Torvalds * Alan Cox : Correct error for oversized datagrams 211da177e4SLinus Torvalds * Alan Cox : Tidied select() semantics. 221da177e4SLinus Torvalds * Alan Cox : udp_err() fixed properly, also now 231da177e4SLinus Torvalds * select and read wake correctly on errors 241da177e4SLinus Torvalds * Alan Cox : udp_send verify_area moved to avoid mem leak 251da177e4SLinus Torvalds * Alan Cox : UDP can count its memory 261da177e4SLinus Torvalds * Alan Cox : send to an unknown connection causes 271da177e4SLinus Torvalds * an ECONNREFUSED off the icmp, but 281da177e4SLinus Torvalds * does NOT close. 291da177e4SLinus Torvalds * Alan Cox : Switched to new sk_buff handlers. No more backlog! 301da177e4SLinus Torvalds * Alan Cox : Using generic datagram code. Even smaller and the PEEK 311da177e4SLinus Torvalds * bug no longer crashes it. 
321da177e4SLinus Torvalds * Fred Van Kempen : Net2e support for sk->broadcast. 331da177e4SLinus Torvalds * Alan Cox : Uses skb_free_datagram 341da177e4SLinus Torvalds * Alan Cox : Added get/set sockopt support. 351da177e4SLinus Torvalds * Alan Cox : Broadcasting without option set returns EACCES. 361da177e4SLinus Torvalds * Alan Cox : No wakeup calls. Instead we now use the callbacks. 371da177e4SLinus Torvalds * Alan Cox : Use ip_tos and ip_ttl 381da177e4SLinus Torvalds * Alan Cox : SNMP Mibs 391da177e4SLinus Torvalds * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. 401da177e4SLinus Torvalds * Matt Dillon : UDP length checks. 411da177e4SLinus Torvalds * Alan Cox : Smarter af_inet used properly. 421da177e4SLinus Torvalds * Alan Cox : Use new kernel side addressing. 431da177e4SLinus Torvalds * Alan Cox : Incorrect return on truncated datagram receive. 441da177e4SLinus Torvalds * Arnt Gulbrandsen : New udp_send and stuff 451da177e4SLinus Torvalds * Alan Cox : Cache last socket 461da177e4SLinus Torvalds * Alan Cox : Route cache 471da177e4SLinus Torvalds * Jon Peatfield : Minor efficiency fix to sendto(). 481da177e4SLinus Torvalds * Mike Shaver : RFC1122 checks. 491da177e4SLinus Torvalds * Alan Cox : Nonblocking error fix. 501da177e4SLinus Torvalds * Willy Konynenberg : Transparent proxying support. 511da177e4SLinus Torvalds * Mike McLagan : Routing by source 521da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 531da177e4SLinus Torvalds * Last socket cache retained as it 541da177e4SLinus Torvalds * does have a high hit rate. 551da177e4SLinus Torvalds * Olaf Kirch : Don't linearise iovec on sendmsg. 561da177e4SLinus Torvalds * Andi Kleen : Some cleanups, cache destination entry 571da177e4SLinus Torvalds * for connect. 581da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year coma. 
591da177e4SLinus Torvalds * Melvin Smith : Check msg_name not msg_namelen in sendto(), 601da177e4SLinus Torvalds * return ENOTCONN for unconnected sockets (POSIX) 611da177e4SLinus Torvalds * Janos Farkas : don't deliver multi/broadcasts to a different 621da177e4SLinus Torvalds * bound-to-device socket 631da177e4SLinus Torvalds * Hirokazu Takahashi : HW checksumming for outgoing UDP 641da177e4SLinus Torvalds * datagrams. 651da177e4SLinus Torvalds * Hirokazu Takahashi : sendfile() on UDP works now. 661da177e4SLinus Torvalds * Arnaldo C. Melo : convert /proc/net/udp to seq_file 671da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 681da177e4SLinus Torvalds * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind 691da177e4SLinus Torvalds * a single port at the same time. 701da177e4SLinus Torvalds * Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support 71342f0234SJames Chapman * James Chapman : Add L2TP encapsulation type. 721da177e4SLinus Torvalds * 731da177e4SLinus Torvalds * 741da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 751da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 761da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 771da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 
781da177e4SLinus Torvalds */ 791da177e4SLinus Torvalds 80afd46503SJoe Perches #define pr_fmt(fmt) "UDP: " fmt 81afd46503SJoe Perches 827c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 831da177e4SLinus Torvalds #include <asm/ioctls.h> 8495766fffSHideo Aoki #include <linux/bootmem.h> 858203efb3SEric Dumazet #include <linux/highmem.h> 868203efb3SEric Dumazet #include <linux/swap.h> 871da177e4SLinus Torvalds #include <linux/types.h> 881da177e4SLinus Torvalds #include <linux/fcntl.h> 891da177e4SLinus Torvalds #include <linux/module.h> 901da177e4SLinus Torvalds #include <linux/socket.h> 911da177e4SLinus Torvalds #include <linux/sockios.h> 9214c85021SArnaldo Carvalho de Melo #include <linux/igmp.h> 936e540309SShawn Bohrer #include <linux/inetdevice.h> 941da177e4SLinus Torvalds #include <linux/in.h> 951da177e4SLinus Torvalds #include <linux/errno.h> 961da177e4SLinus Torvalds #include <linux/timer.h> 971da177e4SLinus Torvalds #include <linux/mm.h> 981da177e4SLinus Torvalds #include <linux/inet.h> 991da177e4SLinus Torvalds #include <linux/netdevice.h> 1005a0e3ad6STejun Heo #include <linux/slab.h> 101c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h> 1021da177e4SLinus Torvalds #include <linux/skbuff.h> 1031da177e4SLinus Torvalds #include <linux/proc_fs.h> 1041da177e4SLinus Torvalds #include <linux/seq_file.h> 105457c4cbcSEric W. 
Biederman #include <net/net_namespace.h> 1061da177e4SLinus Torvalds #include <net/icmp.h> 107421b3885SShawn Bohrer #include <net/inet_hashtables.h> 1081da177e4SLinus Torvalds #include <net/route.h> 1091da177e4SLinus Torvalds #include <net/checksum.h> 1101da177e4SLinus Torvalds #include <net/xfrm.h> 111296f7ea7SSatoru Moriya #include <trace/events/udp.h> 112447167bfSEric Dumazet #include <linux/static_key.h> 11322911fc5SEric Dumazet #include <trace/events/skb.h> 114076bb0c8SEliezer Tamir #include <net/busy_poll.h> 115ba4e58ecSGerrit Renker #include "udp_impl.h" 116e32ea7e7SCraig Gallek #include <net/sock_reuseport.h> 117217375a0SEric Dumazet #include <net/addrconf.h> 1181da177e4SLinus Torvalds 119f86dcc5aSEric Dumazet struct udp_table udp_table __read_mostly; 120645ca708SEric Dumazet EXPORT_SYMBOL(udp_table); 1211da177e4SLinus Torvalds 1228d987e5cSEric Dumazet long sysctl_udp_mem[3] __read_mostly; 12395766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_mem); 124c482c568SEric Dumazet 125c482c568SEric Dumazet int sysctl_udp_rmem_min __read_mostly; 12695766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_rmem_min); 127c482c568SEric Dumazet 128c482c568SEric Dumazet int sysctl_udp_wmem_min __read_mostly; 12995766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_wmem_min); 13095766fffSHideo Aoki 1318d987e5cSEric Dumazet atomic_long_t udp_memory_allocated; 13295766fffSHideo Aoki EXPORT_SYMBOL(udp_memory_allocated); 13395766fffSHideo Aoki 134f86dcc5aSEric Dumazet #define MAX_UDP_PORTS 65536 135f86dcc5aSEric Dumazet #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) 13698322f22SEric Dumazet 137*63a6fff3SRobert Shearman /* IPCB reference means this can not be used from early demux */ 138*63a6fff3SRobert Shearman static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) 139*63a6fff3SRobert Shearman { 140*63a6fff3SRobert Shearman #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 141*63a6fff3SRobert Shearman if (!net->ipv4.sysctl_udp_l3mdev_accept && 142*63a6fff3SRobert Shearman skb && 
ipv4_l3mdev_skb(IPCB(skb)->flags)) 143*63a6fff3SRobert Shearman return true; 144*63a6fff3SRobert Shearman #endif 145*63a6fff3SRobert Shearman return false; 146*63a6fff3SRobert Shearman } 147*63a6fff3SRobert Shearman 148f24d43c0SEric Dumazet static int udp_lib_lport_inuse(struct net *net, __u16 num, 149645ca708SEric Dumazet const struct udp_hslot *hslot, 15098322f22SEric Dumazet unsigned long *bitmap, 151fe38d2a1SJosef Bacik struct sock *sk, unsigned int log) 15225030a7fSGerrit Renker { 153f24d43c0SEric Dumazet struct sock *sk2; 154ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 15525030a7fSGerrit Renker 156ca065d0cSEric Dumazet sk_for_each(sk2, &hslot->head) { 157f24d43c0SEric Dumazet if (net_eq(sock_net(sk2), net) && 158f24d43c0SEric Dumazet sk2 != sk && 159d4cada4aSEric Dumazet (bitmap || udp_sk(sk2)->udp_port_hash == num) && 160f24d43c0SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1619d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1629d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 163fe38d2a1SJosef Bacik inet_rcv_saddr_equal(sk, sk2, true)) { 164df560056SEric Garver if (sk2->sk_reuseport && sk->sk_reuseport && 165df560056SEric Garver !rcu_access_pointer(sk->sk_reuseport_cb) && 166df560056SEric Garver uid_eq(uid, sock_i_uid(sk2))) { 167df560056SEric Garver if (!bitmap) 168df560056SEric Garver return 0; 169df560056SEric Garver } else { 1704243cdc2SJoe Perches if (!bitmap) 171fc038410SDavid S. 
Miller return 1; 172df560056SEric Garver __set_bit(udp_sk(sk2)->udp_port_hash >> log, 173df560056SEric Garver bitmap); 174df560056SEric Garver } 1754243cdc2SJoe Perches } 17698322f22SEric Dumazet } 17725030a7fSGerrit Renker return 0; 17825030a7fSGerrit Renker } 17925030a7fSGerrit Renker 18030fff923SEric Dumazet /* 18130fff923SEric Dumazet * Note: we still hold spinlock of primary hash chain, so no other writer 18230fff923SEric Dumazet * can insert/delete a socket with local_port == num 18330fff923SEric Dumazet */ 18430fff923SEric Dumazet static int udp_lib_lport_inuse2(struct net *net, __u16 num, 18530fff923SEric Dumazet struct udp_hslot *hslot2, 186fe38d2a1SJosef Bacik struct sock *sk) 18730fff923SEric Dumazet { 18830fff923SEric Dumazet struct sock *sk2; 189ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 19030fff923SEric Dumazet int res = 0; 19130fff923SEric Dumazet 19230fff923SEric Dumazet spin_lock(&hslot2->lock); 193ca065d0cSEric Dumazet udp_portaddr_for_each_entry(sk2, &hslot2->head) { 19430fff923SEric Dumazet if (net_eq(sock_net(sk2), net) && 19530fff923SEric Dumazet sk2 != sk && 19630fff923SEric Dumazet (udp_sk(sk2)->udp_port_hash == num) && 19730fff923SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1989d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1999d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 200fe38d2a1SJosef Bacik inet_rcv_saddr_equal(sk, sk2, true)) { 201df560056SEric Garver if (sk2->sk_reuseport && sk->sk_reuseport && 202df560056SEric Garver !rcu_access_pointer(sk->sk_reuseport_cb) && 203df560056SEric Garver uid_eq(uid, sock_i_uid(sk2))) { 204df560056SEric Garver res = 0; 205df560056SEric Garver } else { 20630fff923SEric Dumazet res = 1; 207df560056SEric Garver } 20830fff923SEric Dumazet break; 20930fff923SEric Dumazet } 2104243cdc2SJoe Perches } 21130fff923SEric Dumazet spin_unlock(&hslot2->lock); 21230fff923SEric Dumazet return res; 21330fff923SEric Dumazet } 21430fff923SEric Dumazet 
/*
 * Put @sk into a reuseport group.
 *
 * Walks the primary chain @hslot for another socket in the same netns
 * that has the same family, the same ipv6-only setting, the same port
 * hash, the same bound device, sk_reuseport set, the same owning uid and
 * an equal receive address.  The first such socket is handed to
 * reuseport_add_sock() together with @sk and that call's result is
 * returned.
 *
 * If no such socket exists, reuseport_alloc() is called for @sk unless
 * it already has a sk_reuseport_cb, in which case 0 is returned.
 *
 * The only caller in this file, udp_lib_get_port(), treats any non-zero
 * return as failure.
 */
static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot)
{
	struct net *net = sock_net(sk);
	kuid_t uid = sock_i_uid(sk);
	struct sock *sk2;

	sk_for_each(sk2, &hslot->head) {
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    sk2->sk_family == sk->sk_family &&
		    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
		    (udp_sk(sk2)->udp_port_hash == udp_sk(sk)->udp_port_hash) &&
		    (sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
		    inet_rcv_saddr_equal(sk, sk2, false)) {
			return reuseport_add_sock(sk, sk2);
		}
	}

	/* Initial allocation may have already happened via setsockopt */
	if (!rcu_access_pointer(sk->sk_reuseport_cb))
		return reuseport_alloc(sk);
	return 0;
}

/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up; 0 asks for a port to be picked
 *                from the local port range
 *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
 *                   with NULL address
 *
 *  On success the port is stored in the socket and, if the socket was
 *  not hashed yet, it is inserted into the primary and secondary hash
 *  chains of the protocol's udp_table.
 *
 *  Return: 0 on success, 1 if no usable port was found, the requested
 *  port is in use, or joining the reuseport group failed.
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
		     unsigned int hash2_nulladdr)
{
	struct udp_hslot *hslot, *hslot2;
	struct udp_table *udptable = sk->sk_prot->h.udp_table;
	int error = 1;
	struct net *net = sock_net(sk);

	if (!snum) {
		/* No port requested: search the local port range. */
		int low, high, remaining;
		unsigned int rand;
		unsigned short first, last;
		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);

		inet_get_local_port_range(net, &low, &high);
		remaining = (high - low) + 1;

		rand = prandom_u32();
		first = reciprocal_scale(rand, remaining) + low;
		/*
		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
		 */
		rand = (rand | 1) * (udptable->mask + 1);
		/* one outer iteration per hash slot, mask + 1 in total */
		last = first + udptable->mask + 1;
		do {
			hslot = udp_hashslot(udptable, net, first);
			bitmap_zero(bitmap, PORTS_PER_CHAIN);
			spin_lock_bh(&hslot->lock);
			/*
			 * Bitmap mode: the port argument is ignored and the
			 * return value is always 0; the call only marks the
			 * ports of this chain that clash with sk.
			 */
			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
					    udptable->log);

			snum = first;
			/*
			 * Iterate on all possible values of snum for this hash.
			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
			 * gives us randomization and full range coverage.
			 * snum is an unsigned short, so the addition wraps and
			 * the walk ends when it comes back to first.
			 */
			do {
				if (low <= snum && snum <= high &&
				    !test_bit(snum >> udptable->log, bitmap) &&
				    !inet_is_local_reserved_port(net, snum))
					goto found; /* hslot->lock still held */
				snum += rand;
			} while (snum != first);
			spin_unlock_bh(&hslot->lock);
			cond_resched();
		} while (++first != last);
		/* every slot tried; the lock was already dropped above */
		goto fail;
	} else {
		/* Specific port requested: check that it is free. */
		hslot = udp_hashslot(udptable, net, snum);
		spin_lock_bh(&hslot->lock);
		if (hslot->count > 10) {
			/*
			 * Long primary chain: try the secondary (port+address)
			 * chains instead.  udp_portaddr_hash holds the hash
			 * without the port; XOR-ing snum completes it (see
			 * udp4_portaddr_hash() for the IPv4 case).
			 */
			int exist;
			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;

			slot2 &= udptable->mask;
			hash2_nulladdr &= udptable->mask;

			hslot2 = udp_hashslot2(udptable, slot2);
			/* primary chain is the shorter one after all */
			if (hslot->count < hslot2->count)
				goto scan_primary_hash;

			exist = udp_lib_lport_inuse2(net, snum, hslot2, sk);
			/* also check the chain of the NULL address, if distinct */
			if (!exist && (hash2_nulladdr != slot2)) {
				hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				exist = udp_lib_lport_inuse2(net, snum, hslot2,
							     sk);
			}
			if (exist)
				goto fail_unlock;
			else
				goto found;
		}
scan_primary_hash:
		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 0))
			goto fail_unlock;
	}
found:
	/* Reached with hslot->lock held on every path. */
	inet_sk(sk)->inet_num = snum;
	udp_sk(sk)->udp_port_hash = snum;
	/* fold the port into the precomputed partial secondary hash */
	udp_sk(sk)->udp_portaddr_hash ^= snum;
	if (sk_unhashed(sk)) {
		if (sk->sk_reuseport &&
		    udp_reuseport_add_sock(sk, hslot)) {
			/* undo the assignments above; the XOR cancels itself */
			inet_sk(sk)->inet_num = 0;
			udp_sk(sk)->udp_port_hash = 0;
			udp_sk(sk)->udp_portaddr_hash ^= snum;
			goto fail_unlock;
		}

		sk_add_node_rcu(sk, &hslot->head);
		hslot->count++;
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
		spin_lock(&hslot2->lock);
		/*
		 * AF_INET6 reuseport sockets go to the tail of the secondary
		 * chain, everything else to the head.
		 */
		if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
		    sk->sk_family == AF_INET6)
			hlist_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
					   &hslot2->head);
		else
			hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
					   &hslot2->head);
		hslot2->count++;
		spin_unlock(&hslot2->lock);
	}
	sock_set_flag(sk, SOCK_RCU_FREE);
	error = 0;
fail_unlock:
	spin_unlock_bh(&hslot->lock);
fail:
	return error;
}
EXPORT_SYMBOL(udp_lib_get_port);

/*
 * Secondary (port+address) hash for IPv4: a jhash of @saddr seeded with
 * the per-netns hash mix, XOR-ed with @port.  Because the port is only
 * XOR-ed in, a value computed with port 0 can be completed later by
 * XOR-ing the real port.
 */
static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
			      unsigned int port)
{
	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
}

/*
 * IPv4 front end of udp_lib_get_port().
 *
 * Computes the secondary hash of (INADDR_ANY, @snum) for the NULL-address
 * lookup, stores the port-less secondary hash of the socket's
 * inet_rcv_saddr in udp_portaddr_hash, and delegates to
 * udp_lib_get_port(), whose result is returned unchanged.
 */
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	unsigned int hash2_nulladdr =
		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
	unsigned int hash2_partial =
		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);

	/* precompute partial secondary hash */
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, hash2_nulladdr);
}

/*
 * Rate how well socket @sk matches an incoming datagram.
 *
 * @saddr/@sport are the datagram's source address and port, @daddr its
 * destination address, @hnum the destination port in host order and
 * @dif the incoming interface index.
 *
 * Returns -1 if @sk cannot receive the datagram: wrong netns, wrong
 * port, ipv6-only socket, or any field the socket has set that differs
 * from the datagram.  Otherwise returns a positive score:
 *   2 for a PF_INET socket, 1 for any other family,
 *  +4 for each of: matching bound local address, matching connected
 *     remote address, matching connected remote port, matching device,
 *  +1 if sk_incoming_cpu equals the current CPU.
 *
 * With @exact_dif set the device comparison is made even for sockets
 * that are not bound to a device, so those only match when @dif is 0.
 */
static int compute_score(struct sock *sk, struct net *net,
			 __be32 saddr, __be16 sport,
			 __be32 daddr, unsigned short hnum, int dif,
			 bool exact_dif)
{
	int score;
	struct inet_sock *inet;

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    ipv6_only_sock(sk))
		return -1;

	score = (sk->sk_family == PF_INET) ? 2 : 1;
	inet = inet_sk(sk);

	/* bound local address must equal the datagram's destination */
	if (inet->inet_rcv_saddr) {
		if (inet->inet_rcv_saddr != daddr)
			return -1;
		score += 4;
	}

	/* connected remote address must equal the datagram's source */
	if (inet->inet_daddr) {
		if (inet->inet_daddr != saddr)
			return -1;
		score += 4;
	}

	/* connected remote port must equal the datagram's source port */
	if (inet->inet_dport) {
		if (inet->inet_dport != sport)
			return -1;
		score += 4;
	}

	if (sk->sk_bound_dev_if || exact_dif) {
		if (sk->sk_bound_dev_if != dif)
			return -1;
		score += 4;
	}
	if (sk->sk_incoming_cpu == raw_smp_processor_id())
		score++;
	return score;
}

/*
 * Hash of the 4-tuple (@laddr, @lport, @faddr, @fport) used to pick a
 * socket inside a reuseport group.  The hash is keyed with a secret that
 * is filled in through net_get_random_once() plus the per-netns hash mix.
 */
static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
		       const __u16 lport, const __be32 faddr,
		       const __be16 fport)
{
	static u32 udp_ehash_secret __read_mostly;

	net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
			      udp_ehash_secret + net_hash_mix(net));
}

/*
 * Best-match search on one secondary hash chain.
 *
 * Returns the socket with the highest compute_score() on @hslot2, or NULL
 * if no socket scores above 0.  When a new best socket has sk_reuseport
 * set, reuseport_select_sock() is tried first and its non-NULL result is
 * returned immediately.  If it returns NULL, later sockets with the same
 * score compete for the result: each one replaces it when
 * reciprocal_scale(hash, matches) is 0, with hash advanced by
 * next_pseudo_random32() after every such candidate.
 *
 * called with rcu_read_lock()
 */
static struct sock *udp4_lib_lookup2(struct net *net,
		__be32 saddr, __be16 sport,
		__be32 daddr, unsigned int hnum, int dif, bool exact_dif,
		struct udp_hslot *hslot2,
		struct sk_buff *skb)
{
	struct sock *sk, *result;
	int score, badness, matches = 0, reuseport = 0;
	u32 hash = 0;

	result = NULL;
	badness = 0;	/* best score so far; -1 and 0 never qualify */
	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
		score = compute_score(sk, net, saddr, sport,
				      daddr, hnum, dif, exact_dif);
		if (score > badness) {
			/* remembered for the equal-score branch below */
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				hash = udp_ehashfn(net, daddr, hnum,
						   saddr, sport);
				result = reuseport_select_sock(sk, hash, skb,
							sizeof(struct udphdr));
				if (result)
					return result;
				matches = 1;
			}
			badness = score;
			result = sk;
		} else if (score == badness && reuseport) {
			matches++;
			if (reciprocal_scale(hash, matches) == 0)
				result = sk;
			hash = next_pseudo_random32(hash);
		}
	}
	return result;
}

/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 *
 * Find the socket in @udptable that best matches a datagram from
 * @saddr:@sport to @daddr:@dport (ports in network byte order) received
 * on interface @dif.  @skb may be NULL; it is passed on to
 * reuseport_select_sock() and decides, through udp_lib_exact_dif_match(),
 * whether the device must match exactly.
 *
 * Returns the best-scoring socket or NULL.  No reference is taken here.
 */
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
		__be16 sport, __be32 daddr, __be16 dport,
		int dif, struct udp_table *udptable, struct sk_buff *skb)
{
	struct sock *sk, *result;
	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
	bool exact_dif = udp_lib_exact_dif_match(net, skb);
	int score, badness, matches = 0, reuseport = 0;
	u32 hash = 0;

	if (hslot->count > 10) {
		/*
		 * Long primary chain: look in the secondary chain for
		 * (daddr, port) first, then in the one for (INADDR_ANY, port).
		 * Whenever the primary chain turns out to be the shorter of
		 * the two, fall back to scanning it instead.
		 */
		hash2 = udp4_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		result = udp4_lib_lookup2(net, saddr, sport,
					  daddr, hnum, dif,
					  exact_dif, hslot2, skb);
		if (!result) {
			unsigned int old_slot2 = slot2;
			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
			slot2 = hash2 & udptable->mask;
			/* avoid searching the same slot again. */
			if (unlikely(slot2 == old_slot2))
				return result;

			hslot2 = &udptable->hash2[slot2];
			if (hslot->count < hslot2->count)
				goto begin;

			result = udp4_lib_lookup2(net, saddr, sport,
						  daddr, hnum, dif,
						  exact_dif, hslot2, skb);
		}
		return result;
	}
begin:
	/* Same selection logic as udp4_lib_lookup2(), on the primary chain. */
	result = NULL;
	badness = 0;
	sk_for_each_rcu(sk, &hslot->head) {
		score = compute_score(sk, net, saddr, sport,
				      daddr, hnum, dif, exact_dif);
		if (score > badness) {
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				hash = udp_ehashfn(net, daddr, hnum,
						   saddr, sport);
				result = reuseport_select_sock(sk, hash, skb,
							sizeof(struct udphdr));
				if (result)
					return result;
				matches = 1;
			}
			result = sk;
			badness = score;
		} else if (score == badness && reuseport) {
			matches++;
			if (reciprocal_scale(hash, matches) == 0)
				result = sk;
			hash = next_pseudo_random32(hash);
		}
	}
	return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);

/*
 * Socket lookup for a received skb: netns, addresses and incoming
 * interface are taken from the skb itself, the ports from the caller.
 */
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
						 __be16 sport, __be16 dport,
						 struct udp_table *udptable)
{
	const struct iphdr *iph = ip_hdr(skb);

	return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
				 iph->daddr, dport, inet_iif(skb),
				 udptable, skb);
}

/* Exported variant of __udp4_lib_lookup_skb() fixed to the global udp_table. */
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
				 __be16 sport, __be16 dport)
{
	return __udp4_lib_lookup_skb(skb, sport, dport, &udp_table);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);

/* Must be called under rcu_read_lock().
 * Does increment socket refcount.
 * Returns NULL if no socket matches or if the matching socket's refcount
 * had already dropped to zero.
 */
#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
    IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
			     __be32 daddr, __be16 dport, int dif)
{
	struct sock *sk;

	sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport,
			       dif, &udp_table, NULL);
	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
		sk = NULL;
	return sk;
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);
#endif

/*
 * Decide whether @sk should get a copy of a multicast datagram.
 *
 * Returns false if the socket is in another netns, has a different port
 * hash than @hnum, is ipv6-only, has a remote address, remote port, local
 * address or bound device set that differs from the datagram's, or if
 * ip_mc_sf_allow() rejects the (@loc_addr, @rmt_addr, @dif) combination.
 * Returns true otherwise.  @loc_port is not examined here; the port test
 * uses @hnum.
 */
static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
				       __be16 loc_port, __be32 loc_addr,
				       __be16 rmt_port, __be32 rmt_addr,
				       int dif, unsigned short hnum)
{
	struct inet_sock *inet = inet_sk(sk);

	if (!net_eq(sock_net(sk), net) ||
	    udp_sk(sk)->udp_port_hash != hnum ||
	    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
	    (inet->inet_dport != rmt_port && inet->inet_dport) ||
	    (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) ||
	    ipv6_only_sock(sk) ||
	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
		return false;
	if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif))
		return false;
	return true;
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition. If err < 0 then the socket should
 * be closed and the error returned to the user. If err > 0
 * it's just the icmp type << 8 | icmp code.
 * Header points to the ip header of the error packet. We move
 * on past this. Then (as it used to claim before adjustment)
 * header points to the first 8 bytes of the udp header. We need
 * to find the appropriate port.
 */

void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
	struct inet_sock *inet;
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	/* UDP header follows the IP header, whose length is ihl 32-bit words */
	struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	int harderr;
	int err;
	struct net *net = dev_net(skb->dev);

	/*
	 * Addresses and ports are swapped relative to a normal receive
	 * lookup: the header's destination is passed as the remote side
	 * and its source as the local side.
	 */
	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
			iph->saddr, uh->source, skb->dev->ifindex, udptable,
			NULL);
	if (!sk) {
		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
		return; /* No socket for error */
	}

	err = 0;
	harderr = 0;
	inet = inet_sk(sk);

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		/* not reported to the socket */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			ipv4_sk_update_pmtu(skb, sk, info);
			/* only reported unless pmtudisc is IP_PMTUDISC_DONT */
			if (inet->pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;
		}
		err = EHOSTUNREACH;
		/* codes covered by icmp_err_convert[] use its errno/fatal */
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	case ICMP_REDIRECT:
		ipv4_sk_redirect(skb, sk);
		goto out;
	}

	/*
	 * RFC1122: OK. Passes ICMP errors back to application, as per
	 * 4.1.3.3.
	 *
	 * Without recverr, only hard errors on sockets in TCP_ESTABLISHED
	 * state are reported.  With recverr, the error is also handed to
	 * ip_icmp_error() and always reported.
	 */
	if (!inet->recverr) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	return;
}

/* ICMP error entry point for plain UDP: uses the global udp_table. */
void udp_err(struct sk_buff *skb, u32 info)
{
	__udp4_lib_err(skb, info, &udp_table);
}

/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 * Does nothing when the socket has no pending (corked) data.
 */
void udp_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending) {
		up->len = 0;
		up->pending = 0;
		ip_flush_pending_frames(sk);
	}
}
EXPORT_SYMBOL(udp_flush_pending_frames);

/**
 * udp4_hwcsum - handle outgoing HW checksumming
 * @skb: sk_buff containing the filled-in UDP header
 * (checksum field must be zeroed out)
 * @src: source IP address
 * @dst: destination IP address
 */
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
Miller struct udphdr *uh = udp_hdr(skb); 720f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 721f6b9664fSHerbert Xu int len = skb->len - offset; 722f6b9664fSHerbert Xu int hlen = len; 723db8dac20SDavid S. Miller __wsum csum = 0; 724db8dac20SDavid S. Miller 725ebbe495fSWANG Cong if (!skb_has_frag_list(skb)) { 726db8dac20SDavid S. Miller /* 727db8dac20SDavid S. Miller * Only one fragment on the socket. 728db8dac20SDavid S. Miller */ 729db8dac20SDavid S. Miller skb->csum_start = skb_transport_header(skb) - skb->head; 730db8dac20SDavid S. Miller skb->csum_offset = offsetof(struct udphdr, check); 731f6b9664fSHerbert Xu uh->check = ~csum_tcpudp_magic(src, dst, len, 732f6b9664fSHerbert Xu IPPROTO_UDP, 0); 733db8dac20SDavid S. Miller } else { 734ebbe495fSWANG Cong struct sk_buff *frags; 735ebbe495fSWANG Cong 736db8dac20SDavid S. Miller /* 737db8dac20SDavid S. Miller * HW-checksum won't work as there are two or more 738db8dac20SDavid S. Miller * fragments on the socket so that all csums of sk_buffs 739db8dac20SDavid S. Miller * should be together 740db8dac20SDavid S. Miller */ 741ebbe495fSWANG Cong skb_walk_frags(skb, frags) { 742f6b9664fSHerbert Xu csum = csum_add(csum, frags->csum); 743f6b9664fSHerbert Xu hlen -= frags->len; 744ebbe495fSWANG Cong } 745db8dac20SDavid S. Miller 746f6b9664fSHerbert Xu csum = skb_checksum(skb, offset, hlen, csum); 747db8dac20SDavid S. Miller skb->ip_summed = CHECKSUM_NONE; 748db8dac20SDavid S. Miller 749db8dac20SDavid S. Miller uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); 750db8dac20SDavid S. Miller if (uh->check == 0) 751db8dac20SDavid S. Miller uh->check = CSUM_MANGLED_0; 752db8dac20SDavid S. Miller } 753db8dac20SDavid S. Miller } 754c26bf4a5SThomas Graf EXPORT_SYMBOL_GPL(udp4_hwcsum); 755db8dac20SDavid S. Miller 756af5fcba7STom Herbert /* Function to set UDP checksum for an IPv4 UDP packet. This is intended 757af5fcba7STom Herbert * for the simple case like when setting the checksum for a UDP tunnel. 
758af5fcba7STom Herbert */ 759af5fcba7STom Herbert void udp_set_csum(bool nocheck, struct sk_buff *skb, 760af5fcba7STom Herbert __be32 saddr, __be32 daddr, int len) 761af5fcba7STom Herbert { 762af5fcba7STom Herbert struct udphdr *uh = udp_hdr(skb); 763af5fcba7STom Herbert 764179bc67fSEdward Cree if (nocheck) { 765af5fcba7STom Herbert uh->check = 0; 766179bc67fSEdward Cree } else if (skb_is_gso(skb)) { 767af5fcba7STom Herbert uh->check = ~udp_v4_check(len, saddr, daddr, 0); 768179bc67fSEdward Cree } else if (skb->ip_summed == CHECKSUM_PARTIAL) { 769179bc67fSEdward Cree uh->check = 0; 770179bc67fSEdward Cree uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); 771179bc67fSEdward Cree if (uh->check == 0) 772179bc67fSEdward Cree uh->check = CSUM_MANGLED_0; 773d75f1306SEdward Cree } else { 774af5fcba7STom Herbert skb->ip_summed = CHECKSUM_PARTIAL; 775af5fcba7STom Herbert skb->csum_start = skb_transport_header(skb) - skb->head; 776af5fcba7STom Herbert skb->csum_offset = offsetof(struct udphdr, check); 777af5fcba7STom Herbert uh->check = ~udp_v4_check(len, saddr, daddr, 0); 778af5fcba7STom Herbert } 779af5fcba7STom Herbert } 780af5fcba7STom Herbert EXPORT_SYMBOL(udp_set_csum); 781af5fcba7STom Herbert 78279ab0531SDavid S. Miller static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) 783f6b9664fSHerbert Xu { 784f6b9664fSHerbert Xu struct sock *sk = skb->sk; 785f6b9664fSHerbert Xu struct inet_sock *inet = inet_sk(sk); 786f6b9664fSHerbert Xu struct udphdr *uh; 787f6b9664fSHerbert Xu int err = 0; 788f6b9664fSHerbert Xu int is_udplite = IS_UDPLITE(sk); 789f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 790f6b9664fSHerbert Xu int len = skb->len - offset; 791f6b9664fSHerbert Xu __wsum csum = 0; 792f6b9664fSHerbert Xu 793f6b9664fSHerbert Xu /* 794f6b9664fSHerbert Xu * Create a UDP header 795f6b9664fSHerbert Xu */ 796f6b9664fSHerbert Xu uh = udp_hdr(skb); 797f6b9664fSHerbert Xu uh->source = inet->inet_sport; 79879ab0531SDavid S. 
Miller uh->dest = fl4->fl4_dport; 799f6b9664fSHerbert Xu uh->len = htons(len); 800f6b9664fSHerbert Xu uh->check = 0; 801f6b9664fSHerbert Xu 802f6b9664fSHerbert Xu if (is_udplite) /* UDP-Lite */ 803f6b9664fSHerbert Xu csum = udplite_csum(skb); 804f6b9664fSHerbert Xu 80528448b80STom Herbert else if (sk->sk_no_check_tx) { /* UDP csum disabled */ 806f6b9664fSHerbert Xu 807f6b9664fSHerbert Xu skb->ip_summed = CHECKSUM_NONE; 808f6b9664fSHerbert Xu goto send; 809f6b9664fSHerbert Xu 810f6b9664fSHerbert Xu } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 811f6b9664fSHerbert Xu 81279ab0531SDavid S. Miller udp4_hwcsum(skb, fl4->saddr, fl4->daddr); 813f6b9664fSHerbert Xu goto send; 814f6b9664fSHerbert Xu 815f6b9664fSHerbert Xu } else 816f6b9664fSHerbert Xu csum = udp_csum(skb); 817f6b9664fSHerbert Xu 818f6b9664fSHerbert Xu /* add protocol-dependent pseudo-header */ 81979ab0531SDavid S. Miller uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, 820f6b9664fSHerbert Xu sk->sk_protocol, csum); 821f6b9664fSHerbert Xu if (uh->check == 0) 822f6b9664fSHerbert Xu uh->check = CSUM_MANGLED_0; 823f6b9664fSHerbert Xu 824f6b9664fSHerbert Xu send: 825b5ec8eeaSEric Dumazet err = ip_send_skb(sock_net(sk), skb); 826f6b9664fSHerbert Xu if (err) { 827f6b9664fSHerbert Xu if (err == -ENOBUFS && !inet->recverr) { 8286aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), 829f6b9664fSHerbert Xu UDP_MIB_SNDBUFERRORS, is_udplite); 830f6b9664fSHerbert Xu err = 0; 831f6b9664fSHerbert Xu } 832f6b9664fSHerbert Xu } else 8336aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), 834f6b9664fSHerbert Xu UDP_MIB_OUTDATAGRAMS, is_udplite); 835f6b9664fSHerbert Xu return err; 836f6b9664fSHerbert Xu } 837f6b9664fSHerbert Xu 838db8dac20SDavid S. Miller /* 839db8dac20SDavid S. Miller * Push out all pending data as one UDP datagram. Socket is locked. 840db8dac20SDavid S. Miller */ 8418822b64aSHannes Frederic Sowa int udp_push_pending_frames(struct sock *sk) 842db8dac20SDavid S. 
Miller { 843db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 844db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 845b6f21b26SDavid S. Miller struct flowi4 *fl4 = &inet->cork.fl.u.ip4; 846db8dac20SDavid S. Miller struct sk_buff *skb; 847db8dac20SDavid S. Miller int err = 0; 848db8dac20SDavid S. Miller 84977968b78SDavid S. Miller skb = ip_finish_skb(sk, fl4); 850f6b9664fSHerbert Xu if (!skb) 851db8dac20SDavid S. Miller goto out; 852db8dac20SDavid S. Miller 85379ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 854db8dac20SDavid S. Miller 855db8dac20SDavid S. Miller out: 856db8dac20SDavid S. Miller up->len = 0; 857db8dac20SDavid S. Miller up->pending = 0; 858db8dac20SDavid S. Miller return err; 859db8dac20SDavid S. Miller } 8608822b64aSHannes Frederic Sowa EXPORT_SYMBOL(udp_push_pending_frames); 861db8dac20SDavid S. Miller 8621b784140SYing Xue int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 863db8dac20SDavid S. Miller { 864db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 865db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 866e474995fSDavid S. Miller struct flowi4 fl4_stack; 867b6f21b26SDavid S. Miller struct flowi4 *fl4; 868db8dac20SDavid S. Miller int ulen = len; 869db8dac20SDavid S. Miller struct ipcm_cookie ipc; 870db8dac20SDavid S. Miller struct rtable *rt = NULL; 871db8dac20SDavid S. Miller int free = 0; 872db8dac20SDavid S. Miller int connected = 0; 873db8dac20SDavid S. Miller __be32 daddr, faddr, saddr; 874db8dac20SDavid S. Miller __be16 dport; 875db8dac20SDavid S. Miller u8 tos; 876db8dac20SDavid S. Miller int err, is_udplite = IS_UDPLITE(sk); 877db8dac20SDavid S. Miller int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 878db8dac20SDavid S. Miller int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 879903ab86dSHerbert Xu struct sk_buff *skb; 880f6d8bd05SEric Dumazet struct ip_options_data opt_copy; 881db8dac20SDavid S. Miller 882db8dac20SDavid S. 
Miller if (len > 0xFFFF) 883db8dac20SDavid S. Miller return -EMSGSIZE; 884db8dac20SDavid S. Miller 885db8dac20SDavid S. Miller /* 886db8dac20SDavid S. Miller * Check the flags. 887db8dac20SDavid S. Miller */ 888db8dac20SDavid S. Miller 889db8dac20SDavid S. Miller if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ 890db8dac20SDavid S. Miller return -EOPNOTSUPP; 891db8dac20SDavid S. Miller 892db8dac20SDavid S. Miller ipc.opt = NULL; 8932244d07bSOliver Hartkopp ipc.tx_flags = 0; 894aa661581SFrancesco Fusco ipc.ttl = 0; 895aa661581SFrancesco Fusco ipc.tos = -1; 896db8dac20SDavid S. Miller 897903ab86dSHerbert Xu getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 898903ab86dSHerbert Xu 899f5fca608SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 900db8dac20SDavid S. Miller if (up->pending) { 901db8dac20SDavid S. Miller /* 902db8dac20SDavid S. Miller * There are pending frames. 903db8dac20SDavid S. Miller * The socket lock must be held while it's corked. 904db8dac20SDavid S. Miller */ 905db8dac20SDavid S. Miller lock_sock(sk); 906db8dac20SDavid S. Miller if (likely(up->pending)) { 907db8dac20SDavid S. Miller if (unlikely(up->pending != AF_INET)) { 908db8dac20SDavid S. Miller release_sock(sk); 909db8dac20SDavid S. Miller return -EINVAL; 910db8dac20SDavid S. Miller } 911db8dac20SDavid S. Miller goto do_append_data; 912db8dac20SDavid S. Miller } 913db8dac20SDavid S. Miller release_sock(sk); 914db8dac20SDavid S. Miller } 915db8dac20SDavid S. Miller ulen += sizeof(struct udphdr); 916db8dac20SDavid S. Miller 917db8dac20SDavid S. Miller /* 918db8dac20SDavid S. Miller * Get and verify the address. 919db8dac20SDavid S. Miller */ 920db8dac20SDavid S. Miller if (msg->msg_name) { 921342dfc30SSteffen Hurrle DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); 922db8dac20SDavid S. Miller if (msg->msg_namelen < sizeof(*usin)) 923db8dac20SDavid S. Miller return -EINVAL; 924db8dac20SDavid S. 
Miller if (usin->sin_family != AF_INET) { 925db8dac20SDavid S. Miller if (usin->sin_family != AF_UNSPEC) 926db8dac20SDavid S. Miller return -EAFNOSUPPORT; 927db8dac20SDavid S. Miller } 928db8dac20SDavid S. Miller 929db8dac20SDavid S. Miller daddr = usin->sin_addr.s_addr; 930db8dac20SDavid S. Miller dport = usin->sin_port; 931db8dac20SDavid S. Miller if (dport == 0) 932db8dac20SDavid S. Miller return -EINVAL; 933db8dac20SDavid S. Miller } else { 934db8dac20SDavid S. Miller if (sk->sk_state != TCP_ESTABLISHED) 935db8dac20SDavid S. Miller return -EDESTADDRREQ; 936c720c7e8SEric Dumazet daddr = inet->inet_daddr; 937c720c7e8SEric Dumazet dport = inet->inet_dport; 938db8dac20SDavid S. Miller /* Open fast path for connected socket. 939db8dac20SDavid S. Miller Route will not be used, if at least one option is set. 940db8dac20SDavid S. Miller */ 941db8dac20SDavid S. Miller connected = 1; 942db8dac20SDavid S. Miller } 943c14ac945SSoheil Hassas Yeganeh 944c14ac945SSoheil Hassas Yeganeh ipc.sockc.tsflags = sk->sk_tsflags; 945c720c7e8SEric Dumazet ipc.addr = inet->inet_saddr; 946db8dac20SDavid S. Miller ipc.oif = sk->sk_bound_dev_if; 947bf84a010SDaniel Borkmann 948db8dac20SDavid S. Miller if (msg->msg_controllen) { 94924025c46SSoheil Hassas Yeganeh err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6); 95091948309SEric Dumazet if (unlikely(err)) { 95191948309SEric Dumazet kfree(ipc.opt); 952db8dac20SDavid S. Miller return err; 95391948309SEric Dumazet } 954db8dac20SDavid S. Miller if (ipc.opt) 955db8dac20SDavid S. Miller free = 1; 956db8dac20SDavid S. Miller connected = 0; 957db8dac20SDavid S. 
Miller } 958f6d8bd05SEric Dumazet if (!ipc.opt) { 959f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 960f6d8bd05SEric Dumazet 961f6d8bd05SEric Dumazet rcu_read_lock(); 962f6d8bd05SEric Dumazet inet_opt = rcu_dereference(inet->inet_opt); 963f6d8bd05SEric Dumazet if (inet_opt) { 964f6d8bd05SEric Dumazet memcpy(&opt_copy, inet_opt, 965f6d8bd05SEric Dumazet sizeof(*inet_opt) + inet_opt->opt.optlen); 966f6d8bd05SEric Dumazet ipc.opt = &opt_copy.opt; 967f6d8bd05SEric Dumazet } 968f6d8bd05SEric Dumazet rcu_read_unlock(); 969f6d8bd05SEric Dumazet } 970db8dac20SDavid S. Miller 971db8dac20SDavid S. Miller saddr = ipc.addr; 972db8dac20SDavid S. Miller ipc.addr = faddr = daddr; 973db8dac20SDavid S. Miller 974c14ac945SSoheil Hassas Yeganeh sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); 975c14ac945SSoheil Hassas Yeganeh 976f6d8bd05SEric Dumazet if (ipc.opt && ipc.opt->opt.srr) { 977db8dac20SDavid S. Miller if (!daddr) 978db8dac20SDavid S. Miller return -EINVAL; 979f6d8bd05SEric Dumazet faddr = ipc.opt->opt.faddr; 980db8dac20SDavid S. Miller connected = 0; 981db8dac20SDavid S. Miller } 982aa661581SFrancesco Fusco tos = get_rttos(&ipc, inet); 983db8dac20SDavid S. Miller if (sock_flag(sk, SOCK_LOCALROUTE) || 984db8dac20SDavid S. Miller (msg->msg_flags & MSG_DONTROUTE) || 985f6d8bd05SEric Dumazet (ipc.opt && ipc.opt->opt.is_strictroute)) { 986db8dac20SDavid S. Miller tos |= RTO_ONLINK; 987db8dac20SDavid S. Miller connected = 0; 988db8dac20SDavid S. Miller } 989db8dac20SDavid S. Miller 990db8dac20SDavid S. Miller if (ipv4_is_multicast(daddr)) { 991db8dac20SDavid S. Miller if (!ipc.oif) 992db8dac20SDavid S. Miller ipc.oif = inet->mc_index; 993db8dac20SDavid S. Miller if (!saddr) 994db8dac20SDavid S. Miller saddr = inet->mc_addr; 995db8dac20SDavid S. Miller connected = 0; 99676e21053SErich E. Hoover } else if (!ipc.oif) 99776e21053SErich E. Hoover ipc.oif = inet->uc_index; 998db8dac20SDavid S. Miller 999db8dac20SDavid S. Miller if (connected) 1000db8dac20SDavid S. 
Miller rt = (struct rtable *)sk_dst_check(sk, 0); 1001db8dac20SDavid S. Miller 100251456b29SIan Morris if (!rt) { 100384a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 10049a24abfaSDavid Ahern __u8 flow_flags = inet_sk_flowi_flags(sk); 100584a3aa00SPavel Emelyanov 1006e474995fSDavid S. Miller fl4 = &fl4_stack; 10079a24abfaSDavid Ahern 10089a24abfaSDavid Ahern flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 10099a24abfaSDavid Ahern RT_SCOPE_UNIVERSE, sk->sk_protocol, 10109a24abfaSDavid Ahern flow_flags, 1011e2d118a1SLorenzo Colitti faddr, saddr, dport, inet->inet_sport, 1012e2d118a1SLorenzo Colitti sk->sk_uid); 1013c0951cbcSDavid S. Miller 1014e474995fSDavid S. Miller security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 1015e474995fSDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 1016b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 1017b23dd4feSDavid S. Miller err = PTR_ERR(rt); 101806dc94b1SDavid S. Miller rt = NULL; 1019db8dac20SDavid S. Miller if (err == -ENETUNREACH) 1020f1d8cba6SEric Dumazet IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 1021db8dac20SDavid S. Miller goto out; 1022db8dac20SDavid S. Miller } 1023db8dac20SDavid S. Miller 1024db8dac20SDavid S. Miller err = -EACCES; 1025db8dac20SDavid S. Miller if ((rt->rt_flags & RTCF_BROADCAST) && 1026db8dac20SDavid S. Miller !sock_flag(sk, SOCK_BROADCAST)) 1027db8dac20SDavid S. Miller goto out; 1028db8dac20SDavid S. Miller if (connected) 1029d8d1f30bSChangli Gao sk_dst_set(sk, dst_clone(&rt->dst)); 1030db8dac20SDavid S. Miller } 1031db8dac20SDavid S. Miller 1032db8dac20SDavid S. Miller if (msg->msg_flags&MSG_CONFIRM) 1033db8dac20SDavid S. Miller goto do_confirm; 1034db8dac20SDavid S. Miller back_from_confirm: 1035db8dac20SDavid S. Miller 1036e474995fSDavid S. Miller saddr = fl4->saddr; 1037db8dac20SDavid S. Miller if (!ipc.addr) 1038e474995fSDavid S. Miller daddr = ipc.addr = fl4->daddr; 1039db8dac20SDavid S. Miller 1040903ab86dSHerbert Xu /* Lockless fast path for the non-corking case. 
*/ 1041903ab86dSHerbert Xu if (!corkreq) { 1042f69e6d13SAl Viro skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, 1043903ab86dSHerbert Xu sizeof(struct udphdr), &ipc, &rt, 1044903ab86dSHerbert Xu msg->msg_flags); 1045903ab86dSHerbert Xu err = PTR_ERR(skb); 104650c3a487SYOSHIFUJI Hideaki / 吉藤英明 if (!IS_ERR_OR_NULL(skb)) 104779ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 1048903ab86dSHerbert Xu goto out; 1049903ab86dSHerbert Xu } 1050903ab86dSHerbert Xu 1051db8dac20SDavid S. Miller lock_sock(sk); 1052db8dac20SDavid S. Miller if (unlikely(up->pending)) { 1053db8dac20SDavid S. Miller /* The socket is already corked while preparing it. */ 1054db8dac20SDavid S. Miller /* ... which is an evident application bug. --ANK */ 1055db8dac20SDavid S. Miller release_sock(sk); 1056db8dac20SDavid S. Miller 1057ba7a46f1SJoe Perches net_dbg_ratelimited("cork app bug 2\n"); 1058db8dac20SDavid S. Miller err = -EINVAL; 1059db8dac20SDavid S. Miller goto out; 1060db8dac20SDavid S. Miller } 1061db8dac20SDavid S. Miller /* 1062db8dac20SDavid S. Miller * Now cork the socket to pend data. 1063db8dac20SDavid S. Miller */ 1064b6f21b26SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 1065b6f21b26SDavid S. Miller fl4->daddr = daddr; 1066b6f21b26SDavid S. Miller fl4->saddr = saddr; 10679cce96dfSDavid S. Miller fl4->fl4_dport = dport; 10689cce96dfSDavid S. Miller fl4->fl4_sport = inet->inet_sport; 1069db8dac20SDavid S. Miller up->pending = AF_INET; 1070db8dac20SDavid S. Miller 1071db8dac20SDavid S. Miller do_append_data: 1072db8dac20SDavid S. Miller up->len += ulen; 1073f69e6d13SAl Viro err = ip_append_data(sk, fl4, getfrag, msg, ulen, 10742e77d89bSEric Dumazet sizeof(struct udphdr), &ipc, &rt, 1075db8dac20SDavid S. Miller corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 1076db8dac20SDavid S. Miller if (err) 1077db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1078db8dac20SDavid S. Miller else if (!corkreq) 1079db8dac20SDavid S. 
Miller err = udp_push_pending_frames(sk); 1080db8dac20SDavid S. Miller else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) 1081db8dac20SDavid S. Miller up->pending = 0; 1082db8dac20SDavid S. Miller release_sock(sk); 1083db8dac20SDavid S. Miller 1084db8dac20SDavid S. Miller out: 1085db8dac20SDavid S. Miller ip_rt_put(rt); 1086db8dac20SDavid S. Miller if (free) 1087db8dac20SDavid S. Miller kfree(ipc.opt); 1088db8dac20SDavid S. Miller if (!err) 1089db8dac20SDavid S. Miller return len; 1090db8dac20SDavid S. Miller /* 1091db8dac20SDavid S. Miller * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting 1092db8dac20SDavid S. Miller * ENOBUFS might not be good (it's not tunable per se), but otherwise 1093db8dac20SDavid S. Miller * we don't have a good statistic (IpOutDiscards but it can be too many 1094db8dac20SDavid S. Miller * things). We could add another new stat but at least for now that 1095db8dac20SDavid S. Miller * seems like overkill. 1096db8dac20SDavid S. Miller */ 1097db8dac20SDavid S. Miller if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 10986aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), 1099629ca23cSPavel Emelyanov UDP_MIB_SNDBUFERRORS, is_udplite); 1100db8dac20SDavid S. Miller } 1101db8dac20SDavid S. Miller return err; 1102db8dac20SDavid S. Miller 1103db8dac20SDavid S. Miller do_confirm: 1104d8d1f30bSChangli Gao dst_confirm(&rt->dst); 1105db8dac20SDavid S. Miller if (!(msg->msg_flags&MSG_PROBE) || len) 1106db8dac20SDavid S. Miller goto back_from_confirm; 1107db8dac20SDavid S. Miller err = 0; 1108db8dac20SDavid S. Miller goto out; 1109db8dac20SDavid S. Miller } 1110c482c568SEric Dumazet EXPORT_SYMBOL(udp_sendmsg); 1111db8dac20SDavid S. Miller 1112db8dac20SDavid S. Miller int udp_sendpage(struct sock *sk, struct page *page, int offset, 1113db8dac20SDavid S. Miller size_t size, int flags) 1114db8dac20SDavid S. Miller { 1115f5fca608SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 1116db8dac20SDavid S. 
Miller struct udp_sock *up = udp_sk(sk); 1117db8dac20SDavid S. Miller int ret; 1118db8dac20SDavid S. Miller 1119d3f7d56aSShawn Landden if (flags & MSG_SENDPAGE_NOTLAST) 1120d3f7d56aSShawn Landden flags |= MSG_MORE; 1121d3f7d56aSShawn Landden 1122db8dac20SDavid S. Miller if (!up->pending) { 1123db8dac20SDavid S. Miller struct msghdr msg = { .msg_flags = flags|MSG_MORE }; 1124db8dac20SDavid S. Miller 1125db8dac20SDavid S. Miller /* Call udp_sendmsg to specify destination address which 1126db8dac20SDavid S. Miller * sendpage interface can't pass. 1127db8dac20SDavid S. Miller * This will succeed only when the socket is connected. 1128db8dac20SDavid S. Miller */ 11291b784140SYing Xue ret = udp_sendmsg(sk, &msg, 0); 1130db8dac20SDavid S. Miller if (ret < 0) 1131db8dac20SDavid S. Miller return ret; 1132db8dac20SDavid S. Miller } 1133db8dac20SDavid S. Miller 1134db8dac20SDavid S. Miller lock_sock(sk); 1135db8dac20SDavid S. Miller 1136db8dac20SDavid S. Miller if (unlikely(!up->pending)) { 1137db8dac20SDavid S. Miller release_sock(sk); 1138db8dac20SDavid S. Miller 1139ba7a46f1SJoe Perches net_dbg_ratelimited("udp cork app bug 3\n"); 1140db8dac20SDavid S. Miller return -EINVAL; 1141db8dac20SDavid S. Miller } 1142db8dac20SDavid S. Miller 1143f5fca608SDavid S. Miller ret = ip_append_page(sk, &inet->cork.fl.u.ip4, 1144f5fca608SDavid S. Miller page, offset, size, flags); 1145db8dac20SDavid S. Miller if (ret == -EOPNOTSUPP) { 1146db8dac20SDavid S. Miller release_sock(sk); 1147db8dac20SDavid S. Miller return sock_no_sendpage(sk->sk_socket, page, offset, 1148db8dac20SDavid S. Miller size, flags); 1149db8dac20SDavid S. Miller } 1150db8dac20SDavid S. Miller if (ret < 0) { 1151db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1152db8dac20SDavid S. Miller goto out; 1153db8dac20SDavid S. Miller } 1154db8dac20SDavid S. Miller 1155db8dac20SDavid S. Miller up->len += size; 1156db8dac20SDavid S. Miller if (!(up->corkflag || (flags&MSG_MORE))) 1157db8dac20SDavid S. 
Miller ret = udp_push_pending_frames(sk); 1158db8dac20SDavid S. Miller if (!ret) 1159db8dac20SDavid S. Miller ret = size; 1160db8dac20SDavid S. Miller out: 1161db8dac20SDavid S. Miller release_sock(sk); 1162db8dac20SDavid S. Miller return ret; 1163db8dac20SDavid S. Miller } 1164db8dac20SDavid S. Miller 11657c13f97fSPaolo Abeni /* fully reclaim rmem/fwd memory allocated for skb */ 1166f970bd9eSPaolo Abeni static void udp_rmem_release(struct sock *sk, int size, int partial) 1167f970bd9eSPaolo Abeni { 11686b229cf7SEric Dumazet struct udp_sock *up = udp_sk(sk); 1169f970bd9eSPaolo Abeni int amt; 1170f970bd9eSPaolo Abeni 11716b229cf7SEric Dumazet if (likely(partial)) { 11726b229cf7SEric Dumazet up->forward_deficit += size; 11736b229cf7SEric Dumazet size = up->forward_deficit; 11746b229cf7SEric Dumazet if (size < (sk->sk_rcvbuf >> 2) && 11756b229cf7SEric Dumazet !skb_queue_empty(&sk->sk_receive_queue)) 11766b229cf7SEric Dumazet return; 11776b229cf7SEric Dumazet } else { 11786b229cf7SEric Dumazet size += up->forward_deficit; 11796b229cf7SEric Dumazet } 11806b229cf7SEric Dumazet up->forward_deficit = 0; 11816b229cf7SEric Dumazet 1182f970bd9eSPaolo Abeni sk->sk_forward_alloc += size; 1183f970bd9eSPaolo Abeni amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1); 1184f970bd9eSPaolo Abeni sk->sk_forward_alloc -= amt; 1185f970bd9eSPaolo Abeni 1186f970bd9eSPaolo Abeni if (amt) 1187f970bd9eSPaolo Abeni __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT); 118802ab0d13SEric Dumazet 118902ab0d13SEric Dumazet atomic_sub(size, &sk->sk_rmem_alloc); 1190f970bd9eSPaolo Abeni } 1191f970bd9eSPaolo Abeni 1192c84d9490SEric Dumazet /* Note: called with sk_receive_queue.lock held. 1193c84d9490SEric Dumazet * Instead of using skb->truesize here, find a copy of it in skb->dev_scratch 1194c84d9490SEric Dumazet * This avoids a cache line miss while receive_queue lock is held. 1195c84d9490SEric Dumazet * Look at __udp_enqueue_schedule_skb() to find where this copy is done. 
1196c84d9490SEric Dumazet */ 11977c13f97fSPaolo Abeni void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) 1198f970bd9eSPaolo Abeni { 1199c84d9490SEric Dumazet udp_rmem_release(sk, skb->dev_scratch, 1); 1200f970bd9eSPaolo Abeni } 12017c13f97fSPaolo Abeni EXPORT_SYMBOL(udp_skb_destructor); 1202f970bd9eSPaolo Abeni 12034b272750SEric Dumazet /* Idea of busylocks is to let producers grab an extra spinlock 12044b272750SEric Dumazet * to relieve pressure on the receive_queue spinlock shared by consumer. 12054b272750SEric Dumazet * Under flood, this means that only one producer can be in line 12064b272750SEric Dumazet * trying to acquire the receive_queue spinlock. 12074b272750SEric Dumazet * These busylock can be allocated on a per cpu manner, instead of a 12084b272750SEric Dumazet * per socket one (that would consume a cache line per socket) 12094b272750SEric Dumazet */ 12104b272750SEric Dumazet static int udp_busylocks_log __read_mostly; 12114b272750SEric Dumazet static spinlock_t *udp_busylocks __read_mostly; 12124b272750SEric Dumazet 12134b272750SEric Dumazet static spinlock_t *busylock_acquire(void *ptr) 12144b272750SEric Dumazet { 12154b272750SEric Dumazet spinlock_t *busy; 12164b272750SEric Dumazet 12174b272750SEric Dumazet busy = udp_busylocks + hash_ptr(ptr, udp_busylocks_log); 12184b272750SEric Dumazet spin_lock(busy); 12194b272750SEric Dumazet return busy; 12204b272750SEric Dumazet } 12214b272750SEric Dumazet 12224b272750SEric Dumazet static void busylock_release(spinlock_t *busy) 12234b272750SEric Dumazet { 12244b272750SEric Dumazet if (busy) 12254b272750SEric Dumazet spin_unlock(busy); 12264b272750SEric Dumazet } 12274b272750SEric Dumazet 1228f970bd9eSPaolo Abeni int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) 1229f970bd9eSPaolo Abeni { 1230f970bd9eSPaolo Abeni struct sk_buff_head *list = &sk->sk_receive_queue; 1231f970bd9eSPaolo Abeni int rmem, delta, amt, err = -ENOMEM; 12324b272750SEric Dumazet spinlock_t *busy = NULL; 
1233c8c8b127SEric Dumazet int size; 1234f970bd9eSPaolo Abeni 1235f970bd9eSPaolo Abeni /* try to avoid the costly atomic add/sub pair when the receive 1236f970bd9eSPaolo Abeni * queue is full; always allow at least a packet 1237f970bd9eSPaolo Abeni */ 1238f970bd9eSPaolo Abeni rmem = atomic_read(&sk->sk_rmem_alloc); 1239363dc73aSPaolo Abeni if (rmem > sk->sk_rcvbuf) 1240f970bd9eSPaolo Abeni goto drop; 1241f970bd9eSPaolo Abeni 1242c8c8b127SEric Dumazet /* Under mem pressure, it might be helpful to help udp_recvmsg() 1243c8c8b127SEric Dumazet * having linear skbs : 1244c8c8b127SEric Dumazet * - Reduce memory overhead and thus increase receive queue capacity 1245c8c8b127SEric Dumazet * - Less cache line misses at copyout() time 1246c8c8b127SEric Dumazet * - Less work at consume_skb() (less alien page frag freeing) 1247c8c8b127SEric Dumazet */ 12484b272750SEric Dumazet if (rmem > (sk->sk_rcvbuf >> 1)) { 1249c8c8b127SEric Dumazet skb_condense(skb); 12504b272750SEric Dumazet 12514b272750SEric Dumazet busy = busylock_acquire(sk); 12524b272750SEric Dumazet } 1253c8c8b127SEric Dumazet size = skb->truesize; 1254c84d9490SEric Dumazet /* Copy skb->truesize into skb->dev_scratch to avoid a cache line miss 1255c84d9490SEric Dumazet * in udp_skb_destructor() 1256c84d9490SEric Dumazet */ 1257c84d9490SEric Dumazet skb->dev_scratch = size; 1258c8c8b127SEric Dumazet 1259f970bd9eSPaolo Abeni /* we drop only if the receive buf is full and the receive 1260f970bd9eSPaolo Abeni * queue contains some other skb 1261f970bd9eSPaolo Abeni */ 1262f970bd9eSPaolo Abeni rmem = atomic_add_return(size, &sk->sk_rmem_alloc); 1263363dc73aSPaolo Abeni if (rmem > (size + sk->sk_rcvbuf)) 1264f970bd9eSPaolo Abeni goto uncharge_drop; 1265f970bd9eSPaolo Abeni 1266f970bd9eSPaolo Abeni spin_lock(&list->lock); 1267f970bd9eSPaolo Abeni if (size >= sk->sk_forward_alloc) { 1268f970bd9eSPaolo Abeni amt = sk_mem_pages(size); 1269f970bd9eSPaolo Abeni delta = amt << SK_MEM_QUANTUM_SHIFT; 1270f970bd9eSPaolo Abeni if 
(!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) { 1271f970bd9eSPaolo Abeni err = -ENOBUFS; 1272f970bd9eSPaolo Abeni spin_unlock(&list->lock); 1273f970bd9eSPaolo Abeni goto uncharge_drop; 1274f970bd9eSPaolo Abeni } 1275f970bd9eSPaolo Abeni 1276f970bd9eSPaolo Abeni sk->sk_forward_alloc += delta; 1277f970bd9eSPaolo Abeni } 1278f970bd9eSPaolo Abeni 1279f970bd9eSPaolo Abeni sk->sk_forward_alloc -= size; 1280f970bd9eSPaolo Abeni 12817c13f97fSPaolo Abeni /* no need to setup a destructor, we will explicitly release the 12827c13f97fSPaolo Abeni * forward allocated memory on dequeue 12837c13f97fSPaolo Abeni */ 1284f970bd9eSPaolo Abeni sock_skb_set_dropcount(sk, skb); 1285f970bd9eSPaolo Abeni 1286f970bd9eSPaolo Abeni __skb_queue_tail(list, skb); 1287f970bd9eSPaolo Abeni spin_unlock(&list->lock); 1288f970bd9eSPaolo Abeni 1289f970bd9eSPaolo Abeni if (!sock_flag(sk, SOCK_DEAD)) 1290f970bd9eSPaolo Abeni sk->sk_data_ready(sk); 1291f970bd9eSPaolo Abeni 12924b272750SEric Dumazet busylock_release(busy); 1293f970bd9eSPaolo Abeni return 0; 1294f970bd9eSPaolo Abeni 1295f970bd9eSPaolo Abeni uncharge_drop: 1296f970bd9eSPaolo Abeni atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1297f970bd9eSPaolo Abeni 1298f970bd9eSPaolo Abeni drop: 1299f970bd9eSPaolo Abeni atomic_inc(&sk->sk_drops); 13004b272750SEric Dumazet busylock_release(busy); 1301f970bd9eSPaolo Abeni return err; 1302f970bd9eSPaolo Abeni } 1303f970bd9eSPaolo Abeni EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); 1304f970bd9eSPaolo Abeni 1305c915fe13SPaolo Abeni void udp_destruct_sock(struct sock *sk) 1306f970bd9eSPaolo Abeni { 1307f970bd9eSPaolo Abeni /* reclaim completely the forward allocated memory */ 13087c13f97fSPaolo Abeni unsigned int total = 0; 13097c13f97fSPaolo Abeni struct sk_buff *skb; 13107c13f97fSPaolo Abeni 13117c13f97fSPaolo Abeni while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 13127c13f97fSPaolo Abeni total += skb->truesize; 13137c13f97fSPaolo Abeni kfree_skb(skb); 13147c13f97fSPaolo Abeni } 
13157c13f97fSPaolo Abeni udp_rmem_release(sk, total, 0); 13167c13f97fSPaolo Abeni 1317f970bd9eSPaolo Abeni inet_sock_destruct(sk); 1318f970bd9eSPaolo Abeni } 1319c915fe13SPaolo Abeni EXPORT_SYMBOL_GPL(udp_destruct_sock); 1320f970bd9eSPaolo Abeni 1321f970bd9eSPaolo Abeni int udp_init_sock(struct sock *sk) 1322f970bd9eSPaolo Abeni { 1323f970bd9eSPaolo Abeni sk->sk_destruct = udp_destruct_sock; 1324f970bd9eSPaolo Abeni return 0; 1325f970bd9eSPaolo Abeni } 1326f970bd9eSPaolo Abeni EXPORT_SYMBOL_GPL(udp_init_sock); 1327f970bd9eSPaolo Abeni 1328f970bd9eSPaolo Abeni void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) 1329f970bd9eSPaolo Abeni { 1330f970bd9eSPaolo Abeni if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) { 1331f970bd9eSPaolo Abeni bool slow = lock_sock_fast(sk); 1332f970bd9eSPaolo Abeni 1333f970bd9eSPaolo Abeni sk_peek_offset_bwd(sk, len); 1334f970bd9eSPaolo Abeni unlock_sock_fast(sk, slow); 1335f970bd9eSPaolo Abeni } 1336f970bd9eSPaolo Abeni consume_skb(skb); 1337f970bd9eSPaolo Abeni } 1338f970bd9eSPaolo Abeni EXPORT_SYMBOL_GPL(skb_consume_udp); 1339f970bd9eSPaolo Abeni 134085584672SEric Dumazet /** 134185584672SEric Dumazet * first_packet_length - return length of first packet in receive queue 134285584672SEric Dumazet * @sk: socket 134385584672SEric Dumazet * 134485584672SEric Dumazet * Drops all bad checksum frames, until a valid one is found. 1345e83c6744SEric Dumazet * Returns the length of found skb, or -1 if none is found. 
134685584672SEric Dumazet */ 1347e83c6744SEric Dumazet static int first_packet_length(struct sock *sk) 134885584672SEric Dumazet { 13497c13f97fSPaolo Abeni struct sk_buff_head *rcvq = &sk->sk_receive_queue; 135085584672SEric Dumazet struct sk_buff *skb; 13517c13f97fSPaolo Abeni int total = 0; 1352e83c6744SEric Dumazet int res; 135385584672SEric Dumazet 135485584672SEric Dumazet spin_lock_bh(&rcvq->lock); 135585584672SEric Dumazet while ((skb = skb_peek(rcvq)) != NULL && 135685584672SEric Dumazet udp_lib_checksum_complete(skb)) { 135702c22347SEric Dumazet __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, 13586a5dc9e5SEric Dumazet IS_UDPLITE(sk)); 135902c22347SEric Dumazet __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, 136085584672SEric Dumazet IS_UDPLITE(sk)); 13618edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 136285584672SEric Dumazet __skb_unlink(skb, rcvq); 13637c13f97fSPaolo Abeni total += skb->truesize; 13647c13f97fSPaolo Abeni kfree_skb(skb); 136585584672SEric Dumazet } 1366e83c6744SEric Dumazet res = skb ? 
skb->len : -1; 13677c13f97fSPaolo Abeni if (total) 13687c13f97fSPaolo Abeni udp_rmem_release(sk, total, 1); 136985584672SEric Dumazet spin_unlock_bh(&rcvq->lock); 137085584672SEric Dumazet return res; 137185584672SEric Dumazet } 137285584672SEric Dumazet 13731da177e4SLinus Torvalds /* 13741da177e4SLinus Torvalds * IOCTL requests applicable to the UDP protocol 13751da177e4SLinus Torvalds */ 13761da177e4SLinus Torvalds 13771da177e4SLinus Torvalds int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) 13781da177e4SLinus Torvalds { 13796516c655SStephen Hemminger switch (cmd) { 13801da177e4SLinus Torvalds case SIOCOUTQ: 13811da177e4SLinus Torvalds { 138231e6d363SEric Dumazet int amount = sk_wmem_alloc_get(sk); 138331e6d363SEric Dumazet 13841da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 13851da177e4SLinus Torvalds } 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds case SIOCINQ: 13881da177e4SLinus Torvalds { 1389e83c6744SEric Dumazet int amount = max_t(int, 0, first_packet_length(sk)); 13901da177e4SLinus Torvalds 13911da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 13921da177e4SLinus Torvalds } 13931da177e4SLinus Torvalds 13941da177e4SLinus Torvalds default: 13951da177e4SLinus Torvalds return -ENOIOCTLCMD; 13961da177e4SLinus Torvalds } 13976516c655SStephen Hemminger 13986516c655SStephen Hemminger return 0; 13991da177e4SLinus Torvalds } 1400c482c568SEric Dumazet EXPORT_SYMBOL(udp_ioctl); 14011da177e4SLinus Torvalds 1402db8dac20SDavid S. Miller /* 1403db8dac20SDavid S. Miller * This should be easy, if there is something there we 1404db8dac20SDavid S. Miller * return it, otherwise we block. 1405db8dac20SDavid S. Miller */ 1406db8dac20SDavid S. Miller 14071b784140SYing Xue int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, 14081b784140SYing Xue int flags, int *addr_len) 1409db8dac20SDavid S. Miller { 1410db8dac20SDavid S. 
Miller struct inet_sock *inet = inet_sk(sk); 1411342dfc30SSteffen Hurrle DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); 1412db8dac20SDavid S. Miller struct sk_buff *skb; 141359c2cdaeSDavid S. Miller unsigned int ulen, copied; 1414627d2d6bSsamanthakumar int peeked, peeking, off; 1415db8dac20SDavid S. Miller int err; 1416db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 1417197c949eSEric Dumazet bool checksum_valid = false; 1418db8dac20SDavid S. Miller 1419db8dac20SDavid S. Miller if (flags & MSG_ERRQUEUE) 142085fbaa75SHannes Frederic Sowa return ip_recv_error(sk, msg, len, addr_len); 1421db8dac20SDavid S. Miller 1422db8dac20SDavid S. Miller try_again: 1423627d2d6bSsamanthakumar peeking = off = sk_peek_offset(sk, flags); 14247c13f97fSPaolo Abeni skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); 1425db8dac20SDavid S. Miller if (!skb) 1426627d2d6bSsamanthakumar return err; 1427db8dac20SDavid S. Miller 1428e6afc8acSsamanthakumar ulen = skb->len; 142959c2cdaeSDavid S. Miller copied = len; 1430627d2d6bSsamanthakumar if (copied > ulen - off) 1431627d2d6bSsamanthakumar copied = ulen - off; 143259c2cdaeSDavid S. Miller else if (copied < ulen) 1433db8dac20SDavid S. Miller msg->msg_flags |= MSG_TRUNC; 1434db8dac20SDavid S. Miller 1435db8dac20SDavid S. Miller /* 1436db8dac20SDavid S. Miller * If checksum is needed at all, try to do it while copying the 1437db8dac20SDavid S. Miller * data. If the data is truncated, or if we only want a partial 1438db8dac20SDavid S. Miller * coverage checksum (UDP-Lite), do it before the copy. 1439db8dac20SDavid S. Miller */ 1440db8dac20SDavid S. Miller 1441d21dbdfeSEric Dumazet if (copied < ulen || peeking || 1442d21dbdfeSEric Dumazet (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { 1443197c949eSEric Dumazet checksum_valid = !udp_lib_checksum_complete(skb); 1444197c949eSEric Dumazet if (!checksum_valid) 1445db8dac20SDavid S. Miller goto csum_copy_err; 1446db8dac20SDavid S. Miller } 1447db8dac20SDavid S. 
Miller 1448197c949eSEric Dumazet if (checksum_valid || skb_csum_unnecessary(skb)) 1449627d2d6bSsamanthakumar err = skb_copy_datagram_msg(skb, off, msg, copied); 1450db8dac20SDavid S. Miller else { 1451627d2d6bSsamanthakumar err = skb_copy_and_csum_datagram_msg(skb, off, msg); 1452db8dac20SDavid S. Miller 1453db8dac20SDavid S. Miller if (err == -EINVAL) 1454db8dac20SDavid S. Miller goto csum_copy_err; 1455db8dac20SDavid S. Miller } 1456db8dac20SDavid S. Miller 145722911fc5SEric Dumazet if (unlikely(err)) { 1458979402b1SEric Dumazet if (!peeked) { 1459979402b1SEric Dumazet atomic_inc(&sk->sk_drops); 14606aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), 1461979402b1SEric Dumazet UDP_MIB_INERRORS, is_udplite); 1462979402b1SEric Dumazet } 1463850cbaddSPaolo Abeni kfree_skb(skb); 1464627d2d6bSsamanthakumar return err; 146522911fc5SEric Dumazet } 1466db8dac20SDavid S. Miller 1467db8dac20SDavid S. Miller if (!peeked) 14686aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), 1469629ca23cSPavel Emelyanov UDP_MIB_INDATAGRAMS, is_udplite); 1470db8dac20SDavid S. Miller 14713b885787SNeil Horman sock_recv_ts_and_drops(msg, sk, skb); 1472db8dac20SDavid S. Miller 1473db8dac20SDavid S. Miller /* Copy the address. */ 1474c482c568SEric Dumazet if (sin) { 1475db8dac20SDavid S. Miller sin->sin_family = AF_INET; 1476db8dac20SDavid S. Miller sin->sin_port = udp_hdr(skb)->source; 1477db8dac20SDavid S. Miller sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 1478db8dac20SDavid S. Miller memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 1479bceaa902SHannes Frederic Sowa *addr_len = sizeof(*sin); 1480db8dac20SDavid S. Miller } 1481db8dac20SDavid S. Miller if (inet->cmsg_flags) 1482ad959036SPaolo Abeni ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); 1483db8dac20SDavid S. Miller 148459c2cdaeSDavid S. Miller err = copied; 1485db8dac20SDavid S. Miller if (flags & MSG_TRUNC) 1486db8dac20SDavid S. Miller err = ulen; 1487db8dac20SDavid S. 
Miller 1488850cbaddSPaolo Abeni skb_consume_udp(sk, skb, peeking ? -err : err); 1489db8dac20SDavid S. Miller return err; 1490db8dac20SDavid S. Miller 1491db8dac20SDavid S. Miller csum_copy_err: 1492850cbaddSPaolo Abeni if (!__sk_queue_drop_skb(sk, skb, flags)) { 14936aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 14946aef70a8SEric Dumazet UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 14956a5dc9e5SEric Dumazet } 1496850cbaddSPaolo Abeni kfree_skb(skb); 1497db8dac20SDavid S. Miller 1498beb39db5SEric Dumazet /* starting over for a new packet, but check if we need to yield */ 1499beb39db5SEric Dumazet cond_resched(); 15009cfaa8deSXufeng Zhang msg->msg_flags &= ~MSG_TRUNC; 1501db8dac20SDavid S. Miller goto try_again; 1502db8dac20SDavid S. Miller } 1503db8dac20SDavid S. Miller 1504286c72deSEric Dumazet int __udp_disconnect(struct sock *sk, int flags) 15051da177e4SLinus Torvalds { 15061da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 15071da177e4SLinus Torvalds /* 15081da177e4SLinus Torvalds * 1003.1g - break association. 
15091da177e4SLinus Torvalds */ 15101da177e4SLinus Torvalds 15111da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 1512c720c7e8SEric Dumazet inet->inet_daddr = 0; 1513c720c7e8SEric Dumazet inet->inet_dport = 0; 1514bdeab991STom Herbert sock_rps_reset_rxhash(sk); 15151da177e4SLinus Torvalds sk->sk_bound_dev_if = 0; 15161da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 15171da177e4SLinus Torvalds inet_reset_saddr(sk); 15181da177e4SLinus Torvalds 15191da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 15201da177e4SLinus Torvalds sk->sk_prot->unhash(sk); 1521c720c7e8SEric Dumazet inet->inet_sport = 0; 15221da177e4SLinus Torvalds } 15231da177e4SLinus Torvalds sk_dst_reset(sk); 15241da177e4SLinus Torvalds return 0; 15251da177e4SLinus Torvalds } 1526286c72deSEric Dumazet EXPORT_SYMBOL(__udp_disconnect); 1527286c72deSEric Dumazet 1528286c72deSEric Dumazet int udp_disconnect(struct sock *sk, int flags) 1529286c72deSEric Dumazet { 1530286c72deSEric Dumazet lock_sock(sk); 1531286c72deSEric Dumazet __udp_disconnect(sk, flags); 1532286c72deSEric Dumazet release_sock(sk); 1533286c72deSEric Dumazet return 0; 1534286c72deSEric Dumazet } 1535c482c568SEric Dumazet EXPORT_SYMBOL(udp_disconnect); 15361da177e4SLinus Torvalds 1537645ca708SEric Dumazet void udp_lib_unhash(struct sock *sk) 1538645ca708SEric Dumazet { 1539723b4610SEric Dumazet if (sk_hashed(sk)) { 1540645ca708SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1541512615b6SEric Dumazet struct udp_hslot *hslot, *hslot2; 1542512615b6SEric Dumazet 1543512615b6SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1544d4cada4aSEric Dumazet udp_sk(sk)->udp_port_hash); 1545512615b6SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1546645ca708SEric Dumazet 1547c8db3fecSEric Dumazet spin_lock_bh(&hslot->lock); 1548e32ea7e7SCraig Gallek if (rcu_access_pointer(sk->sk_reuseport_cb)) 1549e32ea7e7SCraig Gallek reuseport_detach_sock(sk); 
1550ca065d0cSEric Dumazet if (sk_del_node_init_rcu(sk)) { 1551fdcc8aa9SEric Dumazet hslot->count--; 1552c720c7e8SEric Dumazet inet_sk(sk)->inet_num = 0; 1553645ca708SEric Dumazet sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 1554512615b6SEric Dumazet 1555512615b6SEric Dumazet spin_lock(&hslot2->lock); 1556ca065d0cSEric Dumazet hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1557512615b6SEric Dumazet hslot2->count--; 1558512615b6SEric Dumazet spin_unlock(&hslot2->lock); 1559645ca708SEric Dumazet } 1560c8db3fecSEric Dumazet spin_unlock_bh(&hslot->lock); 1561645ca708SEric Dumazet } 1562723b4610SEric Dumazet } 1563645ca708SEric Dumazet EXPORT_SYMBOL(udp_lib_unhash); 1564645ca708SEric Dumazet 1565719f8358SEric Dumazet /* 1566719f8358SEric Dumazet * inet_rcv_saddr was changed, we must rehash secondary hash 1567719f8358SEric Dumazet */ 1568719f8358SEric Dumazet void udp_lib_rehash(struct sock *sk, u16 newhash) 1569719f8358SEric Dumazet { 1570719f8358SEric Dumazet if (sk_hashed(sk)) { 1571719f8358SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1572719f8358SEric Dumazet struct udp_hslot *hslot, *hslot2, *nhslot2; 1573719f8358SEric Dumazet 1574719f8358SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1575719f8358SEric Dumazet nhslot2 = udp_hashslot2(udptable, newhash); 1576719f8358SEric Dumazet udp_sk(sk)->udp_portaddr_hash = newhash; 1577e32ea7e7SCraig Gallek 1578e32ea7e7SCraig Gallek if (hslot2 != nhslot2 || 1579e32ea7e7SCraig Gallek rcu_access_pointer(sk->sk_reuseport_cb)) { 1580719f8358SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1581719f8358SEric Dumazet udp_sk(sk)->udp_port_hash); 1582719f8358SEric Dumazet /* we must lock primary chain too */ 1583719f8358SEric Dumazet spin_lock_bh(&hslot->lock); 1584e32ea7e7SCraig Gallek if (rcu_access_pointer(sk->sk_reuseport_cb)) 1585e32ea7e7SCraig Gallek reuseport_detach_sock(sk); 1586719f8358SEric Dumazet 1587e32ea7e7SCraig Gallek if (hslot2 != nhslot2) { 
1588719f8358SEric Dumazet spin_lock(&hslot2->lock); 1589ca065d0cSEric Dumazet hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1590719f8358SEric Dumazet hslot2->count--; 1591719f8358SEric Dumazet spin_unlock(&hslot2->lock); 1592719f8358SEric Dumazet 1593719f8358SEric Dumazet spin_lock(&nhslot2->lock); 1594ca065d0cSEric Dumazet hlist_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 1595719f8358SEric Dumazet &nhslot2->head); 1596719f8358SEric Dumazet nhslot2->count++; 1597719f8358SEric Dumazet spin_unlock(&nhslot2->lock); 1598e32ea7e7SCraig Gallek } 1599719f8358SEric Dumazet 1600719f8358SEric Dumazet spin_unlock_bh(&hslot->lock); 1601719f8358SEric Dumazet } 1602719f8358SEric Dumazet } 1603719f8358SEric Dumazet } 1604719f8358SEric Dumazet EXPORT_SYMBOL(udp_lib_rehash); 1605719f8358SEric Dumazet 1606719f8358SEric Dumazet static void udp_v4_rehash(struct sock *sk) 1607719f8358SEric Dumazet { 1608719f8358SEric Dumazet u16 new_hash = udp4_portaddr_hash(sock_net(sk), 1609719f8358SEric Dumazet inet_sk(sk)->inet_rcv_saddr, 1610719f8358SEric Dumazet inet_sk(sk)->inet_num); 1611719f8358SEric Dumazet udp_lib_rehash(sk, new_hash); 1612719f8358SEric Dumazet } 1613719f8358SEric Dumazet 161430c7be26SEric Dumazet int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 161593821778SHerbert Xu { 1616fec5e652STom Herbert int rc; 161793821778SHerbert Xu 1618005ec974SShawn Bohrer if (inet_sk(sk)->inet_daddr) { 1619bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1620005ec974SShawn Bohrer sk_mark_napi_id(sk, skb); 16212c8c56e1SEric Dumazet sk_incoming_cpu_update(sk); 1622e68b6e50SEric Dumazet } else { 1623e68b6e50SEric Dumazet sk_mark_napi_id_once(sk, skb); 1624005ec974SShawn Bohrer } 1625fec5e652STom Herbert 1626850cbaddSPaolo Abeni rc = __udp_enqueue_schedule_skb(sk, skb); 1627766e9037SEric Dumazet if (rc < 0) { 1628766e9037SEric Dumazet int is_udplite = IS_UDPLITE(sk); 1629766e9037SEric Dumazet 163093821778SHerbert Xu /* Note that an ENOMEM error is charged twice */ 
1631766e9037SEric Dumazet if (rc == -ENOMEM) 1632e61da9e2SEric Dumazet UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, 163393821778SHerbert Xu is_udplite); 1634e61da9e2SEric Dumazet UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1635766e9037SEric Dumazet kfree_skb(skb); 1636296f7ea7SSatoru Moriya trace_udp_fail_queue_rcv_skb(rc, sk); 1637766e9037SEric Dumazet return -1; 163893821778SHerbert Xu } 163993821778SHerbert Xu 164093821778SHerbert Xu return 0; 164193821778SHerbert Xu } 164293821778SHerbert Xu 1643447167bfSEric Dumazet static struct static_key udp_encap_needed __read_mostly; 1644447167bfSEric Dumazet void udp_encap_enable(void) 1645447167bfSEric Dumazet { 1646447167bfSEric Dumazet if (!static_key_enabled(&udp_encap_needed)) 1647447167bfSEric Dumazet static_key_slow_inc(&udp_encap_needed); 1648447167bfSEric Dumazet } 1649447167bfSEric Dumazet EXPORT_SYMBOL(udp_encap_enable); 1650447167bfSEric Dumazet 1651db8dac20SDavid S. Miller /* returns: 1652db8dac20SDavid S. Miller * -1: error 1653db8dac20SDavid S. Miller * 0: success 1654db8dac20SDavid S. Miller * >0: "udp encap" protocol resubmission 1655db8dac20SDavid S. Miller * 1656db8dac20SDavid S. Miller * Note that in the success and error cases, the skb is assumed to 1657db8dac20SDavid S. Miller * have either been requeued or freed. 1658db8dac20SDavid S. Miller */ 1659db8dac20SDavid S. Miller int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1660db8dac20SDavid S. Miller { 1661db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 1662db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 1663db8dac20SDavid S. Miller 1664db8dac20SDavid S. Miller /* 1665db8dac20SDavid S. Miller * Charge it to the socket, dropping if the queue is full. 1666db8dac20SDavid S. Miller */ 1667db8dac20SDavid S. Miller if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1668db8dac20SDavid S. Miller goto drop; 1669db8dac20SDavid S. Miller nf_reset(skb); 1670db8dac20SDavid S. 
Miller 1671447167bfSEric Dumazet if (static_key_false(&udp_encap_needed) && up->encap_type) { 16720ad92ad0SEric Dumazet int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 16730ad92ad0SEric Dumazet 1674db8dac20SDavid S. Miller /* 1675db8dac20SDavid S. Miller * This is an encapsulation socket so pass the skb to 1676db8dac20SDavid S. Miller * the socket's udp_encap_rcv() hook. Otherwise, just 1677db8dac20SDavid S. Miller * fall through and pass this up the UDP socket. 1678db8dac20SDavid S. Miller * up->encap_rcv() returns the following value: 1679db8dac20SDavid S. Miller * =0 if skb was successfully passed to the encap 1680db8dac20SDavid S. Miller * handler or was discarded by it. 1681db8dac20SDavid S. Miller * >0 if skb should be passed on to UDP. 1682db8dac20SDavid S. Miller * <0 if skb should be resubmitted as proto -N 1683db8dac20SDavid S. Miller */ 1684db8dac20SDavid S. Miller 1685db8dac20SDavid S. Miller /* if we're overly short, let UDP handle it */ 16860ad92ad0SEric Dumazet encap_rcv = ACCESS_ONCE(up->encap_rcv); 1687e5aed006SHannes Frederic Sowa if (encap_rcv) { 1688db8dac20SDavid S. Miller int ret; 1689db8dac20SDavid S. Miller 16900a80966bSTom Herbert /* Verify checksum before giving to encap */ 16910a80966bSTom Herbert if (udp_lib_checksum_complete(skb)) 16920a80966bSTom Herbert goto csum_error; 16930a80966bSTom Herbert 16940ad92ad0SEric Dumazet ret = encap_rcv(sk, skb); 1695db8dac20SDavid S. Miller if (ret <= 0) { 169602c22347SEric Dumazet __UDP_INC_STATS(sock_net(sk), 16970283328eSPavel Emelyanov UDP_MIB_INDATAGRAMS, 1698db8dac20SDavid S. Miller is_udplite); 1699db8dac20SDavid S. Miller return -ret; 1700db8dac20SDavid S. Miller } 1701db8dac20SDavid S. Miller } 1702db8dac20SDavid S. Miller 1703db8dac20SDavid S. Miller /* FALLTHROUGH -- it's a UDP Packet */ 1704db8dac20SDavid S. Miller } 1705db8dac20SDavid S. Miller 1706db8dac20SDavid S. Miller /* 1707db8dac20SDavid S. Miller * UDP-Lite specific tests, ignored on UDP sockets 1708db8dac20SDavid S. 
Miller */ 1709db8dac20SDavid S. Miller if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 1710db8dac20SDavid S. Miller 1711db8dac20SDavid S. Miller /* 1712db8dac20SDavid S. Miller * MIB statistics other than incrementing the error count are 1713db8dac20SDavid S. Miller * disabled for the following two types of errors: these depend 1714db8dac20SDavid S. Miller * on the application settings, not on the functioning of the 1715db8dac20SDavid S. Miller * protocol stack as such. 1716db8dac20SDavid S. Miller * 1717db8dac20SDavid S. Miller * RFC 3828 here recommends (sec 3.3): "There should also be a 1718db8dac20SDavid S. Miller * way ... to ... at least let the receiving application block 1719db8dac20SDavid S. Miller * delivery of packets with coverage values less than a value 1720db8dac20SDavid S. Miller * provided by the application." 1721db8dac20SDavid S. Miller */ 1722db8dac20SDavid S. Miller if (up->pcrlen == 0) { /* full coverage was set */ 1723ba7a46f1SJoe Perches net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", 1724db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, skb->len); 1725db8dac20SDavid S. Miller goto drop; 1726db8dac20SDavid S. Miller } 1727db8dac20SDavid S. Miller /* The next case involves violating the min. coverage requested 1728db8dac20SDavid S. Miller * by the receiver. This is subtle: if receiver wants x and x is 1729db8dac20SDavid S. Miller * greater than the buffersize/MTU then receiver will complain 1730db8dac20SDavid S. Miller * that it wants x while sender emits packets of smaller size y. 1731db8dac20SDavid S. Miller * Therefore the above ...()->partial_cov statement is essential. 1732db8dac20SDavid S. Miller */ 1733db8dac20SDavid S. Miller if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { 1734ba7a46f1SJoe Perches net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", 1735db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, up->pcrlen); 1736db8dac20SDavid S. 
Miller goto drop; 1737db8dac20SDavid S. Miller } 1738db8dac20SDavid S. Miller } 1739db8dac20SDavid S. Miller 1740ce25d66aSEric Dumazet if (rcu_access_pointer(sk->sk_filter) && 1741ce25d66aSEric Dumazet udp_lib_checksum_complete(skb)) 17426a5dc9e5SEric Dumazet goto csum_error; 1743ce25d66aSEric Dumazet 1744ba66bbe5SDaniel Borkmann if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) 1745a6127697SMichal Kubeček goto drop; 1746db8dac20SDavid S. Miller 1747e6afc8acSsamanthakumar udp_csum_pull_header(skb); 1748db8dac20SDavid S. Miller 1749fbf8866dSShawn Bohrer ipv4_pktinfo_prepare(sk, skb); 1750850cbaddSPaolo Abeni return __udp_queue_rcv_skb(sk, skb); 1751db8dac20SDavid S. Miller 17526a5dc9e5SEric Dumazet csum_error: 175302c22347SEric Dumazet __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1754db8dac20SDavid S. Miller drop: 175502c22347SEric Dumazet __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 17568edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 1757db8dac20SDavid S. Miller kfree_skb(skb); 1758db8dac20SDavid S. Miller return -1; 1759db8dac20SDavid S. Miller } 1760db8dac20SDavid S. Miller 176197502231SEric Dumazet /* For TCP sockets, sk_rx_dst is protected by socket lock 1762e47eb5dfSEric Dumazet * For UDP, we use xchg() to guard against concurrent changes. 176397502231SEric Dumazet */ 176497502231SEric Dumazet static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) 1765421b3885SShawn Bohrer { 176697502231SEric Dumazet struct dst_entry *old; 1767421b3885SShawn Bohrer 1768421b3885SShawn Bohrer dst_hold(dst); 1769e47eb5dfSEric Dumazet old = xchg(&sk->sk_rx_dst, dst); 177097502231SEric Dumazet dst_release(old); 177197502231SEric Dumazet } 1772421b3885SShawn Bohrer 1773db8dac20SDavid S. Miller /* 1774db8dac20SDavid S. Miller * Multicasts and broadcasts go to each listener. 1775db8dac20SDavid S. Miller * 17761240d137SEric Dumazet * Note: called only from the BH handler context. 1777db8dac20SDavid S. 
Miller */ 1778e3163493SPavel Emelyanov static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1779db8dac20SDavid S. Miller struct udphdr *uh, 1780db8dac20SDavid S. Miller __be32 saddr, __be32 daddr, 178136cbb245SRick Jones struct udp_table *udptable, 178236cbb245SRick Jones int proto) 1783db8dac20SDavid S. Miller { 1784ca065d0cSEric Dumazet struct sock *sk, *first = NULL; 17855cf3d461SDavid Held unsigned short hnum = ntohs(uh->dest); 17865cf3d461SDavid Held struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); 17872dc41cffSDavid Held unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); 1788ca065d0cSEric Dumazet unsigned int offset = offsetof(typeof(*sk), sk_node); 1789ca065d0cSEric Dumazet int dif = skb->dev->ifindex; 1790ca065d0cSEric Dumazet struct hlist_node *node; 1791ca065d0cSEric Dumazet struct sk_buff *nskb; 17922dc41cffSDavid Held 17932dc41cffSDavid Held if (use_hash2) { 17942dc41cffSDavid Held hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & 179573e2d5e3SPablo Neira udptable->mask; 179673e2d5e3SPablo Neira hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; 17972dc41cffSDavid Held start_lookup: 179873e2d5e3SPablo Neira hslot = &udptable->hash2[hash2]; 17992dc41cffSDavid Held offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); 18002dc41cffSDavid Held } 1801db8dac20SDavid S. 
Miller 1802ca065d0cSEric Dumazet sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { 1803ca065d0cSEric Dumazet if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, 1804ca065d0cSEric Dumazet uh->source, saddr, dif, hnum)) 1805ca065d0cSEric Dumazet continue; 18061240d137SEric Dumazet 1807ca065d0cSEric Dumazet if (!first) { 1808ca065d0cSEric Dumazet first = sk; 1809ca065d0cSEric Dumazet continue; 1810ca065d0cSEric Dumazet } 1811ca065d0cSEric Dumazet nskb = skb_clone(skb, GFP_ATOMIC); 1812ca065d0cSEric Dumazet 1813ca065d0cSEric Dumazet if (unlikely(!nskb)) { 1814ca065d0cSEric Dumazet atomic_inc(&sk->sk_drops); 181502c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, 1816ca065d0cSEric Dumazet IS_UDPLITE(sk)); 181702c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_INERRORS, 1818ca065d0cSEric Dumazet IS_UDPLITE(sk)); 1819ca065d0cSEric Dumazet continue; 1820ca065d0cSEric Dumazet } 1821ca065d0cSEric Dumazet if (udp_queue_rcv_skb(sk, nskb) > 0) 1822ca065d0cSEric Dumazet consume_skb(nskb); 1823ca065d0cSEric Dumazet } 18241240d137SEric Dumazet 18252dc41cffSDavid Held /* Also lookup *:port if we are using hash2 and haven't done so yet. */ 18262dc41cffSDavid Held if (use_hash2 && hash2 != hash2_any) { 18272dc41cffSDavid Held hash2 = hash2_any; 18282dc41cffSDavid Held goto start_lookup; 18292dc41cffSDavid Held } 18302dc41cffSDavid Held 1831ca065d0cSEric Dumazet if (first) { 1832ca065d0cSEric Dumazet if (udp_queue_rcv_skb(first, skb) > 0) 1833ca065d0cSEric Dumazet consume_skb(skb); 18341240d137SEric Dumazet } else { 1835ca065d0cSEric Dumazet kfree_skb(skb); 183602c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, 183736cbb245SRick Jones proto == IPPROTO_UDPLITE); 18381240d137SEric Dumazet } 1839db8dac20SDavid S. Miller return 0; 1840db8dac20SDavid S. Miller } 1841db8dac20SDavid S. Miller 1842db8dac20SDavid S. Miller /* Initialize UDP checksum. If exited with zero value (success), 1843db8dac20SDavid S. 
Miller * CHECKSUM_UNNECESSARY means, that no more checks are required. 1844db8dac20SDavid S. Miller * Otherwise, csum completion requires checksumming packet body, 1845db8dac20SDavid S. Miller * including udp header and folding it to skb->csum. 1846db8dac20SDavid S. Miller */ 1847db8dac20SDavid S. Miller static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, 1848db8dac20SDavid S. Miller int proto) 1849db8dac20SDavid S. Miller { 1850db8dac20SDavid S. Miller int err; 1851db8dac20SDavid S. Miller 1852db8dac20SDavid S. Miller UDP_SKB_CB(skb)->partial_cov = 0; 1853db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov = skb->len; 1854db8dac20SDavid S. Miller 1855db8dac20SDavid S. Miller if (proto == IPPROTO_UDPLITE) { 1856db8dac20SDavid S. Miller err = udplite_checksum_init(skb, uh); 1857db8dac20SDavid S. Miller if (err) 1858db8dac20SDavid S. Miller return err; 1859db8dac20SDavid S. Miller } 1860db8dac20SDavid S. Miller 1861b46d9f62SHannes Frederic Sowa /* Note, we are only interested in != 0 or == 0, thus the 1862b46d9f62SHannes Frederic Sowa * force to int. 1863b46d9f62SHannes Frederic Sowa */ 1864b46d9f62SHannes Frederic Sowa return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, 1865ed70fcfcSTom Herbert inet_compute_pseudo); 1866db8dac20SDavid S. Miller } 1867db8dac20SDavid S. Miller 1868db8dac20SDavid S. Miller /* 1869db8dac20SDavid S. Miller * All we need to do is get the socket, and then do a checksum. 1870db8dac20SDavid S. Miller */ 1871db8dac20SDavid S. Miller 1872645ca708SEric Dumazet int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, 1873db8dac20SDavid S. Miller int proto) 1874db8dac20SDavid S. Miller { 1875db8dac20SDavid S. Miller struct sock *sk; 18767b5e56f9SJesper Dangaard Brouer struct udphdr *uh; 1877db8dac20SDavid S. 
Miller unsigned short ulen; 1878adf30907SEric Dumazet struct rtable *rt = skb_rtable(skb); 18792783ef23SJesper Dangaard Brouer __be32 saddr, daddr; 18800283328eSPavel Emelyanov struct net *net = dev_net(skb->dev); 1881db8dac20SDavid S. Miller 1882db8dac20SDavid S. Miller /* 1883db8dac20SDavid S. Miller * Validate the packet. 1884db8dac20SDavid S. Miller */ 1885db8dac20SDavid S. Miller if (!pskb_may_pull(skb, sizeof(struct udphdr))) 1886db8dac20SDavid S. Miller goto drop; /* No space for header. */ 1887db8dac20SDavid S. Miller 18887b5e56f9SJesper Dangaard Brouer uh = udp_hdr(skb); 1889db8dac20SDavid S. Miller ulen = ntohs(uh->len); 1890ccc2d97cSBjørn Mork saddr = ip_hdr(skb)->saddr; 1891ccc2d97cSBjørn Mork daddr = ip_hdr(skb)->daddr; 1892ccc2d97cSBjørn Mork 1893db8dac20SDavid S. Miller if (ulen > skb->len) 1894db8dac20SDavid S. Miller goto short_packet; 1895db8dac20SDavid S. Miller 1896db8dac20SDavid S. Miller if (proto == IPPROTO_UDP) { 1897db8dac20SDavid S. Miller /* UDP validates ulen. */ 1898db8dac20SDavid S. Miller if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) 1899db8dac20SDavid S. Miller goto short_packet; 1900db8dac20SDavid S. Miller uh = udp_hdr(skb); 1901db8dac20SDavid S. Miller } 1902db8dac20SDavid S. Miller 1903db8dac20SDavid S. Miller if (udp4_csum_init(skb, uh, proto)) 1904db8dac20SDavid S. Miller goto csum_error; 1905db8dac20SDavid S. 
Miller 19068afdd99aSEric Dumazet sk = skb_steal_sock(skb); 19078afdd99aSEric Dumazet if (sk) { 190897502231SEric Dumazet struct dst_entry *dst = skb_dst(skb); 1909421b3885SShawn Bohrer int ret; 1910421b3885SShawn Bohrer 191197502231SEric Dumazet if (unlikely(sk->sk_rx_dst != dst)) 191297502231SEric Dumazet udp_sk_rx_dst_set(sk, dst); 1913421b3885SShawn Bohrer 1914421b3885SShawn Bohrer ret = udp_queue_rcv_skb(sk, skb); 19158afdd99aSEric Dumazet sock_put(sk); 1916421b3885SShawn Bohrer /* a return value > 0 means to resubmit the input, but 1917421b3885SShawn Bohrer * it wants the return to be -protocol, or 0 1918421b3885SShawn Bohrer */ 1919421b3885SShawn Bohrer if (ret > 0) 1920421b3885SShawn Bohrer return -ret; 1921421b3885SShawn Bohrer return 0; 1922c18450a5SFabian Frederick } 1923c18450a5SFabian Frederick 1924db8dac20SDavid S. Miller if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1925e3163493SPavel Emelyanov return __udp4_lib_mcast_deliver(net, skb, uh, 192636cbb245SRick Jones saddr, daddr, udptable, proto); 1927db8dac20SDavid S. Miller 1928607c4aafSKOVACS Krisztian sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); 192900db4124SIan Morris if (sk) { 1930a5b50476SEliezer Tamir int ret; 1931a5b50476SEliezer Tamir 1932224d019cSTom Herbert if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) 19332abb7cdcSTom Herbert skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, 19342abb7cdcSTom Herbert inet_compute_pseudo); 19352abb7cdcSTom Herbert 1936a5b50476SEliezer Tamir ret = udp_queue_rcv_skb(sk, skb); 1937db8dac20SDavid S. Miller 1938db8dac20SDavid S. Miller /* a return value > 0 means to resubmit the input, but 1939db8dac20SDavid S. Miller * it wants the return to be -protocol, or 0 1940db8dac20SDavid S. Miller */ 1941db8dac20SDavid S. Miller if (ret > 0) 1942db8dac20SDavid S. Miller return -ret; 1943db8dac20SDavid S. Miller return 0; 1944db8dac20SDavid S. Miller } 1945db8dac20SDavid S. Miller 1946db8dac20SDavid S. 
Miller if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1947db8dac20SDavid S. Miller goto drop; 1948db8dac20SDavid S. Miller nf_reset(skb); 1949db8dac20SDavid S. Miller 1950db8dac20SDavid S. Miller /* No socket. Drop packet silently, if checksum is wrong */ 1951db8dac20SDavid S. Miller if (udp_lib_checksum_complete(skb)) 1952db8dac20SDavid S. Miller goto csum_error; 1953db8dac20SDavid S. Miller 195402c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 1955db8dac20SDavid S. Miller icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1956db8dac20SDavid S. Miller 1957db8dac20SDavid S. Miller /* 1958db8dac20SDavid S. Miller * Hmm. We got an UDP packet to a port to which we 1959db8dac20SDavid S. Miller * don't wanna listen. Ignore it. 1960db8dac20SDavid S. Miller */ 1961db8dac20SDavid S. Miller kfree_skb(skb); 1962db8dac20SDavid S. Miller return 0; 1963db8dac20SDavid S. Miller 1964db8dac20SDavid S. Miller short_packet: 1965ba7a46f1SJoe Perches net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", 1966afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1967afd46503SJoe Perches &saddr, ntohs(uh->source), 1968afd46503SJoe Perches ulen, skb->len, 1969afd46503SJoe Perches &daddr, ntohs(uh->dest)); 1970db8dac20SDavid S. Miller goto drop; 1971db8dac20SDavid S. Miller 1972db8dac20SDavid S. Miller csum_error: 1973db8dac20SDavid S. Miller /* 1974db8dac20SDavid S. Miller * RFC1122: OK. Discards the bad packet silently (as far as 1975db8dac20SDavid S. Miller * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1976db8dac20SDavid S. Miller */ 1977ba7a46f1SJoe Perches net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", 1978afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1979afd46503SJoe Perches &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), 1980db8dac20SDavid S. 
Miller ulen); 198102c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); 1982db8dac20SDavid S. Miller drop: 198302c22347SEric Dumazet __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 1984db8dac20SDavid S. Miller kfree_skb(skb); 1985db8dac20SDavid S. Miller return 0; 1986db8dac20SDavid S. Miller } 1987db8dac20SDavid S. Miller 1988421b3885SShawn Bohrer /* We can only early demux multicast if there is a single matching socket. 1989421b3885SShawn Bohrer * If more than one socket found returns NULL 1990421b3885SShawn Bohrer */ 1991421b3885SShawn Bohrer static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, 1992421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 1993421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 1994421b3885SShawn Bohrer int dif) 1995421b3885SShawn Bohrer { 1996421b3885SShawn Bohrer struct sock *sk, *result; 1997421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 1998ca065d0cSEric Dumazet unsigned int slot = udp_hashfn(net, hnum, udp_table.mask); 1999421b3885SShawn Bohrer struct udp_hslot *hslot = &udp_table.hash[slot]; 2000421b3885SShawn Bohrer 200163c6f81cSEric Dumazet /* Do not bother scanning a too big list */ 200263c6f81cSEric Dumazet if (hslot->count > 10) 200363c6f81cSEric Dumazet return NULL; 200463c6f81cSEric Dumazet 2005421b3885SShawn Bohrer result = NULL; 2006ca065d0cSEric Dumazet sk_for_each_rcu(sk, &hslot->head) { 2007ca065d0cSEric Dumazet if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, 2008ca065d0cSEric Dumazet rmt_port, rmt_addr, dif, hnum)) { 2009ca065d0cSEric Dumazet if (result) 2010ca065d0cSEric Dumazet return NULL; 2011421b3885SShawn Bohrer result = sk; 2012421b3885SShawn Bohrer } 2013421b3885SShawn Bohrer } 2014421b3885SShawn Bohrer 2015421b3885SShawn Bohrer return result; 2016421b3885SShawn Bohrer } 2017421b3885SShawn Bohrer 2018421b3885SShawn Bohrer /* For unicast we should only early demux connected sockets or we can 2019421b3885SShawn Bohrer 
* break forwarding setups. The chains here can be long so only check 2020421b3885SShawn Bohrer * if the first socket is an exact match and if not move on. 2021421b3885SShawn Bohrer */ 2022421b3885SShawn Bohrer static struct sock *__udp4_lib_demux_lookup(struct net *net, 2023421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 2024421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 2025421b3885SShawn Bohrer int dif) 2026421b3885SShawn Bohrer { 2027421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 2028421b3885SShawn Bohrer unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); 2029421b3885SShawn Bohrer unsigned int slot2 = hash2 & udp_table.mask; 2030421b3885SShawn Bohrer struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 2031c7228317SJoe Perches INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); 2032421b3885SShawn Bohrer const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); 2033ca065d0cSEric Dumazet struct sock *sk; 2034421b3885SShawn Bohrer 2035ca065d0cSEric Dumazet udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { 2036ca065d0cSEric Dumazet if (INET_MATCH(sk, net, acookie, rmt_addr, 2037ca065d0cSEric Dumazet loc_addr, ports, dif)) 2038ca065d0cSEric Dumazet return sk; 2039421b3885SShawn Bohrer /* Only check first socket in chain */ 2040421b3885SShawn Bohrer break; 2041421b3885SShawn Bohrer } 2042ca065d0cSEric Dumazet return NULL; 2043421b3885SShawn Bohrer } 2044421b3885SShawn Bohrer 2045421b3885SShawn Bohrer void udp_v4_early_demux(struct sk_buff *skb) 2046421b3885SShawn Bohrer { 2047610438b7SEric Dumazet struct net *net = dev_net(skb->dev); 2048610438b7SEric Dumazet const struct iphdr *iph; 2049610438b7SEric Dumazet const struct udphdr *uh; 2050ca065d0cSEric Dumazet struct sock *sk = NULL; 2051421b3885SShawn Bohrer struct dst_entry *dst; 2052421b3885SShawn Bohrer int dif = skb->dev->ifindex; 20536e540309SShawn Bohrer int ours; 2054421b3885SShawn Bohrer 2055421b3885SShawn Bohrer /* validate the packet */ 2056421b3885SShawn Bohrer if 
(!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 2057421b3885SShawn Bohrer return; 2058421b3885SShawn Bohrer 2059610438b7SEric Dumazet iph = ip_hdr(skb); 2060610438b7SEric Dumazet uh = udp_hdr(skb); 2061610438b7SEric Dumazet 2062421b3885SShawn Bohrer if (skb->pkt_type == PACKET_BROADCAST || 20636e540309SShawn Bohrer skb->pkt_type == PACKET_MULTICAST) { 20646e540309SShawn Bohrer struct in_device *in_dev = __in_dev_get_rcu(skb->dev); 20656e540309SShawn Bohrer 20666e540309SShawn Bohrer if (!in_dev) 20676e540309SShawn Bohrer return; 20686e540309SShawn Bohrer 2069ad0ea198SPaolo Abeni /* we are supposed to accept bcast packets */ 2070ad0ea198SPaolo Abeni if (skb->pkt_type == PACKET_MULTICAST) { 20716e540309SShawn Bohrer ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, 20726e540309SShawn Bohrer iph->protocol); 20736e540309SShawn Bohrer if (!ours) 20746e540309SShawn Bohrer return; 2075ad0ea198SPaolo Abeni } 2076ad0ea198SPaolo Abeni 2077421b3885SShawn Bohrer sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 2078421b3885SShawn Bohrer uh->source, iph->saddr, dif); 20796e540309SShawn Bohrer } else if (skb->pkt_type == PACKET_HOST) { 2080421b3885SShawn Bohrer sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, 2081421b3885SShawn Bohrer uh->source, iph->saddr, dif); 20826e540309SShawn Bohrer } 2083421b3885SShawn Bohrer 2084ca065d0cSEric Dumazet if (!sk || !atomic_inc_not_zero_hint(&sk->sk_refcnt, 2)) 2085421b3885SShawn Bohrer return; 2086421b3885SShawn Bohrer 2087421b3885SShawn Bohrer skb->sk = sk; 208882eabd9eSAlexander Duyck skb->destructor = sock_efree; 208910e2eb87SEric Dumazet dst = READ_ONCE(sk->sk_rx_dst); 2090421b3885SShawn Bohrer 2091421b3885SShawn Bohrer if (dst) 2092421b3885SShawn Bohrer dst = dst_check(dst, 0); 209310e2eb87SEric Dumazet if (dst) { 209410e2eb87SEric Dumazet /* DST_NOCACHE can not be used without taking a reference */ 209510e2eb87SEric Dumazet if (dst->flags & DST_NOCACHE) { 209610e2eb87SEric Dumazet if 
(likely(atomic_inc_not_zero(&dst->__refcnt))) 209710e2eb87SEric Dumazet skb_dst_set(skb, dst); 209810e2eb87SEric Dumazet } else { 2099421b3885SShawn Bohrer skb_dst_set_noref(skb, dst); 2100421b3885SShawn Bohrer } 210110e2eb87SEric Dumazet } 210210e2eb87SEric Dumazet } 2103421b3885SShawn Bohrer 2104db8dac20SDavid S. Miller int udp_rcv(struct sk_buff *skb) 2105db8dac20SDavid S. Miller { 2106645ca708SEric Dumazet return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); 2107db8dac20SDavid S. Miller } 2108db8dac20SDavid S. Miller 21097d06b2e0SBrian Haley void udp_destroy_sock(struct sock *sk) 2110db8dac20SDavid S. Miller { 211144046a59STom Parkin struct udp_sock *up = udp_sk(sk); 21128a74ad60SEric Dumazet bool slow = lock_sock_fast(sk); 2113db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 21148a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 211544046a59STom Parkin if (static_key_false(&udp_encap_needed) && up->encap_type) { 211644046a59STom Parkin void (*encap_destroy)(struct sock *sk); 211744046a59STom Parkin encap_destroy = ACCESS_ONCE(up->encap_destroy); 211844046a59STom Parkin if (encap_destroy) 211944046a59STom Parkin encap_destroy(sk); 212044046a59STom Parkin } 2121db8dac20SDavid S. Miller } 2122db8dac20SDavid S. Miller 21231da177e4SLinus Torvalds /* 21241da177e4SLinus Torvalds * Socket option code for UDP 21251da177e4SLinus Torvalds */ 21264c0a6cb0SGerrit Renker int udp_lib_setsockopt(struct sock *sk, int level, int optname, 2127b7058842SDavid S. 
Miller char __user *optval, unsigned int optlen, 21284c0a6cb0SGerrit Renker int (*push_pending_frames)(struct sock *)) 21291da177e4SLinus Torvalds { 21301da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 21311c19448cSTom Herbert int val, valbool; 21321da177e4SLinus Torvalds int err = 0; 2133b2bf1e26SWang Chen int is_udplite = IS_UDPLITE(sk); 21341da177e4SLinus Torvalds 21351da177e4SLinus Torvalds if (optlen < sizeof(int)) 21361da177e4SLinus Torvalds return -EINVAL; 21371da177e4SLinus Torvalds 21381da177e4SLinus Torvalds if (get_user(val, (int __user *)optval)) 21391da177e4SLinus Torvalds return -EFAULT; 21401da177e4SLinus Torvalds 21411c19448cSTom Herbert valbool = val ? 1 : 0; 21421c19448cSTom Herbert 21431da177e4SLinus Torvalds switch (optname) { 21441da177e4SLinus Torvalds case UDP_CORK: 21451da177e4SLinus Torvalds if (val != 0) { 21461da177e4SLinus Torvalds up->corkflag = 1; 21471da177e4SLinus Torvalds } else { 21481da177e4SLinus Torvalds up->corkflag = 0; 21491da177e4SLinus Torvalds lock_sock(sk); 21504243cdc2SJoe Perches push_pending_frames(sk); 21511da177e4SLinus Torvalds release_sock(sk); 21521da177e4SLinus Torvalds } 21531da177e4SLinus Torvalds break; 21541da177e4SLinus Torvalds 21551da177e4SLinus Torvalds case UDP_ENCAP: 21561da177e4SLinus Torvalds switch (val) { 21571da177e4SLinus Torvalds case 0: 21581da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP: 21591da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP_NON_IKE: 2160067b207bSJames Chapman up->encap_rcv = xfrm4_udp_encap_rcv; 2161067b207bSJames Chapman /* FALLTHROUGH */ 2162342f0234SJames Chapman case UDP_ENCAP_L2TPINUDP: 21631da177e4SLinus Torvalds up->encap_type = val; 2164447167bfSEric Dumazet udp_encap_enable(); 21651da177e4SLinus Torvalds break; 21661da177e4SLinus Torvalds default: 21671da177e4SLinus Torvalds err = -ENOPROTOOPT; 21681da177e4SLinus Torvalds break; 21691da177e4SLinus Torvalds } 21701da177e4SLinus Torvalds break; 21711da177e4SLinus Torvalds 21721c19448cSTom Herbert case 
UDP_NO_CHECK6_TX: 21731c19448cSTom Herbert up->no_check6_tx = valbool; 21741c19448cSTom Herbert break; 21751c19448cSTom Herbert 21761c19448cSTom Herbert case UDP_NO_CHECK6_RX: 21771c19448cSTom Herbert up->no_check6_rx = valbool; 21781c19448cSTom Herbert break; 21791c19448cSTom Herbert 2180ba4e58ecSGerrit Renker /* 2181ba4e58ecSGerrit Renker * UDP-Lite's partial checksum coverage (RFC 3828). 2182ba4e58ecSGerrit Renker */ 2183ba4e58ecSGerrit Renker /* The sender sets actual checksum coverage length via this option. 2184ba4e58ecSGerrit Renker * The case coverage > packet length is handled by send module. */ 2185ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2186b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2187ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2188ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 2189ba4e58ecSGerrit Renker val = 8; 21904be929beSAlexey Dobriyan else if (val > USHRT_MAX) 21914be929beSAlexey Dobriyan val = USHRT_MAX; 2192ba4e58ecSGerrit Renker up->pcslen = val; 2193ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_SEND_CC; 2194ba4e58ecSGerrit Renker break; 2195ba4e58ecSGerrit Renker 2196ba4e58ecSGerrit Renker /* The receiver specifies a minimum checksum coverage value. To make 2197ba4e58ecSGerrit Renker * sense, this should be set to at least 8 (as done below). If zero is 2198ba4e58ecSGerrit Renker * used, this again means full checksum coverage. */ 2199ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2200b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2201ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2202ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Avoid silly minimal values. 
*/ 2203ba4e58ecSGerrit Renker val = 8; 22044be929beSAlexey Dobriyan else if (val > USHRT_MAX) 22054be929beSAlexey Dobriyan val = USHRT_MAX; 2206ba4e58ecSGerrit Renker up->pcrlen = val; 2207ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_RECV_CC; 2208ba4e58ecSGerrit Renker break; 2209ba4e58ecSGerrit Renker 22101da177e4SLinus Torvalds default: 22111da177e4SLinus Torvalds err = -ENOPROTOOPT; 22121da177e4SLinus Torvalds break; 22136516c655SStephen Hemminger } 22141da177e4SLinus Torvalds 22151da177e4SLinus Torvalds return err; 22161da177e4SLinus Torvalds } 2217c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_setsockopt); 22181da177e4SLinus Torvalds 2219db8dac20SDavid S. Miller int udp_setsockopt(struct sock *sk, int level, int optname, 2220b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2221db8dac20SDavid S. Miller { 2222db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2223db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2224db8dac20SDavid S. Miller udp_push_pending_frames); 2225db8dac20SDavid S. Miller return ip_setsockopt(sk, level, optname, optval, optlen); 2226db8dac20SDavid S. Miller } 2227db8dac20SDavid S. Miller 2228db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2229db8dac20SDavid S. Miller int compat_udp_setsockopt(struct sock *sk, int level, int optname, 2230b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2231db8dac20SDavid S. Miller { 2232db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2233db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2234db8dac20SDavid S. Miller udp_push_pending_frames); 2235db8dac20SDavid S. Miller return compat_ip_setsockopt(sk, level, optname, optval, optlen); 2236db8dac20SDavid S. Miller } 2237db8dac20SDavid S. Miller #endif 2238db8dac20SDavid S. 
Miller 22394c0a6cb0SGerrit Renker int udp_lib_getsockopt(struct sock *sk, int level, int optname, 22401da177e4SLinus Torvalds char __user *optval, int __user *optlen) 22411da177e4SLinus Torvalds { 22421da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 22431da177e4SLinus Torvalds int val, len; 22441da177e4SLinus Torvalds 22451da177e4SLinus Torvalds if (get_user(len, optlen)) 22461da177e4SLinus Torvalds return -EFAULT; 22471da177e4SLinus Torvalds 22481da177e4SLinus Torvalds len = min_t(unsigned int, len, sizeof(int)); 22491da177e4SLinus Torvalds 22501da177e4SLinus Torvalds if (len < 0) 22511da177e4SLinus Torvalds return -EINVAL; 22521da177e4SLinus Torvalds 22531da177e4SLinus Torvalds switch (optname) { 22541da177e4SLinus Torvalds case UDP_CORK: 22551da177e4SLinus Torvalds val = up->corkflag; 22561da177e4SLinus Torvalds break; 22571da177e4SLinus Torvalds 22581da177e4SLinus Torvalds case UDP_ENCAP: 22591da177e4SLinus Torvalds val = up->encap_type; 22601da177e4SLinus Torvalds break; 22611da177e4SLinus Torvalds 22621c19448cSTom Herbert case UDP_NO_CHECK6_TX: 22631c19448cSTom Herbert val = up->no_check6_tx; 22641c19448cSTom Herbert break; 22651c19448cSTom Herbert 22661c19448cSTom Herbert case UDP_NO_CHECK6_RX: 22671c19448cSTom Herbert val = up->no_check6_rx; 22681c19448cSTom Herbert break; 22691c19448cSTom Herbert 2270ba4e58ecSGerrit Renker /* The following two cannot be changed on UDP sockets, the return is 2271ba4e58ecSGerrit Renker * always 0 (which corresponds to the full checksum coverage of UDP). 
*/ 2272ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2273ba4e58ecSGerrit Renker val = up->pcslen; 2274ba4e58ecSGerrit Renker break; 2275ba4e58ecSGerrit Renker 2276ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2277ba4e58ecSGerrit Renker val = up->pcrlen; 2278ba4e58ecSGerrit Renker break; 2279ba4e58ecSGerrit Renker 22801da177e4SLinus Torvalds default: 22811da177e4SLinus Torvalds return -ENOPROTOOPT; 22826516c655SStephen Hemminger } 22831da177e4SLinus Torvalds 22841da177e4SLinus Torvalds if (put_user(len, optlen)) 22851da177e4SLinus Torvalds return -EFAULT; 22861da177e4SLinus Torvalds if (copy_to_user(optval, &val, len)) 22871da177e4SLinus Torvalds return -EFAULT; 22881da177e4SLinus Torvalds return 0; 22891da177e4SLinus Torvalds } 2290c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_getsockopt); 22911da177e4SLinus Torvalds 2292db8dac20SDavid S. Miller int udp_getsockopt(struct sock *sk, int level, int optname, 2293db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2294db8dac20SDavid S. Miller { 2295db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2296db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2297db8dac20SDavid S. Miller return ip_getsockopt(sk, level, optname, optval, optlen); 2298db8dac20SDavid S. Miller } 2299db8dac20SDavid S. Miller 2300db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2301db8dac20SDavid S. Miller int compat_udp_getsockopt(struct sock *sk, int level, int optname, 2302db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2303db8dac20SDavid S. Miller { 2304db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2305db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2306db8dac20SDavid S. Miller return compat_ip_getsockopt(sk, level, optname, optval, optlen); 2307db8dac20SDavid S. Miller } 2308db8dac20SDavid S. 
Miller #endif 23091da177e4SLinus Torvalds /** 23101da177e4SLinus Torvalds * udp_poll - wait for a UDP event. 23111da177e4SLinus Torvalds * @file - file struct 23121da177e4SLinus Torvalds * @sock - socket 23131da177e4SLinus Torvalds * @wait - poll table 23141da177e4SLinus Torvalds * 23151da177e4SLinus Torvalds * This is same as datagram poll, except for the special case of 23161da177e4SLinus Torvalds * blocking sockets. If application is using a blocking fd 23171da177e4SLinus Torvalds * and a packet with checksum error is in the queue; 23181da177e4SLinus Torvalds * then it could get return from select indicating data available 23191da177e4SLinus Torvalds * but then block when reading it. Add special case code 23201da177e4SLinus Torvalds * to work around these arguably broken applications. 23211da177e4SLinus Torvalds */ 23221da177e4SLinus Torvalds unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) 23231da177e4SLinus Torvalds { 23241da177e4SLinus Torvalds unsigned int mask = datagram_poll(file, sock, wait); 23251da177e4SLinus Torvalds struct sock *sk = sock->sk; 23261da177e4SLinus Torvalds 2327c3f1dbafSDavid Majnemer sock_rps_record_flow(sk); 2328c3f1dbafSDavid Majnemer 23291da177e4SLinus Torvalds /* Check for false positives due to checksum errors */ 233085584672SEric Dumazet if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && 2331e83c6744SEric Dumazet !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) 23321da177e4SLinus Torvalds mask &= ~(POLLIN | POLLRDNORM); 23331da177e4SLinus Torvalds 23341da177e4SLinus Torvalds return mask; 23351da177e4SLinus Torvalds 23361da177e4SLinus Torvalds } 2337c482c568SEric Dumazet EXPORT_SYMBOL(udp_poll); 23381da177e4SLinus Torvalds 23395d77dca8SDavid Ahern int udp_abort(struct sock *sk, int err) 23405d77dca8SDavid Ahern { 23415d77dca8SDavid Ahern lock_sock(sk); 23425d77dca8SDavid Ahern 23435d77dca8SDavid Ahern sk->sk_err = err; 23445d77dca8SDavid Ahern sk->sk_error_report(sk); 
2345286c72deSEric Dumazet __udp_disconnect(sk, 0); 23465d77dca8SDavid Ahern 23475d77dca8SDavid Ahern release_sock(sk); 23485d77dca8SDavid Ahern 23495d77dca8SDavid Ahern return 0; 23505d77dca8SDavid Ahern } 23515d77dca8SDavid Ahern EXPORT_SYMBOL_GPL(udp_abort); 23525d77dca8SDavid Ahern 2353db8dac20SDavid S. Miller struct proto udp_prot = { 2354db8dac20SDavid S. Miller .name = "UDP", 2355db8dac20SDavid S. Miller .owner = THIS_MODULE, 2356db8dac20SDavid S. Miller .close = udp_lib_close, 2357db8dac20SDavid S. Miller .connect = ip4_datagram_connect, 2358db8dac20SDavid S. Miller .disconnect = udp_disconnect, 2359db8dac20SDavid S. Miller .ioctl = udp_ioctl, 2360850cbaddSPaolo Abeni .init = udp_init_sock, 2361db8dac20SDavid S. Miller .destroy = udp_destroy_sock, 2362db8dac20SDavid S. Miller .setsockopt = udp_setsockopt, 2363db8dac20SDavid S. Miller .getsockopt = udp_getsockopt, 2364db8dac20SDavid S. Miller .sendmsg = udp_sendmsg, 2365db8dac20SDavid S. Miller .recvmsg = udp_recvmsg, 2366db8dac20SDavid S. Miller .sendpage = udp_sendpage, 23678141ed9fSSteffen Klassert .release_cb = ip4_datagram_release_cb, 2368db8dac20SDavid S. Miller .hash = udp_lib_hash, 2369db8dac20SDavid S. Miller .unhash = udp_lib_unhash, 2370719f8358SEric Dumazet .rehash = udp_v4_rehash, 2371db8dac20SDavid S. Miller .get_port = udp_v4_get_port, 2372db8dac20SDavid S. Miller .memory_allocated = &udp_memory_allocated, 2373db8dac20SDavid S. Miller .sysctl_mem = sysctl_udp_mem, 2374db8dac20SDavid S. Miller .sysctl_wmem = &sysctl_udp_wmem_min, 2375db8dac20SDavid S. Miller .sysctl_rmem = &sysctl_udp_rmem_min, 2376db8dac20SDavid S. Miller .obj_size = sizeof(struct udp_sock), 2377645ca708SEric Dumazet .h.udp_table = &udp_table, 2378db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2379db8dac20SDavid S. Miller .compat_setsockopt = compat_udp_setsockopt, 2380db8dac20SDavid S. Miller .compat_getsockopt = compat_udp_getsockopt, 2381db8dac20SDavid S. 
Miller #endif 23825d77dca8SDavid Ahern .diag_destroy = udp_abort, 2383db8dac20SDavid S. Miller }; 2384c482c568SEric Dumazet EXPORT_SYMBOL(udp_prot); 23851da177e4SLinus Torvalds 23861da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 23871da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 23881da177e4SLinus Torvalds 2389645ca708SEric Dumazet static struct sock *udp_get_first(struct seq_file *seq, int start) 23901da177e4SLinus Torvalds { 23911da177e4SLinus Torvalds struct sock *sk; 23921da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 23936f191efeSDenis V. Lunev struct net *net = seq_file_net(seq); 23941da177e4SLinus Torvalds 2395f86dcc5aSEric Dumazet for (state->bucket = start; state->bucket <= state->udp_table->mask; 2396f86dcc5aSEric Dumazet ++state->bucket) { 2397645ca708SEric Dumazet struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 2398f86dcc5aSEric Dumazet 2399ca065d0cSEric Dumazet if (hlist_empty(&hslot->head)) 2400f86dcc5aSEric Dumazet continue; 2401f86dcc5aSEric Dumazet 2402645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 2403ca065d0cSEric Dumazet sk_for_each(sk, &hslot->head) { 2404878628fbSYOSHIFUJI Hideaki if (!net_eq(sock_net(sk), net)) 2405a91275efSDaniel Lezcano continue; 24061da177e4SLinus Torvalds if (sk->sk_family == state->family) 24071da177e4SLinus Torvalds goto found; 24081da177e4SLinus Torvalds } 2409645ca708SEric Dumazet spin_unlock_bh(&hslot->lock); 24101da177e4SLinus Torvalds } 24111da177e4SLinus Torvalds sk = NULL; 24121da177e4SLinus Torvalds found: 24131da177e4SLinus Torvalds return sk; 24141da177e4SLinus Torvalds } 24151da177e4SLinus Torvalds 24161da177e4SLinus Torvalds static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 24171da177e4SLinus Torvalds { 24181da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 24196f191efeSDenis V. 
Lunev struct net *net = seq_file_net(seq); 24201da177e4SLinus Torvalds 24211da177e4SLinus Torvalds do { 2422ca065d0cSEric Dumazet sk = sk_next(sk); 2423878628fbSYOSHIFUJI Hideaki } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 24241da177e4SLinus Torvalds 2425645ca708SEric Dumazet if (!sk) { 2426f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2427f52b5054SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 2428645ca708SEric Dumazet return udp_get_first(seq, state->bucket + 1); 24291da177e4SLinus Torvalds } 24301da177e4SLinus Torvalds return sk; 24311da177e4SLinus Torvalds } 24321da177e4SLinus Torvalds 24331da177e4SLinus Torvalds static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) 24341da177e4SLinus Torvalds { 2435645ca708SEric Dumazet struct sock *sk = udp_get_first(seq, 0); 24361da177e4SLinus Torvalds 24371da177e4SLinus Torvalds if (sk) 24381da177e4SLinus Torvalds while (pos && (sk = udp_get_next(seq, sk)) != NULL) 24391da177e4SLinus Torvalds --pos; 24401da177e4SLinus Torvalds return pos ? NULL : sk; 24411da177e4SLinus Torvalds } 24421da177e4SLinus Torvalds 24431da177e4SLinus Torvalds static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 24441da177e4SLinus Torvalds { 244530842f29SVitaly Mayatskikh struct udp_iter_state *state = seq->private; 2446f86dcc5aSEric Dumazet state->bucket = MAX_UDP_PORTS; 244730842f29SVitaly Mayatskikh 2448b50660f1SYOSHIFUJI Hideaki return *pos ? 
udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 24491da177e4SLinus Torvalds } 24501da177e4SLinus Torvalds 24511da177e4SLinus Torvalds static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 24521da177e4SLinus Torvalds { 24531da177e4SLinus Torvalds struct sock *sk; 24541da177e4SLinus Torvalds 2455b50660f1SYOSHIFUJI Hideaki if (v == SEQ_START_TOKEN) 24561da177e4SLinus Torvalds sk = udp_get_idx(seq, 0); 24571da177e4SLinus Torvalds else 24581da177e4SLinus Torvalds sk = udp_get_next(seq, v); 24591da177e4SLinus Torvalds 24601da177e4SLinus Torvalds ++*pos; 24611da177e4SLinus Torvalds return sk; 24621da177e4SLinus Torvalds } 24631da177e4SLinus Torvalds 24641da177e4SLinus Torvalds static void udp_seq_stop(struct seq_file *seq, void *v) 24651da177e4SLinus Torvalds { 2466645ca708SEric Dumazet struct udp_iter_state *state = seq->private; 2467645ca708SEric Dumazet 2468f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2469645ca708SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 24701da177e4SLinus Torvalds } 24711da177e4SLinus Torvalds 247273cb88ecSArjan van de Ven int udp_seq_open(struct inode *inode, struct file *file) 24731da177e4SLinus Torvalds { 2474d9dda78bSAl Viro struct udp_seq_afinfo *afinfo = PDE_DATA(inode); 2475a2be75c1SDenis V. Lunev struct udp_iter_state *s; 2476a2be75c1SDenis V. Lunev int err; 24771da177e4SLinus Torvalds 2478a2be75c1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 2479a2be75c1SDenis V. Lunev sizeof(struct udp_iter_state)); 2480a2be75c1SDenis V. Lunev if (err < 0) 2481a2be75c1SDenis V. Lunev return err; 2482a91275efSDaniel Lezcano 2483a2be75c1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 24841da177e4SLinus Torvalds s->family = afinfo->family; 2485645ca708SEric Dumazet s->udp_table = afinfo->udp_table; 2486a2be75c1SDenis V. 
Lunev return err; 2487a91275efSDaniel Lezcano } 248873cb88ecSArjan van de Ven EXPORT_SYMBOL(udp_seq_open); 2489a91275efSDaniel Lezcano 24901da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 24910c96d8c5SDaniel Lezcano int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) 24921da177e4SLinus Torvalds { 24931da177e4SLinus Torvalds struct proc_dir_entry *p; 24941da177e4SLinus Torvalds int rc = 0; 24951da177e4SLinus Torvalds 2496dda61925SDenis V. Lunev afinfo->seq_ops.start = udp_seq_start; 2497dda61925SDenis V. Lunev afinfo->seq_ops.next = udp_seq_next; 2498dda61925SDenis V. Lunev afinfo->seq_ops.stop = udp_seq_stop; 2499dda61925SDenis V. Lunev 250084841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 250173cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 250284841c3cSDenis V. Lunev if (!p) 25031da177e4SLinus Torvalds rc = -ENOMEM; 25041da177e4SLinus Torvalds return rc; 25051da177e4SLinus Torvalds } 2506c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_register); 25071da177e4SLinus Torvalds 25080c96d8c5SDaniel Lezcano void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 25091da177e4SLinus Torvalds { 2510ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 25111da177e4SLinus Torvalds } 2512c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_unregister); 2513db8dac20SDavid S. Miller 2514db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 25155e659e4cSPavel Emelyanov static void udp4_format_sock(struct sock *sp, struct seq_file *f, 2516652586dfSTetsuo Handa int bucket) 2517db8dac20SDavid S. Miller { 2518db8dac20SDavid S. 
Miller struct inet_sock *inet = inet_sk(sp); 2519c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2520c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2521c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2522c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 2523db8dac20SDavid S. Miller 2524f86dcc5aSEric Dumazet seq_printf(f, "%5d: %08X:%04X %08X:%04X" 2525652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", 2526db8dac20SDavid S. Miller bucket, src, srcp, dest, destp, sp->sk_state, 252731e6d363SEric Dumazet sk_wmem_alloc_get(sp), 252831e6d363SEric Dumazet sk_rmem_alloc_get(sp), 2529a7cb5a49SEric W. Biederman 0, 0L, 0, 2530a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 2531a7cb5a49SEric W. Biederman 0, sock_i_ino(sp), 2532cb61cb9bSEric Dumazet atomic_read(&sp->sk_refcnt), sp, 2533652586dfSTetsuo Handa atomic_read(&sp->sk_drops)); 2534db8dac20SDavid S. Miller } 2535db8dac20SDavid S. Miller 2536db8dac20SDavid S. Miller int udp4_seq_show(struct seq_file *seq, void *v) 2537db8dac20SDavid S. Miller { 2538652586dfSTetsuo Handa seq_setwidth(seq, 127); 2539db8dac20SDavid S. Miller if (v == SEQ_START_TOKEN) 2540652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 2541db8dac20SDavid S. Miller "rx_queue tr tm->when retrnsmt uid timeout " 2542cb61cb9bSEric Dumazet "inode ref pointer drops"); 2543db8dac20SDavid S. Miller else { 2544db8dac20SDavid S. Miller struct udp_iter_state *state = seq->private; 2545db8dac20SDavid S. Miller 2546652586dfSTetsuo Handa udp4_format_sock(v, seq, state->bucket); 2547db8dac20SDavid S. Miller } 2548652586dfSTetsuo Handa seq_pad(seq, '\n'); 2549db8dac20SDavid S. Miller return 0; 2550db8dac20SDavid S. Miller } 2551db8dac20SDavid S. 
Miller 255273cb88ecSArjan van de Ven static const struct file_operations udp_afinfo_seq_fops = { 255373cb88ecSArjan van de Ven .owner = THIS_MODULE, 255473cb88ecSArjan van de Ven .open = udp_seq_open, 255573cb88ecSArjan van de Ven .read = seq_read, 255673cb88ecSArjan van de Ven .llseek = seq_lseek, 255773cb88ecSArjan van de Ven .release = seq_release_net 255873cb88ecSArjan van de Ven }; 255973cb88ecSArjan van de Ven 2560db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 2561db8dac20SDavid S. Miller static struct udp_seq_afinfo udp4_seq_afinfo = { 2562db8dac20SDavid S. Miller .name = "udp", 2563db8dac20SDavid S. Miller .family = AF_INET, 2564645ca708SEric Dumazet .udp_table = &udp_table, 256573cb88ecSArjan van de Ven .seq_fops = &udp_afinfo_seq_fops, 2566dda61925SDenis V. Lunev .seq_ops = { 2567dda61925SDenis V. Lunev .show = udp4_seq_show, 2568dda61925SDenis V. Lunev }, 2569db8dac20SDavid S. Miller }; 2570db8dac20SDavid S. Miller 25712c8c1e72SAlexey Dobriyan static int __net_init udp4_proc_init_net(struct net *net) 257215439febSPavel Emelyanov { 257315439febSPavel Emelyanov return udp_proc_register(net, &udp4_seq_afinfo); 257415439febSPavel Emelyanov } 257515439febSPavel Emelyanov 25762c8c1e72SAlexey Dobriyan static void __net_exit udp4_proc_exit_net(struct net *net) 257715439febSPavel Emelyanov { 257815439febSPavel Emelyanov udp_proc_unregister(net, &udp4_seq_afinfo); 257915439febSPavel Emelyanov } 258015439febSPavel Emelyanov 258115439febSPavel Emelyanov static struct pernet_operations udp4_net_ops = { 258215439febSPavel Emelyanov .init = udp4_proc_init_net, 258315439febSPavel Emelyanov .exit = udp4_proc_exit_net, 258415439febSPavel Emelyanov }; 258515439febSPavel Emelyanov 2586db8dac20SDavid S. Miller int __init udp4_proc_init(void) 2587db8dac20SDavid S. Miller { 258815439febSPavel Emelyanov return register_pernet_subsys(&udp4_net_ops); 2589db8dac20SDavid S. Miller } 2590db8dac20SDavid S. 
Miller 2591db8dac20SDavid S. Miller void udp4_proc_exit(void) 2592db8dac20SDavid S. Miller { 259315439febSPavel Emelyanov unregister_pernet_subsys(&udp4_net_ops); 2594db8dac20SDavid S. Miller } 25951da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 25961da177e4SLinus Torvalds 2597f86dcc5aSEric Dumazet static __initdata unsigned long uhash_entries; 2598f86dcc5aSEric Dumazet static int __init set_uhash_entries(char *str) 2599645ca708SEric Dumazet { 2600413c27d8SEldad Zack ssize_t ret; 2601413c27d8SEldad Zack 2602f86dcc5aSEric Dumazet if (!str) 2603f86dcc5aSEric Dumazet return 0; 2604413c27d8SEldad Zack 2605413c27d8SEldad Zack ret = kstrtoul(str, 0, &uhash_entries); 2606413c27d8SEldad Zack if (ret) 2607413c27d8SEldad Zack return 0; 2608413c27d8SEldad Zack 2609f86dcc5aSEric Dumazet if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) 2610f86dcc5aSEric Dumazet uhash_entries = UDP_HTABLE_SIZE_MIN; 2611f86dcc5aSEric Dumazet return 1; 2612f86dcc5aSEric Dumazet } 2613f86dcc5aSEric Dumazet __setup("uhash_entries=", set_uhash_entries); 2614645ca708SEric Dumazet 2615f86dcc5aSEric Dumazet void __init udp_table_init(struct udp_table *table, const char *name) 2616f86dcc5aSEric Dumazet { 2617f86dcc5aSEric Dumazet unsigned int i; 2618f86dcc5aSEric Dumazet 2619f86dcc5aSEric Dumazet table->hash = alloc_large_system_hash(name, 2620512615b6SEric Dumazet 2 * sizeof(struct udp_hslot), 2621f86dcc5aSEric Dumazet uhash_entries, 2622f86dcc5aSEric Dumazet 21, /* one slot per 2 MB */ 2623f86dcc5aSEric Dumazet 0, 2624f86dcc5aSEric Dumazet &table->log, 2625f86dcc5aSEric Dumazet &table->mask, 262631fe62b9STim Bird UDP_HTABLE_SIZE_MIN, 2627f86dcc5aSEric Dumazet 64 * 1024); 262831fe62b9STim Bird 2629512615b6SEric Dumazet table->hash2 = table->hash + (table->mask + 1); 2630f86dcc5aSEric Dumazet for (i = 0; i <= table->mask; i++) { 2631ca065d0cSEric Dumazet INIT_HLIST_HEAD(&table->hash[i].head); 2632fdcc8aa9SEric Dumazet table->hash[i].count = 0; 2633645ca708SEric Dumazet 
spin_lock_init(&table->hash[i].lock); 2634645ca708SEric Dumazet } 2635512615b6SEric Dumazet for (i = 0; i <= table->mask; i++) { 2636ca065d0cSEric Dumazet INIT_HLIST_HEAD(&table->hash2[i].head); 2637512615b6SEric Dumazet table->hash2[i].count = 0; 2638512615b6SEric Dumazet spin_lock_init(&table->hash2[i].lock); 2639512615b6SEric Dumazet } 2640645ca708SEric Dumazet } 2641645ca708SEric Dumazet 2642723b8e46STom Herbert u32 udp_flow_hashrnd(void) 2643723b8e46STom Herbert { 2644723b8e46STom Herbert static u32 hashrnd __read_mostly; 2645723b8e46STom Herbert 2646723b8e46STom Herbert net_get_random_once(&hashrnd, sizeof(hashrnd)); 2647723b8e46STom Herbert 2648723b8e46STom Herbert return hashrnd; 2649723b8e46STom Herbert } 2650723b8e46STom Herbert EXPORT_SYMBOL(udp_flow_hashrnd); 2651723b8e46STom Herbert 265295766fffSHideo Aoki void __init udp_init(void) 265395766fffSHideo Aoki { 2654f03d78dbSEric Dumazet unsigned long limit; 26554b272750SEric Dumazet unsigned int i; 265695766fffSHideo Aoki 2657f86dcc5aSEric Dumazet udp_table_init(&udp_table, "UDP"); 2658f03d78dbSEric Dumazet limit = nr_free_buffer_pages() / 8; 265995766fffSHideo Aoki limit = max(limit, 128UL); 266095766fffSHideo Aoki sysctl_udp_mem[0] = limit / 4 * 3; 266195766fffSHideo Aoki sysctl_udp_mem[1] = limit; 266295766fffSHideo Aoki sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; 266395766fffSHideo Aoki 266495766fffSHideo Aoki sysctl_udp_rmem_min = SK_MEM_QUANTUM; 266595766fffSHideo Aoki sysctl_udp_wmem_min = SK_MEM_QUANTUM; 26664b272750SEric Dumazet 26674b272750SEric Dumazet /* 16 spinlocks per cpu */ 26684b272750SEric Dumazet udp_busylocks_log = ilog2(nr_cpu_ids) + 4; 26694b272750SEric Dumazet udp_busylocks = kmalloc(sizeof(spinlock_t) << udp_busylocks_log, 26704b272750SEric Dumazet GFP_KERNEL); 26714b272750SEric Dumazet if (!udp_busylocks) 26724b272750SEric Dumazet panic("UDP: failed to alloc udp_busylocks\n"); 26734b272750SEric Dumazet for (i = 0; i < (1U << udp_busylocks_log); i++) 26744b272750SEric Dumazet 
spin_lock_init(udp_busylocks + i); 267595766fffSHideo Aoki } 2676