/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The User Datagram Protocol (UDP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() calls
 *		Alan Cox	: 	stopped close while in use off icmp
 *					messages. Not a fix but a botch that
 *					for udp at least is 'valid'.
 *		Alan Cox	:	Fixed icmp handling properly
 *		Alan Cox	: 	Correct error for oversized datagrams
 *		Alan Cox	:	Tidied select() semantics.
 *		Alan Cox	:	udp_err() fixed properly, also now
 *					select and read wake correctly on errors
 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
 *		Alan Cox	:	UDP can count its memory
 *		Alan Cox	:	send to an unknown connection causes
 *					an ECONNREFUSED off the icmp, but
 *					does NOT close.
 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
 *					bug no longer crashes it.
 *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram
 *		Alan Cox	:	Added get/set sockopt support.
 *		Alan Cox	:	Broadcasting without option set returns EACCES.
 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
 *		Alan Cox	:	Use ip_tos and ip_ttl
 *		Alan Cox	:	SNMP Mibs
 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
 *		Matt Dillon	:	UDP length checks.
 *		Alan Cox	:	Smarter af_inet used properly.
 *		Alan Cox	:	Use new kernel side addressing.
 *		Alan Cox	:	Incorrect return on truncated datagram receive.
 *	Arnt Gulbrandsen 	:	New udp_send and stuff
 *		Alan Cox	:	Cache last socket
 *		Alan Cox	:	Route cache
 *		Jon Peatfield	:	Minor efficiency fix to sendto().
 *		Mike Shaver	:	RFC1122 checks.
 *		Alan Cox	:	Nonblocking error fix.
 *	Willy Konynenberg	:	Transparent proxying support.
 *		Mike McLagan	:	Routing by source
 *		David S. Miller	:	New socket lookup architecture.
 *					Last socket cache retained as it
 *					does have a high hit rate.
 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
 *		Andi Kleen	:	Some cleanups, cache destination entry
 *					for connect.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
 *					return ENOTCONN for unconnected sockets (POSIX)
 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
 *					bound-to-device socket
 *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
 *					datagrams.
 *	Hirokazu Takahashi	:	sendfile() on UDP works now.
 *		Arnaldo C. Melo :	convert /proc/net/udp to seq_file
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
 *	James Chapman		:	Add L2TP encapsulation type.
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
781da177e4SLinus Torvalds */ 791da177e4SLinus Torvalds 80afd46503SJoe Perches #define pr_fmt(fmt) "UDP: " fmt 81afd46503SJoe Perches 821da177e4SLinus Torvalds #include <asm/uaccess.h> 831da177e4SLinus Torvalds #include <asm/ioctls.h> 8495766fffSHideo Aoki #include <linux/bootmem.h> 858203efb3SEric Dumazet #include <linux/highmem.h> 868203efb3SEric Dumazet #include <linux/swap.h> 871da177e4SLinus Torvalds #include <linux/types.h> 881da177e4SLinus Torvalds #include <linux/fcntl.h> 891da177e4SLinus Torvalds #include <linux/module.h> 901da177e4SLinus Torvalds #include <linux/socket.h> 911da177e4SLinus Torvalds #include <linux/sockios.h> 9214c85021SArnaldo Carvalho de Melo #include <linux/igmp.h> 931da177e4SLinus Torvalds #include <linux/in.h> 941da177e4SLinus Torvalds #include <linux/errno.h> 951da177e4SLinus Torvalds #include <linux/timer.h> 961da177e4SLinus Torvalds #include <linux/mm.h> 971da177e4SLinus Torvalds #include <linux/inet.h> 981da177e4SLinus Torvalds #include <linux/netdevice.h> 995a0e3ad6STejun Heo #include <linux/slab.h> 100c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h> 1011da177e4SLinus Torvalds #include <linux/skbuff.h> 1021da177e4SLinus Torvalds #include <linux/proc_fs.h> 1031da177e4SLinus Torvalds #include <linux/seq_file.h> 104457c4cbcSEric W. 
Biederman #include <net/net_namespace.h> 1051da177e4SLinus Torvalds #include <net/icmp.h> 106421b3885SShawn Bohrer #include <net/inet_hashtables.h> 1071da177e4SLinus Torvalds #include <net/route.h> 1081da177e4SLinus Torvalds #include <net/checksum.h> 1091da177e4SLinus Torvalds #include <net/xfrm.h> 110296f7ea7SSatoru Moriya #include <trace/events/udp.h> 111447167bfSEric Dumazet #include <linux/static_key.h> 11222911fc5SEric Dumazet #include <trace/events/skb.h> 113076bb0c8SEliezer Tamir #include <net/busy_poll.h> 114ba4e58ecSGerrit Renker #include "udp_impl.h" 1151da177e4SLinus Torvalds 116f86dcc5aSEric Dumazet struct udp_table udp_table __read_mostly; 117645ca708SEric Dumazet EXPORT_SYMBOL(udp_table); 1181da177e4SLinus Torvalds 1198d987e5cSEric Dumazet long sysctl_udp_mem[3] __read_mostly; 12095766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_mem); 121c482c568SEric Dumazet 122c482c568SEric Dumazet int sysctl_udp_rmem_min __read_mostly; 12395766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_rmem_min); 124c482c568SEric Dumazet 125c482c568SEric Dumazet int sysctl_udp_wmem_min __read_mostly; 12695766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_wmem_min); 12795766fffSHideo Aoki 1288d987e5cSEric Dumazet atomic_long_t udp_memory_allocated; 12995766fffSHideo Aoki EXPORT_SYMBOL(udp_memory_allocated); 13095766fffSHideo Aoki 131f86dcc5aSEric Dumazet #define MAX_UDP_PORTS 65536 132f86dcc5aSEric Dumazet #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) 13398322f22SEric Dumazet 134f24d43c0SEric Dumazet static int udp_lib_lport_inuse(struct net *net, __u16 num, 135645ca708SEric Dumazet const struct udp_hslot *hslot, 13698322f22SEric Dumazet unsigned long *bitmap, 137f24d43c0SEric Dumazet struct sock *sk, 138f24d43c0SEric Dumazet int (*saddr_comp)(const struct sock *sk1, 139f86dcc5aSEric Dumazet const struct sock *sk2), 140f86dcc5aSEric Dumazet unsigned int log) 14125030a7fSGerrit Renker { 142f24d43c0SEric Dumazet struct sock *sk2; 14388ab1932SEric Dumazet struct hlist_nulls_node 
*node; 144ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 14525030a7fSGerrit Renker 14688ab1932SEric Dumazet sk_nulls_for_each(sk2, node, &hslot->head) 147f24d43c0SEric Dumazet if (net_eq(sock_net(sk2), net) && 148f24d43c0SEric Dumazet sk2 != sk && 149d4cada4aSEric Dumazet (bitmap || udp_sk(sk2)->udp_port_hash == num) && 150f24d43c0SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1519d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1529d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 153ba418fa3STom Herbert (!sk2->sk_reuseport || !sk->sk_reuseport || 154ba418fa3STom Herbert !uid_eq(uid, sock_i_uid(sk2))) && 15598322f22SEric Dumazet (*saddr_comp)(sk, sk2)) { 15698322f22SEric Dumazet if (bitmap) 157d4cada4aSEric Dumazet __set_bit(udp_sk(sk2)->udp_port_hash >> log, 158d4cada4aSEric Dumazet bitmap); 15998322f22SEric Dumazet else 160fc038410SDavid S. Miller return 1; 16198322f22SEric Dumazet } 16225030a7fSGerrit Renker return 0; 16325030a7fSGerrit Renker } 16425030a7fSGerrit Renker 16530fff923SEric Dumazet /* 16630fff923SEric Dumazet * Note: we still hold spinlock of primary hash chain, so no other writer 16730fff923SEric Dumazet * can insert/delete a socket with local_port == num 16830fff923SEric Dumazet */ 16930fff923SEric Dumazet static int udp_lib_lport_inuse2(struct net *net, __u16 num, 17030fff923SEric Dumazet struct udp_hslot *hslot2, 17130fff923SEric Dumazet struct sock *sk, 17230fff923SEric Dumazet int (*saddr_comp)(const struct sock *sk1, 17330fff923SEric Dumazet const struct sock *sk2)) 17430fff923SEric Dumazet { 17530fff923SEric Dumazet struct sock *sk2; 17630fff923SEric Dumazet struct hlist_nulls_node *node; 177ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 17830fff923SEric Dumazet int res = 0; 17930fff923SEric Dumazet 18030fff923SEric Dumazet spin_lock(&hslot2->lock); 18130fff923SEric Dumazet udp_portaddr_for_each_entry(sk2, node, &hslot2->head) 18230fff923SEric Dumazet if (net_eq(sock_net(sk2), net) && 
18330fff923SEric Dumazet sk2 != sk && 18430fff923SEric Dumazet (udp_sk(sk2)->udp_port_hash == num) && 18530fff923SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1869d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1879d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 188ba418fa3STom Herbert (!sk2->sk_reuseport || !sk->sk_reuseport || 189ba418fa3STom Herbert !uid_eq(uid, sock_i_uid(sk2))) && 19030fff923SEric Dumazet (*saddr_comp)(sk, sk2)) { 19130fff923SEric Dumazet res = 1; 19230fff923SEric Dumazet break; 19330fff923SEric Dumazet } 19430fff923SEric Dumazet spin_unlock(&hslot2->lock); 19530fff923SEric Dumazet return res; 19630fff923SEric Dumazet } 19730fff923SEric Dumazet 19825030a7fSGerrit Renker /** 1996ba5a3c5SPavel Emelyanov * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 20025030a7fSGerrit Renker * 20125030a7fSGerrit Renker * @sk: socket struct in question 20225030a7fSGerrit Renker * @snum: port number to look up 203df2bc459SDavid S. Miller * @saddr_comp: AF-dependent comparison of bound local IP addresses 20425985edcSLucas De Marchi * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, 20530fff923SEric Dumazet * with NULL address 20625030a7fSGerrit Renker */ 2076ba5a3c5SPavel Emelyanov int udp_lib_get_port(struct sock *sk, unsigned short snum, 208df2bc459SDavid S. 
Miller int (*saddr_comp)(const struct sock *sk1, 20930fff923SEric Dumazet const struct sock *sk2), 21030fff923SEric Dumazet unsigned int hash2_nulladdr) 2111da177e4SLinus Torvalds { 212512615b6SEric Dumazet struct udp_hslot *hslot, *hslot2; 213645ca708SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 21425030a7fSGerrit Renker int error = 1; 2153b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 2161da177e4SLinus Torvalds 21732c1da70SStephen Hemminger if (!snum) { 2189088c560SEric Dumazet int low, high, remaining; 21995c96174SEric Dumazet unsigned int rand; 22098322f22SEric Dumazet unsigned short first, last; 22198322f22SEric Dumazet DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); 2221da177e4SLinus Torvalds 2230bbf87d8SEric W. Biederman inet_get_local_port_range(net, &low, &high); 224a25de534SAnton Arapov remaining = (high - low) + 1; 225227b60f5SStephen Hemminger 22663862b5bSAruna-Hewapathirane rand = prandom_u32(); 22798322f22SEric Dumazet first = (((u64)rand * remaining) >> 32) + low; 22898322f22SEric Dumazet /* 22998322f22SEric Dumazet * force rand to be an odd multiple of UDP_HTABLE_SIZE 23098322f22SEric Dumazet */ 231f86dcc5aSEric Dumazet rand = (rand | 1) * (udptable->mask + 1); 2325781b235SEric Dumazet last = first + udptable->mask + 1; 2335781b235SEric Dumazet do { 234f86dcc5aSEric Dumazet hslot = udp_hashslot(udptable, net, first); 23598322f22SEric Dumazet bitmap_zero(bitmap, PORTS_PER_CHAIN); 236645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 23798322f22SEric Dumazet udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, 238f86dcc5aSEric Dumazet saddr_comp, udptable->log); 23998322f22SEric Dumazet 24098322f22SEric Dumazet snum = first; 24198322f22SEric Dumazet /* 24298322f22SEric Dumazet * Iterate on all possible values of snum for this hash. 24398322f22SEric Dumazet * Using steps of an odd multiple of UDP_HTABLE_SIZE 24498322f22SEric Dumazet * give us randomization and full range coverage. 
24598322f22SEric Dumazet */ 2469088c560SEric Dumazet do { 24798322f22SEric Dumazet if (low <= snum && snum <= high && 248e3826f1eSAmerigo Wang !test_bit(snum >> udptable->log, bitmap) && 249122ff243SWANG Cong !inet_is_local_reserved_port(net, snum)) 25098322f22SEric Dumazet goto found; 25198322f22SEric Dumazet snum += rand; 25298322f22SEric Dumazet } while (snum != first); 25398322f22SEric Dumazet spin_unlock_bh(&hslot->lock); 2545781b235SEric Dumazet } while (++first != last); 25598322f22SEric Dumazet goto fail; 256645ca708SEric Dumazet } else { 257f86dcc5aSEric Dumazet hslot = udp_hashslot(udptable, net, snum); 258645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 25930fff923SEric Dumazet if (hslot->count > 10) { 26030fff923SEric Dumazet int exist; 26130fff923SEric Dumazet unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; 26230fff923SEric Dumazet 26330fff923SEric Dumazet slot2 &= udptable->mask; 26430fff923SEric Dumazet hash2_nulladdr &= udptable->mask; 26530fff923SEric Dumazet 26630fff923SEric Dumazet hslot2 = udp_hashslot2(udptable, slot2); 26730fff923SEric Dumazet if (hslot->count < hslot2->count) 26830fff923SEric Dumazet goto scan_primary_hash; 26930fff923SEric Dumazet 27030fff923SEric Dumazet exist = udp_lib_lport_inuse2(net, snum, hslot2, 27130fff923SEric Dumazet sk, saddr_comp); 27230fff923SEric Dumazet if (!exist && (hash2_nulladdr != slot2)) { 27330fff923SEric Dumazet hslot2 = udp_hashslot2(udptable, hash2_nulladdr); 27430fff923SEric Dumazet exist = udp_lib_lport_inuse2(net, snum, hslot2, 27530fff923SEric Dumazet sk, saddr_comp); 27630fff923SEric Dumazet } 27730fff923SEric Dumazet if (exist) 27830fff923SEric Dumazet goto fail_unlock; 27930fff923SEric Dumazet else 28030fff923SEric Dumazet goto found; 28130fff923SEric Dumazet } 28230fff923SEric Dumazet scan_primary_hash: 283f86dcc5aSEric Dumazet if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 284f86dcc5aSEric Dumazet saddr_comp, 0)) 285645ca708SEric Dumazet goto fail_unlock; 286645ca708SEric 
Dumazet } 28798322f22SEric Dumazet found: 288c720c7e8SEric Dumazet inet_sk(sk)->inet_num = snum; 289d4cada4aSEric Dumazet udp_sk(sk)->udp_port_hash = snum; 290d4cada4aSEric Dumazet udp_sk(sk)->udp_portaddr_hash ^= snum; 2911da177e4SLinus Torvalds if (sk_unhashed(sk)) { 29288ab1932SEric Dumazet sk_nulls_add_node_rcu(sk, &hslot->head); 293fdcc8aa9SEric Dumazet hslot->count++; 294c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 295512615b6SEric Dumazet 296512615b6SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 297512615b6SEric Dumazet spin_lock(&hslot2->lock); 298512615b6SEric Dumazet hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 299512615b6SEric Dumazet &hslot2->head); 300512615b6SEric Dumazet hslot2->count++; 301512615b6SEric Dumazet spin_unlock(&hslot2->lock); 3021da177e4SLinus Torvalds } 30325030a7fSGerrit Renker error = 0; 304645ca708SEric Dumazet fail_unlock: 305645ca708SEric Dumazet spin_unlock_bh(&hslot->lock); 3061da177e4SLinus Torvalds fail: 30725030a7fSGerrit Renker return error; 3081da177e4SLinus Torvalds } 309c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_get_port); 3101da177e4SLinus Torvalds 311499923c7SVlad Yasevich static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 312db8dac20SDavid S. Miller { 313db8dac20SDavid S. Miller struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 314db8dac20SDavid S. Miller 315db8dac20SDavid S. Miller return (!ipv6_only_sock(sk2) && 316c720c7e8SEric Dumazet (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || 317c720c7e8SEric Dumazet inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); 318db8dac20SDavid S. Miller } 319db8dac20SDavid S. 
Miller 320d4cada4aSEric Dumazet static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, 321d4cada4aSEric Dumazet unsigned int port) 322d4cada4aSEric Dumazet { 3230eae88f3SEric Dumazet return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; 324d4cada4aSEric Dumazet } 325d4cada4aSEric Dumazet 3266ba5a3c5SPavel Emelyanov int udp_v4_get_port(struct sock *sk, unsigned short snum) 327db8dac20SDavid S. Miller { 32830fff923SEric Dumazet unsigned int hash2_nulladdr = 3290eae88f3SEric Dumazet udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); 33030fff923SEric Dumazet unsigned int hash2_partial = 33130fff923SEric Dumazet udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 33230fff923SEric Dumazet 333d4cada4aSEric Dumazet /* precompute partial secondary hash */ 33430fff923SEric Dumazet udp_sk(sk)->udp_portaddr_hash = hash2_partial; 33530fff923SEric Dumazet return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); 336db8dac20SDavid S. Miller } 337db8dac20SDavid S. Miller 338645ca708SEric Dumazet static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, 339645ca708SEric Dumazet unsigned short hnum, 340645ca708SEric Dumazet __be16 sport, __be32 daddr, __be16 dport, int dif) 341645ca708SEric Dumazet { 342645ca708SEric Dumazet int score = -1; 343645ca708SEric Dumazet 344d4cada4aSEric Dumazet if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && 345645ca708SEric Dumazet !ipv6_only_sock(sk)) { 346645ca708SEric Dumazet struct inet_sock *inet = inet_sk(sk); 347645ca708SEric Dumazet 348ba418fa3STom Herbert score = (sk->sk_family == PF_INET ? 
2 : 1); 349c720c7e8SEric Dumazet if (inet->inet_rcv_saddr) { 350c720c7e8SEric Dumazet if (inet->inet_rcv_saddr != daddr) 351645ca708SEric Dumazet return -1; 352ba418fa3STom Herbert score += 4; 353645ca708SEric Dumazet } 354c720c7e8SEric Dumazet if (inet->inet_daddr) { 355c720c7e8SEric Dumazet if (inet->inet_daddr != saddr) 356645ca708SEric Dumazet return -1; 357ba418fa3STom Herbert score += 4; 358645ca708SEric Dumazet } 359c720c7e8SEric Dumazet if (inet->inet_dport) { 360c720c7e8SEric Dumazet if (inet->inet_dport != sport) 361645ca708SEric Dumazet return -1; 362ba418fa3STom Herbert score += 4; 363645ca708SEric Dumazet } 364645ca708SEric Dumazet if (sk->sk_bound_dev_if) { 365645ca708SEric Dumazet if (sk->sk_bound_dev_if != dif) 366645ca708SEric Dumazet return -1; 367ba418fa3STom Herbert score += 4; 368645ca708SEric Dumazet } 369645ca708SEric Dumazet } 370645ca708SEric Dumazet return score; 371645ca708SEric Dumazet } 372645ca708SEric Dumazet 3735051ebd2SEric Dumazet /* 3745051ebd2SEric Dumazet * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) 3755051ebd2SEric Dumazet */ 3765051ebd2SEric Dumazet static inline int compute_score2(struct sock *sk, struct net *net, 3775051ebd2SEric Dumazet __be32 saddr, __be16 sport, 3785051ebd2SEric Dumazet __be32 daddr, unsigned int hnum, int dif) 3795051ebd2SEric Dumazet { 3805051ebd2SEric Dumazet int score = -1; 3815051ebd2SEric Dumazet 3825051ebd2SEric Dumazet if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { 3835051ebd2SEric Dumazet struct inet_sock *inet = inet_sk(sk); 3845051ebd2SEric Dumazet 3855051ebd2SEric Dumazet if (inet->inet_rcv_saddr != daddr) 3865051ebd2SEric Dumazet return -1; 3875051ebd2SEric Dumazet if (inet->inet_num != hnum) 3885051ebd2SEric Dumazet return -1; 3895051ebd2SEric Dumazet 390ba418fa3STom Herbert score = (sk->sk_family == PF_INET ? 
2 : 1); 3915051ebd2SEric Dumazet if (inet->inet_daddr) { 3925051ebd2SEric Dumazet if (inet->inet_daddr != saddr) 3935051ebd2SEric Dumazet return -1; 394ba418fa3STom Herbert score += 4; 3955051ebd2SEric Dumazet } 3965051ebd2SEric Dumazet if (inet->inet_dport) { 3975051ebd2SEric Dumazet if (inet->inet_dport != sport) 3985051ebd2SEric Dumazet return -1; 399ba418fa3STom Herbert score += 4; 4005051ebd2SEric Dumazet } 4015051ebd2SEric Dumazet if (sk->sk_bound_dev_if) { 4025051ebd2SEric Dumazet if (sk->sk_bound_dev_if != dif) 4035051ebd2SEric Dumazet return -1; 404ba418fa3STom Herbert score += 4; 4055051ebd2SEric Dumazet } 4065051ebd2SEric Dumazet } 4075051ebd2SEric Dumazet return score; 4085051ebd2SEric Dumazet } 4095051ebd2SEric Dumazet 41065cd8033SHannes Frederic Sowa static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, 41165cd8033SHannes Frederic Sowa const __u16 lport, const __be32 faddr, 41265cd8033SHannes Frederic Sowa const __be16 fport) 41365cd8033SHannes Frederic Sowa { 4141bbdceefSHannes Frederic Sowa static u32 udp_ehash_secret __read_mostly; 4151bbdceefSHannes Frederic Sowa 4161bbdceefSHannes Frederic Sowa net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); 4171bbdceefSHannes Frederic Sowa 41865cd8033SHannes Frederic Sowa return __inet_ehashfn(laddr, lport, faddr, fport, 4191bbdceefSHannes Frederic Sowa udp_ehash_secret + net_hash_mix(net)); 42065cd8033SHannes Frederic Sowa } 42165cd8033SHannes Frederic Sowa 4225051ebd2SEric Dumazet 4235051ebd2SEric Dumazet /* called with read_rcu_lock() */ 4245051ebd2SEric Dumazet static struct sock *udp4_lib_lookup2(struct net *net, 4255051ebd2SEric Dumazet __be32 saddr, __be16 sport, 4265051ebd2SEric Dumazet __be32 daddr, unsigned int hnum, int dif, 4275051ebd2SEric Dumazet struct udp_hslot *hslot2, unsigned int slot2) 4285051ebd2SEric Dumazet { 4295051ebd2SEric Dumazet struct sock *sk, *result; 4305051ebd2SEric Dumazet struct hlist_nulls_node *node; 431ba418fa3STom Herbert int score, badness, 
matches = 0, reuseport = 0; 432ba418fa3STom Herbert u32 hash = 0; 4335051ebd2SEric Dumazet 4345051ebd2SEric Dumazet begin: 4355051ebd2SEric Dumazet result = NULL; 436ba418fa3STom Herbert badness = 0; 4375051ebd2SEric Dumazet udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { 4385051ebd2SEric Dumazet score = compute_score2(sk, net, saddr, sport, 4395051ebd2SEric Dumazet daddr, hnum, dif); 4405051ebd2SEric Dumazet if (score > badness) { 4415051ebd2SEric Dumazet result = sk; 4425051ebd2SEric Dumazet badness = score; 443ba418fa3STom Herbert reuseport = sk->sk_reuseport; 444ba418fa3STom Herbert if (reuseport) { 44565cd8033SHannes Frederic Sowa hash = udp_ehashfn(net, daddr, hnum, 4467c0cadc6SEric Dumazet saddr, sport); 447ba418fa3STom Herbert matches = 1; 448ba418fa3STom Herbert } 449ba418fa3STom Herbert } else if (score == badness && reuseport) { 450ba418fa3STom Herbert matches++; 451ba418fa3STom Herbert if (((u64)hash * matches) >> 32 == 0) 452ba418fa3STom Herbert result = sk; 453ba418fa3STom Herbert hash = next_pseudo_random32(hash); 4545051ebd2SEric Dumazet } 4555051ebd2SEric Dumazet } 4565051ebd2SEric Dumazet /* 4575051ebd2SEric Dumazet * if the nulls value we got at the end of this lookup is 4585051ebd2SEric Dumazet * not the expected one, we must restart lookup. 4595051ebd2SEric Dumazet * We probably met an item that was moved to another chain. 
4605051ebd2SEric Dumazet */ 4615051ebd2SEric Dumazet if (get_nulls_value(node) != slot2) 4625051ebd2SEric Dumazet goto begin; 4635051ebd2SEric Dumazet if (result) { 464c31504dcSEric Dumazet if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 4655051ebd2SEric Dumazet result = NULL; 4665051ebd2SEric Dumazet else if (unlikely(compute_score2(result, net, saddr, sport, 4675051ebd2SEric Dumazet daddr, hnum, dif) < badness)) { 4685051ebd2SEric Dumazet sock_put(result); 4695051ebd2SEric Dumazet goto begin; 4705051ebd2SEric Dumazet } 4715051ebd2SEric Dumazet } 4725051ebd2SEric Dumazet return result; 4735051ebd2SEric Dumazet } 4745051ebd2SEric Dumazet 475db8dac20SDavid S. Miller /* UDP is nearly always wildcards out the wazoo, it makes no sense to try 476db8dac20SDavid S. Miller * harder than this. -DaveM 477db8dac20SDavid S. Miller */ 478fce82338SPavel Emelyanov struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, 479db8dac20SDavid S. Miller __be16 sport, __be32 daddr, __be16 dport, 480645ca708SEric Dumazet int dif, struct udp_table *udptable) 481db8dac20SDavid S. Miller { 482271b72c7SEric Dumazet struct sock *sk, *result; 48388ab1932SEric Dumazet struct hlist_nulls_node *node; 484db8dac20SDavid S. Miller unsigned short hnum = ntohs(dport); 4855051ebd2SEric Dumazet unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 4865051ebd2SEric Dumazet struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 487ba418fa3STom Herbert int score, badness, matches = 0, reuseport = 0; 488ba418fa3STom Herbert u32 hash = 0; 489db8dac20SDavid S. 
Miller 490271b72c7SEric Dumazet rcu_read_lock(); 4915051ebd2SEric Dumazet if (hslot->count > 10) { 4925051ebd2SEric Dumazet hash2 = udp4_portaddr_hash(net, daddr, hnum); 4935051ebd2SEric Dumazet slot2 = hash2 & udptable->mask; 4945051ebd2SEric Dumazet hslot2 = &udptable->hash2[slot2]; 4955051ebd2SEric Dumazet if (hslot->count < hslot2->count) 4965051ebd2SEric Dumazet goto begin; 4975051ebd2SEric Dumazet 4985051ebd2SEric Dumazet result = udp4_lib_lookup2(net, saddr, sport, 4995051ebd2SEric Dumazet daddr, hnum, dif, 5005051ebd2SEric Dumazet hslot2, slot2); 5015051ebd2SEric Dumazet if (!result) { 5020eae88f3SEric Dumazet hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 5035051ebd2SEric Dumazet slot2 = hash2 & udptable->mask; 5045051ebd2SEric Dumazet hslot2 = &udptable->hash2[slot2]; 5055051ebd2SEric Dumazet if (hslot->count < hslot2->count) 5065051ebd2SEric Dumazet goto begin; 5075051ebd2SEric Dumazet 5081223c67cSJorge Boncompte [DTI2] result = udp4_lib_lookup2(net, saddr, sport, 5090eae88f3SEric Dumazet htonl(INADDR_ANY), hnum, dif, 5105051ebd2SEric Dumazet hslot2, slot2); 5115051ebd2SEric Dumazet } 5125051ebd2SEric Dumazet rcu_read_unlock(); 5135051ebd2SEric Dumazet return result; 5145051ebd2SEric Dumazet } 515271b72c7SEric Dumazet begin: 516271b72c7SEric Dumazet result = NULL; 517ba418fa3STom Herbert badness = 0; 51888ab1932SEric Dumazet sk_nulls_for_each_rcu(sk, node, &hslot->head) { 519645ca708SEric Dumazet score = compute_score(sk, net, saddr, hnum, sport, 520645ca708SEric Dumazet daddr, dport, dif); 521645ca708SEric Dumazet if (score > badness) { 522db8dac20SDavid S. Miller result = sk; 523db8dac20SDavid S. 
Miller badness = score; 524ba418fa3STom Herbert reuseport = sk->sk_reuseport; 525ba418fa3STom Herbert if (reuseport) { 52665cd8033SHannes Frederic Sowa hash = udp_ehashfn(net, daddr, hnum, 5277c0cadc6SEric Dumazet saddr, sport); 528ba418fa3STom Herbert matches = 1; 529ba418fa3STom Herbert } 530ba418fa3STom Herbert } else if (score == badness && reuseport) { 531ba418fa3STom Herbert matches++; 532ba418fa3STom Herbert if (((u64)hash * matches) >> 32 == 0) 533ba418fa3STom Herbert result = sk; 534ba418fa3STom Herbert hash = next_pseudo_random32(hash); 535db8dac20SDavid S. Miller } 536db8dac20SDavid S. Miller } 53788ab1932SEric Dumazet /* 53888ab1932SEric Dumazet * if the nulls value we got at the end of this lookup is 53988ab1932SEric Dumazet * not the expected one, we must restart lookup. 54088ab1932SEric Dumazet * We probably met an item that was moved to another chain. 54188ab1932SEric Dumazet */ 5425051ebd2SEric Dumazet if (get_nulls_value(node) != slot) 54388ab1932SEric Dumazet goto begin; 54488ab1932SEric Dumazet 545271b72c7SEric Dumazet if (result) { 546c31504dcSEric Dumazet if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 547271b72c7SEric Dumazet result = NULL; 548271b72c7SEric Dumazet else if (unlikely(compute_score(result, net, saddr, hnum, sport, 549271b72c7SEric Dumazet daddr, dport, dif) < badness)) { 550271b72c7SEric Dumazet sock_put(result); 551271b72c7SEric Dumazet goto begin; 552271b72c7SEric Dumazet } 553271b72c7SEric Dumazet } 554271b72c7SEric Dumazet rcu_read_unlock(); 555db8dac20SDavid S. Miller return result; 556db8dac20SDavid S. Miller } 557fce82338SPavel Emelyanov EXPORT_SYMBOL_GPL(__udp4_lib_lookup); 558db8dac20SDavid S. 
Miller 559607c4aafSKOVACS Krisztian static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, 560607c4aafSKOVACS Krisztian __be16 sport, __be16 dport, 561645ca708SEric Dumazet struct udp_table *udptable) 562607c4aafSKOVACS Krisztian { 563607c4aafSKOVACS Krisztian const struct iphdr *iph = ip_hdr(skb); 564607c4aafSKOVACS Krisztian 565adf30907SEric Dumazet return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, 566607c4aafSKOVACS Krisztian iph->daddr, dport, inet_iif(skb), 567607c4aafSKOVACS Krisztian udptable); 568607c4aafSKOVACS Krisztian } 569607c4aafSKOVACS Krisztian 570bcd41303SKOVACS Krisztian struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 571bcd41303SKOVACS Krisztian __be32 daddr, __be16 dport, int dif) 572bcd41303SKOVACS Krisztian { 573645ca708SEric Dumazet return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); 574bcd41303SKOVACS Krisztian } 575bcd41303SKOVACS Krisztian EXPORT_SYMBOL_GPL(udp4_lib_lookup); 576bcd41303SKOVACS Krisztian 577421b3885SShawn Bohrer static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, 578421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 579421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 580421b3885SShawn Bohrer int dif, unsigned short hnum) 581421b3885SShawn Bohrer { 582421b3885SShawn Bohrer struct inet_sock *inet = inet_sk(sk); 583421b3885SShawn Bohrer 584421b3885SShawn Bohrer if (!net_eq(sock_net(sk), net) || 585421b3885SShawn Bohrer udp_sk(sk)->udp_port_hash != hnum || 586421b3885SShawn Bohrer (inet->inet_daddr && inet->inet_daddr != rmt_addr) || 587421b3885SShawn Bohrer (inet->inet_dport != rmt_port && inet->inet_dport) || 588421b3885SShawn Bohrer (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || 589421b3885SShawn Bohrer ipv6_only_sock(sk) || 590421b3885SShawn Bohrer (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 591421b3885SShawn Bohrer return false; 592421b3885SShawn Bohrer if (!ip_mc_sf_allow(sk, 
loc_addr, rmt_addr, dif)) 593421b3885SShawn Bohrer return false; 594421b3885SShawn Bohrer return true; 595421b3885SShawn Bohrer } 596421b3885SShawn Bohrer 597920a4611SEric Dumazet static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, 598db8dac20SDavid S. Miller __be16 loc_port, __be32 loc_addr, 599db8dac20SDavid S. Miller __be16 rmt_port, __be32 rmt_addr, 600db8dac20SDavid S. Miller int dif) 601db8dac20SDavid S. Miller { 60288ab1932SEric Dumazet struct hlist_nulls_node *node; 603db8dac20SDavid S. Miller struct sock *s = sk; 604db8dac20SDavid S. Miller unsigned short hnum = ntohs(loc_port); 605db8dac20SDavid S. Miller 60688ab1932SEric Dumazet sk_nulls_for_each_from(s, node) { 607421b3885SShawn Bohrer if (__udp_is_mcast_sock(net, s, 608421b3885SShawn Bohrer loc_port, loc_addr, 609421b3885SShawn Bohrer rmt_port, rmt_addr, 610421b3885SShawn Bohrer dif, hnum)) 611db8dac20SDavid S. Miller goto found; 612db8dac20SDavid S. Miller } 613db8dac20SDavid S. Miller s = NULL; 614db8dac20SDavid S. Miller found: 615db8dac20SDavid S. Miller return s; 616db8dac20SDavid S. Miller } 617db8dac20SDavid S. Miller 618db8dac20SDavid S. Miller /* 619db8dac20SDavid S. Miller * This routine is called by the ICMP module when it gets some 620db8dac20SDavid S. Miller * sort of error condition. If err < 0 then the socket should 621db8dac20SDavid S. Miller * be closed and the error returned to the user. If err > 0 622db8dac20SDavid S. Miller * it's just the icmp type << 8 | icmp code. 623db8dac20SDavid S. Miller * Header points to the ip header of the error packet. We move 624db8dac20SDavid S. Miller * on past this. Then (as it used to claim before adjustment) 625db8dac20SDavid S. Miller * header points to the first 8 bytes of the udp header. We need 626db8dac20SDavid S. Miller * to find the appropriate port. 627db8dac20SDavid S. Miller */ 628db8dac20SDavid S. 
Miller 629645ca708SEric Dumazet void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 630db8dac20SDavid S. Miller { 631db8dac20SDavid S. Miller struct inet_sock *inet; 632b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)skb->data; 633db8dac20SDavid S. Miller struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 634db8dac20SDavid S. Miller const int type = icmp_hdr(skb)->type; 635db8dac20SDavid S. Miller const int code = icmp_hdr(skb)->code; 636db8dac20SDavid S. Miller struct sock *sk; 637db8dac20SDavid S. Miller int harderr; 638db8dac20SDavid S. Miller int err; 639fd54d716SPavel Emelyanov struct net *net = dev_net(skb->dev); 640db8dac20SDavid S. Miller 641fd54d716SPavel Emelyanov sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 642db8dac20SDavid S. Miller iph->saddr, uh->source, skb->dev->ifindex, udptable); 643db8dac20SDavid S. Miller if (sk == NULL) { 644dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 645db8dac20SDavid S. Miller return; /* No socket for error */ 646db8dac20SDavid S. Miller } 647db8dac20SDavid S. Miller 648db8dac20SDavid S. Miller err = 0; 649db8dac20SDavid S. Miller harderr = 0; 650db8dac20SDavid S. Miller inet = inet_sk(sk); 651db8dac20SDavid S. Miller 652db8dac20SDavid S. Miller switch (type) { 653db8dac20SDavid S. Miller default: 654db8dac20SDavid S. Miller case ICMP_TIME_EXCEEDED: 655db8dac20SDavid S. Miller err = EHOSTUNREACH; 656db8dac20SDavid S. Miller break; 657db8dac20SDavid S. Miller case ICMP_SOURCE_QUENCH: 658db8dac20SDavid S. Miller goto out; 659db8dac20SDavid S. Miller case ICMP_PARAMETERPROB: 660db8dac20SDavid S. Miller err = EPROTO; 661db8dac20SDavid S. Miller harderr = 1; 662db8dac20SDavid S. Miller break; 663db8dac20SDavid S. Miller case ICMP_DEST_UNREACH: 664db8dac20SDavid S. Miller if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ 66536393395SDavid S. Miller ipv4_sk_update_pmtu(skb, sk, info); 666db8dac20SDavid S. 
Miller if (inet->pmtudisc != IP_PMTUDISC_DONT) { 667db8dac20SDavid S. Miller err = EMSGSIZE; 668db8dac20SDavid S. Miller harderr = 1; 669db8dac20SDavid S. Miller break; 670db8dac20SDavid S. Miller } 671db8dac20SDavid S. Miller goto out; 672db8dac20SDavid S. Miller } 673db8dac20SDavid S. Miller err = EHOSTUNREACH; 674db8dac20SDavid S. Miller if (code <= NR_ICMP_UNREACH) { 675db8dac20SDavid S. Miller harderr = icmp_err_convert[code].fatal; 676db8dac20SDavid S. Miller err = icmp_err_convert[code].errno; 677db8dac20SDavid S. Miller } 678db8dac20SDavid S. Miller break; 67955be7a9cSDavid S. Miller case ICMP_REDIRECT: 68055be7a9cSDavid S. Miller ipv4_sk_redirect(skb, sk); 6811a462d18SDuan Jiong goto out; 682db8dac20SDavid S. Miller } 683db8dac20SDavid S. Miller 684db8dac20SDavid S. Miller /* 685db8dac20SDavid S. Miller * RFC1122: OK. Passes ICMP errors back to application, as per 686db8dac20SDavid S. Miller * 4.1.3.3. 687db8dac20SDavid S. Miller */ 688db8dac20SDavid S. Miller if (!inet->recverr) { 689db8dac20SDavid S. Miller if (!harderr || sk->sk_state != TCP_ESTABLISHED) 690db8dac20SDavid S. Miller goto out; 691b1faf566SEric Dumazet } else 692db8dac20SDavid S. Miller ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); 693b1faf566SEric Dumazet 694db8dac20SDavid S. Miller sk->sk_err = err; 695db8dac20SDavid S. Miller sk->sk_error_report(sk); 696db8dac20SDavid S. Miller out: 697db8dac20SDavid S. Miller sock_put(sk); 698db8dac20SDavid S. Miller } 699db8dac20SDavid S. Miller 700db8dac20SDavid S. Miller void udp_err(struct sk_buff *skb, u32 info) 701db8dac20SDavid S. Miller { 702645ca708SEric Dumazet __udp4_lib_err(skb, info, &udp_table); 703db8dac20SDavid S. Miller } 704db8dac20SDavid S. Miller 705db8dac20SDavid S. Miller /* 706db8dac20SDavid S. Miller * Throw away all pending data and cancel the corking. Socket is locked. 707db8dac20SDavid S. Miller */ 70836d926b9SDenis V. Lunev void udp_flush_pending_frames(struct sock *sk) 709db8dac20SDavid S. 
Miller { 710db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 711db8dac20SDavid S. Miller 712db8dac20SDavid S. Miller if (up->pending) { 713db8dac20SDavid S. Miller up->len = 0; 714db8dac20SDavid S. Miller up->pending = 0; 715db8dac20SDavid S. Miller ip_flush_pending_frames(sk); 716db8dac20SDavid S. Miller } 717db8dac20SDavid S. Miller } 71836d926b9SDenis V. Lunev EXPORT_SYMBOL(udp_flush_pending_frames); 719db8dac20SDavid S. Miller 720db8dac20SDavid S. Miller /** 721f6b9664fSHerbert Xu * udp4_hwcsum - handle outgoing HW checksumming 722db8dac20SDavid S. Miller * @skb: sk_buff containing the filled-in UDP header 723db8dac20SDavid S. Miller * (checksum field must be zeroed out) 724f6b9664fSHerbert Xu * @src: source IP address 725f6b9664fSHerbert Xu * @dst: destination IP address 726db8dac20SDavid S. Miller */ 727c26bf4a5SThomas Graf void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) 728db8dac20SDavid S. Miller { 729db8dac20SDavid S. Miller struct udphdr *uh = udp_hdr(skb); 730f6b9664fSHerbert Xu struct sk_buff *frags = skb_shinfo(skb)->frag_list; 731f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 732f6b9664fSHerbert Xu int len = skb->len - offset; 733f6b9664fSHerbert Xu int hlen = len; 734db8dac20SDavid S. Miller __wsum csum = 0; 735db8dac20SDavid S. Miller 736f6b9664fSHerbert Xu if (!frags) { 737db8dac20SDavid S. Miller /* 738db8dac20SDavid S. Miller * Only one fragment on the socket. 739db8dac20SDavid S. Miller */ 740db8dac20SDavid S. Miller skb->csum_start = skb_transport_header(skb) - skb->head; 741db8dac20SDavid S. Miller skb->csum_offset = offsetof(struct udphdr, check); 742f6b9664fSHerbert Xu uh->check = ~csum_tcpudp_magic(src, dst, len, 743f6b9664fSHerbert Xu IPPROTO_UDP, 0); 744db8dac20SDavid S. Miller } else { 745db8dac20SDavid S. Miller /* 746db8dac20SDavid S. Miller * HW-checksum won't work as there are two or more 747db8dac20SDavid S. Miller * fragments on the socket so that all csums of sk_buffs 748db8dac20SDavid S. 
Miller * should be together 749db8dac20SDavid S. Miller */ 750f6b9664fSHerbert Xu do { 751f6b9664fSHerbert Xu csum = csum_add(csum, frags->csum); 752f6b9664fSHerbert Xu hlen -= frags->len; 753f6b9664fSHerbert Xu } while ((frags = frags->next)); 754db8dac20SDavid S. Miller 755f6b9664fSHerbert Xu csum = skb_checksum(skb, offset, hlen, csum); 756db8dac20SDavid S. Miller skb->ip_summed = CHECKSUM_NONE; 757db8dac20SDavid S. Miller 758db8dac20SDavid S. Miller uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); 759db8dac20SDavid S. Miller if (uh->check == 0) 760db8dac20SDavid S. Miller uh->check = CSUM_MANGLED_0; 761db8dac20SDavid S. Miller } 762db8dac20SDavid S. Miller } 763c26bf4a5SThomas Graf EXPORT_SYMBOL_GPL(udp4_hwcsum); 764db8dac20SDavid S. Miller 76579ab0531SDavid S. Miller static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) 766f6b9664fSHerbert Xu { 767f6b9664fSHerbert Xu struct sock *sk = skb->sk; 768f6b9664fSHerbert Xu struct inet_sock *inet = inet_sk(sk); 769f6b9664fSHerbert Xu struct udphdr *uh; 770f6b9664fSHerbert Xu int err = 0; 771f6b9664fSHerbert Xu int is_udplite = IS_UDPLITE(sk); 772f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 773f6b9664fSHerbert Xu int len = skb->len - offset; 774f6b9664fSHerbert Xu __wsum csum = 0; 775f6b9664fSHerbert Xu 776f6b9664fSHerbert Xu /* 777f6b9664fSHerbert Xu * Create a UDP header 778f6b9664fSHerbert Xu */ 779f6b9664fSHerbert Xu uh = udp_hdr(skb); 780f6b9664fSHerbert Xu uh->source = inet->inet_sport; 78179ab0531SDavid S. 
Miller uh->dest = fl4->fl4_dport; 782f6b9664fSHerbert Xu uh->len = htons(len); 783f6b9664fSHerbert Xu uh->check = 0; 784f6b9664fSHerbert Xu 785f6b9664fSHerbert Xu if (is_udplite) /* UDP-Lite */ 786f6b9664fSHerbert Xu csum = udplite_csum(skb); 787f6b9664fSHerbert Xu 788f6b9664fSHerbert Xu else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 789f6b9664fSHerbert Xu 790f6b9664fSHerbert Xu skb->ip_summed = CHECKSUM_NONE; 791f6b9664fSHerbert Xu goto send; 792f6b9664fSHerbert Xu 793f6b9664fSHerbert Xu } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 794f6b9664fSHerbert Xu 79579ab0531SDavid S. Miller udp4_hwcsum(skb, fl4->saddr, fl4->daddr); 796f6b9664fSHerbert Xu goto send; 797f6b9664fSHerbert Xu 798f6b9664fSHerbert Xu } else 799f6b9664fSHerbert Xu csum = udp_csum(skb); 800f6b9664fSHerbert Xu 801f6b9664fSHerbert Xu /* add protocol-dependent pseudo-header */ 80279ab0531SDavid S. Miller uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, 803f6b9664fSHerbert Xu sk->sk_protocol, csum); 804f6b9664fSHerbert Xu if (uh->check == 0) 805f6b9664fSHerbert Xu uh->check = CSUM_MANGLED_0; 806f6b9664fSHerbert Xu 807f6b9664fSHerbert Xu send: 808b5ec8eeaSEric Dumazet err = ip_send_skb(sock_net(sk), skb); 809f6b9664fSHerbert Xu if (err) { 810f6b9664fSHerbert Xu if (err == -ENOBUFS && !inet->recverr) { 811f6b9664fSHerbert Xu UDP_INC_STATS_USER(sock_net(sk), 812f6b9664fSHerbert Xu UDP_MIB_SNDBUFERRORS, is_udplite); 813f6b9664fSHerbert Xu err = 0; 814f6b9664fSHerbert Xu } 815f6b9664fSHerbert Xu } else 816f6b9664fSHerbert Xu UDP_INC_STATS_USER(sock_net(sk), 817f6b9664fSHerbert Xu UDP_MIB_OUTDATAGRAMS, is_udplite); 818f6b9664fSHerbert Xu return err; 819f6b9664fSHerbert Xu } 820f6b9664fSHerbert Xu 821db8dac20SDavid S. Miller /* 822db8dac20SDavid S. Miller * Push out all pending data as one UDP datagram. Socket is locked. 823db8dac20SDavid S. Miller */ 8248822b64aSHannes Frederic Sowa int udp_push_pending_frames(struct sock *sk) 825db8dac20SDavid S. 
Miller { 826db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 827db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 828b6f21b26SDavid S. Miller struct flowi4 *fl4 = &inet->cork.fl.u.ip4; 829db8dac20SDavid S. Miller struct sk_buff *skb; 830db8dac20SDavid S. Miller int err = 0; 831db8dac20SDavid S. Miller 83277968b78SDavid S. Miller skb = ip_finish_skb(sk, fl4); 833f6b9664fSHerbert Xu if (!skb) 834db8dac20SDavid S. Miller goto out; 835db8dac20SDavid S. Miller 83679ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 837db8dac20SDavid S. Miller 838db8dac20SDavid S. Miller out: 839db8dac20SDavid S. Miller up->len = 0; 840db8dac20SDavid S. Miller up->pending = 0; 841db8dac20SDavid S. Miller return err; 842db8dac20SDavid S. Miller } 8438822b64aSHannes Frederic Sowa EXPORT_SYMBOL(udp_push_pending_frames); 844db8dac20SDavid S. Miller 845db8dac20SDavid S. Miller int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 846db8dac20SDavid S. Miller size_t len) 847db8dac20SDavid S. Miller { 848db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 849db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 850e474995fSDavid S. Miller struct flowi4 fl4_stack; 851b6f21b26SDavid S. Miller struct flowi4 *fl4; 852db8dac20SDavid S. Miller int ulen = len; 853db8dac20SDavid S. Miller struct ipcm_cookie ipc; 854db8dac20SDavid S. Miller struct rtable *rt = NULL; 855db8dac20SDavid S. Miller int free = 0; 856db8dac20SDavid S. Miller int connected = 0; 857db8dac20SDavid S. Miller __be32 daddr, faddr, saddr; 858db8dac20SDavid S. Miller __be16 dport; 859db8dac20SDavid S. Miller u8 tos; 860db8dac20SDavid S. Miller int err, is_udplite = IS_UDPLITE(sk); 861db8dac20SDavid S. Miller int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 862db8dac20SDavid S. Miller int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 863903ab86dSHerbert Xu struct sk_buff *skb; 864f6d8bd05SEric Dumazet struct ip_options_data opt_copy; 865db8dac20SDavid S. 
Miller 866db8dac20SDavid S. Miller if (len > 0xFFFF) 867db8dac20SDavid S. Miller return -EMSGSIZE; 868db8dac20SDavid S. Miller 869db8dac20SDavid S. Miller /* 870db8dac20SDavid S. Miller * Check the flags. 871db8dac20SDavid S. Miller */ 872db8dac20SDavid S. Miller 873db8dac20SDavid S. Miller if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ 874db8dac20SDavid S. Miller return -EOPNOTSUPP; 875db8dac20SDavid S. Miller 876db8dac20SDavid S. Miller ipc.opt = NULL; 8772244d07bSOliver Hartkopp ipc.tx_flags = 0; 878aa661581SFrancesco Fusco ipc.ttl = 0; 879aa661581SFrancesco Fusco ipc.tos = -1; 880db8dac20SDavid S. Miller 881903ab86dSHerbert Xu getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 882903ab86dSHerbert Xu 883f5fca608SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 884db8dac20SDavid S. Miller if (up->pending) { 885db8dac20SDavid S. Miller /* 886db8dac20SDavid S. Miller * There are pending frames. 887db8dac20SDavid S. Miller * The socket lock must be held while it's corked. 888db8dac20SDavid S. Miller */ 889db8dac20SDavid S. Miller lock_sock(sk); 890db8dac20SDavid S. Miller if (likely(up->pending)) { 891db8dac20SDavid S. Miller if (unlikely(up->pending != AF_INET)) { 892db8dac20SDavid S. Miller release_sock(sk); 893db8dac20SDavid S. Miller return -EINVAL; 894db8dac20SDavid S. Miller } 895db8dac20SDavid S. Miller goto do_append_data; 896db8dac20SDavid S. Miller } 897db8dac20SDavid S. Miller release_sock(sk); 898db8dac20SDavid S. Miller } 899db8dac20SDavid S. Miller ulen += sizeof(struct udphdr); 900db8dac20SDavid S. Miller 901db8dac20SDavid S. Miller /* 902db8dac20SDavid S. Miller * Get and verify the address. 903db8dac20SDavid S. Miller */ 904db8dac20SDavid S. Miller if (msg->msg_name) { 905342dfc30SSteffen Hurrle DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); 906db8dac20SDavid S. Miller if (msg->msg_namelen < sizeof(*usin)) 907db8dac20SDavid S. Miller return -EINVAL; 908db8dac20SDavid S. 
Miller if (usin->sin_family != AF_INET) { 909db8dac20SDavid S. Miller if (usin->sin_family != AF_UNSPEC) 910db8dac20SDavid S. Miller return -EAFNOSUPPORT; 911db8dac20SDavid S. Miller } 912db8dac20SDavid S. Miller 913db8dac20SDavid S. Miller daddr = usin->sin_addr.s_addr; 914db8dac20SDavid S. Miller dport = usin->sin_port; 915db8dac20SDavid S. Miller if (dport == 0) 916db8dac20SDavid S. Miller return -EINVAL; 917db8dac20SDavid S. Miller } else { 918db8dac20SDavid S. Miller if (sk->sk_state != TCP_ESTABLISHED) 919db8dac20SDavid S. Miller return -EDESTADDRREQ; 920c720c7e8SEric Dumazet daddr = inet->inet_daddr; 921c720c7e8SEric Dumazet dport = inet->inet_dport; 922db8dac20SDavid S. Miller /* Open fast path for connected socket. 923db8dac20SDavid S. Miller Route will not be used, if at least one option is set. 924db8dac20SDavid S. Miller */ 925db8dac20SDavid S. Miller connected = 1; 926db8dac20SDavid S. Miller } 927c720c7e8SEric Dumazet ipc.addr = inet->inet_saddr; 928db8dac20SDavid S. Miller 929db8dac20SDavid S. Miller ipc.oif = sk->sk_bound_dev_if; 930bf84a010SDaniel Borkmann 931bf84a010SDaniel Borkmann sock_tx_timestamp(sk, &ipc.tx_flags); 932bf84a010SDaniel Borkmann 933db8dac20SDavid S. Miller if (msg->msg_controllen) { 934c8e6ad08SHannes Frederic Sowa err = ip_cmsg_send(sock_net(sk), msg, &ipc, 935c8e6ad08SHannes Frederic Sowa sk->sk_family == AF_INET6); 936db8dac20SDavid S. Miller if (err) 937db8dac20SDavid S. Miller return err; 938db8dac20SDavid S. Miller if (ipc.opt) 939db8dac20SDavid S. Miller free = 1; 940db8dac20SDavid S. Miller connected = 0; 941db8dac20SDavid S. 
Miller } 942f6d8bd05SEric Dumazet if (!ipc.opt) { 943f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 944f6d8bd05SEric Dumazet 945f6d8bd05SEric Dumazet rcu_read_lock(); 946f6d8bd05SEric Dumazet inet_opt = rcu_dereference(inet->inet_opt); 947f6d8bd05SEric Dumazet if (inet_opt) { 948f6d8bd05SEric Dumazet memcpy(&opt_copy, inet_opt, 949f6d8bd05SEric Dumazet sizeof(*inet_opt) + inet_opt->opt.optlen); 950f6d8bd05SEric Dumazet ipc.opt = &opt_copy.opt; 951f6d8bd05SEric Dumazet } 952f6d8bd05SEric Dumazet rcu_read_unlock(); 953f6d8bd05SEric Dumazet } 954db8dac20SDavid S. Miller 955db8dac20SDavid S. Miller saddr = ipc.addr; 956db8dac20SDavid S. Miller ipc.addr = faddr = daddr; 957db8dac20SDavid S. Miller 958f6d8bd05SEric Dumazet if (ipc.opt && ipc.opt->opt.srr) { 959db8dac20SDavid S. Miller if (!daddr) 960db8dac20SDavid S. Miller return -EINVAL; 961f6d8bd05SEric Dumazet faddr = ipc.opt->opt.faddr; 962db8dac20SDavid S. Miller connected = 0; 963db8dac20SDavid S. Miller } 964aa661581SFrancesco Fusco tos = get_rttos(&ipc, inet); 965db8dac20SDavid S. Miller if (sock_flag(sk, SOCK_LOCALROUTE) || 966db8dac20SDavid S. Miller (msg->msg_flags & MSG_DONTROUTE) || 967f6d8bd05SEric Dumazet (ipc.opt && ipc.opt->opt.is_strictroute)) { 968db8dac20SDavid S. Miller tos |= RTO_ONLINK; 969db8dac20SDavid S. Miller connected = 0; 970db8dac20SDavid S. Miller } 971db8dac20SDavid S. Miller 972db8dac20SDavid S. Miller if (ipv4_is_multicast(daddr)) { 973db8dac20SDavid S. Miller if (!ipc.oif) 974db8dac20SDavid S. Miller ipc.oif = inet->mc_index; 975db8dac20SDavid S. Miller if (!saddr) 976db8dac20SDavid S. Miller saddr = inet->mc_addr; 977db8dac20SDavid S. Miller connected = 0; 97876e21053SErich E. Hoover } else if (!ipc.oif) 97976e21053SErich E. Hoover ipc.oif = inet->uc_index; 980db8dac20SDavid S. Miller 981db8dac20SDavid S. Miller if (connected) 982db8dac20SDavid S. Miller rt = (struct rtable *)sk_dst_check(sk, 0); 983db8dac20SDavid S. Miller 984db8dac20SDavid S. 
Miller if (rt == NULL) { 98584a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 98684a3aa00SPavel Emelyanov 987e474995fSDavid S. Miller fl4 = &fl4_stack; 988e474995fSDavid S. Miller flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 989c0951cbcSDavid S. Miller RT_SCOPE_UNIVERSE, sk->sk_protocol, 9900e0d44abSSteffen Klassert inet_sk_flowi_flags(sk), 991c0951cbcSDavid S. Miller faddr, saddr, dport, inet->inet_sport); 992c0951cbcSDavid S. Miller 993e474995fSDavid S. Miller security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 994e474995fSDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 995b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 996b23dd4feSDavid S. Miller err = PTR_ERR(rt); 99706dc94b1SDavid S. Miller rt = NULL; 998db8dac20SDavid S. Miller if (err == -ENETUNREACH) 999f1d8cba6SEric Dumazet IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 1000db8dac20SDavid S. Miller goto out; 1001db8dac20SDavid S. Miller } 1002db8dac20SDavid S. Miller 1003db8dac20SDavid S. Miller err = -EACCES; 1004db8dac20SDavid S. Miller if ((rt->rt_flags & RTCF_BROADCAST) && 1005db8dac20SDavid S. Miller !sock_flag(sk, SOCK_BROADCAST)) 1006db8dac20SDavid S. Miller goto out; 1007db8dac20SDavid S. Miller if (connected) 1008d8d1f30bSChangli Gao sk_dst_set(sk, dst_clone(&rt->dst)); 1009db8dac20SDavid S. Miller } 1010db8dac20SDavid S. Miller 1011db8dac20SDavid S. Miller if (msg->msg_flags&MSG_CONFIRM) 1012db8dac20SDavid S. Miller goto do_confirm; 1013db8dac20SDavid S. Miller back_from_confirm: 1014db8dac20SDavid S. Miller 1015e474995fSDavid S. Miller saddr = fl4->saddr; 1016db8dac20SDavid S. Miller if (!ipc.addr) 1017e474995fSDavid S. Miller daddr = ipc.addr = fl4->daddr; 1018db8dac20SDavid S. Miller 1019903ab86dSHerbert Xu /* Lockless fast path for the non-corking case. */ 1020903ab86dSHerbert Xu if (!corkreq) { 102177968b78SDavid S. 
Miller skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen, 1022903ab86dSHerbert Xu sizeof(struct udphdr), &ipc, &rt, 1023903ab86dSHerbert Xu msg->msg_flags); 1024903ab86dSHerbert Xu err = PTR_ERR(skb); 102550c3a487SYOSHIFUJI Hideaki / 吉藤英明 if (!IS_ERR_OR_NULL(skb)) 102679ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 1027903ab86dSHerbert Xu goto out; 1028903ab86dSHerbert Xu } 1029903ab86dSHerbert Xu 1030db8dac20SDavid S. Miller lock_sock(sk); 1031db8dac20SDavid S. Miller if (unlikely(up->pending)) { 1032db8dac20SDavid S. Miller /* The socket is already corked while preparing it. */ 1033db8dac20SDavid S. Miller /* ... which is an evident application bug. --ANK */ 1034db8dac20SDavid S. Miller release_sock(sk); 1035db8dac20SDavid S. Miller 1036afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n")); 1037db8dac20SDavid S. Miller err = -EINVAL; 1038db8dac20SDavid S. Miller goto out; 1039db8dac20SDavid S. Miller } 1040db8dac20SDavid S. Miller /* 1041db8dac20SDavid S. Miller * Now cork the socket to pend data. 1042db8dac20SDavid S. Miller */ 1043b6f21b26SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 1044b6f21b26SDavid S. Miller fl4->daddr = daddr; 1045b6f21b26SDavid S. Miller fl4->saddr = saddr; 10469cce96dfSDavid S. Miller fl4->fl4_dport = dport; 10479cce96dfSDavid S. Miller fl4->fl4_sport = inet->inet_sport; 1048db8dac20SDavid S. Miller up->pending = AF_INET; 1049db8dac20SDavid S. Miller 1050db8dac20SDavid S. Miller do_append_data: 1051db8dac20SDavid S. Miller up->len += ulen; 1052f5fca608SDavid S. Miller err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen, 10532e77d89bSEric Dumazet sizeof(struct udphdr), &ipc, &rt, 1054db8dac20SDavid S. Miller corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 1055db8dac20SDavid S. Miller if (err) 1056db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1057db8dac20SDavid S. Miller else if (!corkreq) 1058db8dac20SDavid S. Miller err = udp_push_pending_frames(sk); 1059db8dac20SDavid S. 
Miller else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) 1060db8dac20SDavid S. Miller up->pending = 0; 1061db8dac20SDavid S. Miller release_sock(sk); 1062db8dac20SDavid S. Miller 1063db8dac20SDavid S. Miller out: 1064db8dac20SDavid S. Miller ip_rt_put(rt); 1065db8dac20SDavid S. Miller if (free) 1066db8dac20SDavid S. Miller kfree(ipc.opt); 1067db8dac20SDavid S. Miller if (!err) 1068db8dac20SDavid S. Miller return len; 1069db8dac20SDavid S. Miller /* 1070db8dac20SDavid S. Miller * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting 1071db8dac20SDavid S. Miller * ENOBUFS might not be good (it's not tunable per se), but otherwise 1072db8dac20SDavid S. Miller * we don't have a good statistic (IpOutDiscards but it can be too many 1073db8dac20SDavid S. Miller * things). We could add another new stat but at least for now that 1074db8dac20SDavid S. Miller * seems like overkill. 1075db8dac20SDavid S. Miller */ 1076db8dac20SDavid S. Miller if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 1077629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), 1078629ca23cSPavel Emelyanov UDP_MIB_SNDBUFERRORS, is_udplite); 1079db8dac20SDavid S. Miller } 1080db8dac20SDavid S. Miller return err; 1081db8dac20SDavid S. Miller 1082db8dac20SDavid S. Miller do_confirm: 1083d8d1f30bSChangli Gao dst_confirm(&rt->dst); 1084db8dac20SDavid S. Miller if (!(msg->msg_flags&MSG_PROBE) || len) 1085db8dac20SDavid S. Miller goto back_from_confirm; 1086db8dac20SDavid S. Miller err = 0; 1087db8dac20SDavid S. Miller goto out; 1088db8dac20SDavid S. Miller } 1089c482c568SEric Dumazet EXPORT_SYMBOL(udp_sendmsg); 1090db8dac20SDavid S. Miller 1091db8dac20SDavid S. Miller int udp_sendpage(struct sock *sk, struct page *page, int offset, 1092db8dac20SDavid S. Miller size_t size, int flags) 1093db8dac20SDavid S. Miller { 1094f5fca608SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 1095db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 1096db8dac20SDavid S. 
Miller int ret; 1097db8dac20SDavid S. Miller 1098d3f7d56aSShawn Landden if (flags & MSG_SENDPAGE_NOTLAST) 1099d3f7d56aSShawn Landden flags |= MSG_MORE; 1100d3f7d56aSShawn Landden 1101db8dac20SDavid S. Miller if (!up->pending) { 1102db8dac20SDavid S. Miller struct msghdr msg = { .msg_flags = flags|MSG_MORE }; 1103db8dac20SDavid S. Miller 1104db8dac20SDavid S. Miller /* Call udp_sendmsg to specify destination address which 1105db8dac20SDavid S. Miller * sendpage interface can't pass. 1106db8dac20SDavid S. Miller * This will succeed only when the socket is connected. 1107db8dac20SDavid S. Miller */ 1108db8dac20SDavid S. Miller ret = udp_sendmsg(NULL, sk, &msg, 0); 1109db8dac20SDavid S. Miller if (ret < 0) 1110db8dac20SDavid S. Miller return ret; 1111db8dac20SDavid S. Miller } 1112db8dac20SDavid S. Miller 1113db8dac20SDavid S. Miller lock_sock(sk); 1114db8dac20SDavid S. Miller 1115db8dac20SDavid S. Miller if (unlikely(!up->pending)) { 1116db8dac20SDavid S. Miller release_sock(sk); 1117db8dac20SDavid S. Miller 1118afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n")); 1119db8dac20SDavid S. Miller return -EINVAL; 1120db8dac20SDavid S. Miller } 1121db8dac20SDavid S. Miller 1122f5fca608SDavid S. Miller ret = ip_append_page(sk, &inet->cork.fl.u.ip4, 1123f5fca608SDavid S. Miller page, offset, size, flags); 1124db8dac20SDavid S. Miller if (ret == -EOPNOTSUPP) { 1125db8dac20SDavid S. Miller release_sock(sk); 1126db8dac20SDavid S. Miller return sock_no_sendpage(sk->sk_socket, page, offset, 1127db8dac20SDavid S. Miller size, flags); 1128db8dac20SDavid S. Miller } 1129db8dac20SDavid S. Miller if (ret < 0) { 1130db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1131db8dac20SDavid S. Miller goto out; 1132db8dac20SDavid S. Miller } 1133db8dac20SDavid S. Miller 1134db8dac20SDavid S. Miller up->len += size; 1135db8dac20SDavid S. Miller if (!(up->corkflag || (flags&MSG_MORE))) 1136db8dac20SDavid S. 
Miller ret = udp_push_pending_frames(sk); 1137db8dac20SDavid S. Miller if (!ret) 1138db8dac20SDavid S. Miller ret = size; 1139db8dac20SDavid S. Miller out: 1140db8dac20SDavid S. Miller release_sock(sk); 1141db8dac20SDavid S. Miller return ret; 1142db8dac20SDavid S. Miller } 1143db8dac20SDavid S. Miller 114485584672SEric Dumazet 114585584672SEric Dumazet /** 114685584672SEric Dumazet * first_packet_length - return length of first packet in receive queue 114785584672SEric Dumazet * @sk: socket 114885584672SEric Dumazet * 114985584672SEric Dumazet * Drops all bad checksum frames, until a valid one is found. 115085584672SEric Dumazet * Returns the length of found skb, or 0 if none is found. 115185584672SEric Dumazet */ 115285584672SEric Dumazet static unsigned int first_packet_length(struct sock *sk) 115385584672SEric Dumazet { 115485584672SEric Dumazet struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; 115585584672SEric Dumazet struct sk_buff *skb; 115685584672SEric Dumazet unsigned int res; 115785584672SEric Dumazet 115885584672SEric Dumazet __skb_queue_head_init(&list_kill); 115985584672SEric Dumazet 116085584672SEric Dumazet spin_lock_bh(&rcvq->lock); 116185584672SEric Dumazet while ((skb = skb_peek(rcvq)) != NULL && 116285584672SEric Dumazet udp_lib_checksum_complete(skb)) { 11636a5dc9e5SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, 11646a5dc9e5SEric Dumazet IS_UDPLITE(sk)); 116585584672SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 116685584672SEric Dumazet IS_UDPLITE(sk)); 11678edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 116885584672SEric Dumazet __skb_unlink(skb, rcvq); 116985584672SEric Dumazet __skb_queue_tail(&list_kill, skb); 117085584672SEric Dumazet } 117185584672SEric Dumazet res = skb ? 
skb->len : 0; 117285584672SEric Dumazet spin_unlock_bh(&rcvq->lock); 117385584672SEric Dumazet 117485584672SEric Dumazet if (!skb_queue_empty(&list_kill)) { 11758a74ad60SEric Dumazet bool slow = lock_sock_fast(sk); 11768a74ad60SEric Dumazet 117785584672SEric Dumazet __skb_queue_purge(&list_kill); 117885584672SEric Dumazet sk_mem_reclaim_partial(sk); 11798a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 118085584672SEric Dumazet } 118185584672SEric Dumazet return res; 118285584672SEric Dumazet } 118385584672SEric Dumazet 11841da177e4SLinus Torvalds /* 11851da177e4SLinus Torvalds * IOCTL requests applicable to the UDP protocol 11861da177e4SLinus Torvalds */ 11871da177e4SLinus Torvalds 11881da177e4SLinus Torvalds int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) 11891da177e4SLinus Torvalds { 11906516c655SStephen Hemminger switch (cmd) { 11911da177e4SLinus Torvalds case SIOCOUTQ: 11921da177e4SLinus Torvalds { 119331e6d363SEric Dumazet int amount = sk_wmem_alloc_get(sk); 119431e6d363SEric Dumazet 11951da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 11961da177e4SLinus Torvalds } 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds case SIOCINQ: 11991da177e4SLinus Torvalds { 120085584672SEric Dumazet unsigned int amount = first_packet_length(sk); 12011da177e4SLinus Torvalds 120285584672SEric Dumazet if (amount) 12031da177e4SLinus Torvalds /* 12041da177e4SLinus Torvalds * We will only return the amount 12051da177e4SLinus Torvalds * of this packet since that is all 12061da177e4SLinus Torvalds * that will be read. 
12071da177e4SLinus Torvalds */ 120885584672SEric Dumazet amount -= sizeof(struct udphdr); 120985584672SEric Dumazet 12101da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds 12131da177e4SLinus Torvalds default: 12141da177e4SLinus Torvalds return -ENOIOCTLCMD; 12151da177e4SLinus Torvalds } 12166516c655SStephen Hemminger 12176516c655SStephen Hemminger return 0; 12181da177e4SLinus Torvalds } 1219c482c568SEric Dumazet EXPORT_SYMBOL(udp_ioctl); 12201da177e4SLinus Torvalds 1221db8dac20SDavid S. Miller /* 1222db8dac20SDavid S. Miller * This should be easy, if there is something there we 1223db8dac20SDavid S. Miller * return it, otherwise we block. 1224db8dac20SDavid S. Miller */ 1225db8dac20SDavid S. Miller 1226db8dac20SDavid S. Miller int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1227db8dac20SDavid S. Miller size_t len, int noblock, int flags, int *addr_len) 1228db8dac20SDavid S. Miller { 1229db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 1230342dfc30SSteffen Hurrle DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); 1231db8dac20SDavid S. Miller struct sk_buff *skb; 123259c2cdaeSDavid S. Miller unsigned int ulen, copied; 12333f518bf7SPavel Emelyanov int peeked, off = 0; 1234db8dac20SDavid S. Miller int err; 1235db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 12368a74ad60SEric Dumazet bool slow; 1237db8dac20SDavid S. Miller 1238db8dac20SDavid S. Miller if (flags & MSG_ERRQUEUE) 123985fbaa75SHannes Frederic Sowa return ip_recv_error(sk, msg, len, addr_len); 1240db8dac20SDavid S. Miller 1241db8dac20SDavid S. Miller try_again: 1242db8dac20SDavid S. Miller skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 12433f518bf7SPavel Emelyanov &peeked, &off, &err); 1244db8dac20SDavid S. Miller if (!skb) 1245db8dac20SDavid S. Miller goto out; 1246db8dac20SDavid S. Miller 1247db8dac20SDavid S. 
Miller ulen = skb->len - sizeof(struct udphdr); 124859c2cdaeSDavid S. Miller copied = len; 124959c2cdaeSDavid S. Miller if (copied > ulen) 125059c2cdaeSDavid S. Miller copied = ulen; 125159c2cdaeSDavid S. Miller else if (copied < ulen) 1252db8dac20SDavid S. Miller msg->msg_flags |= MSG_TRUNC; 1253db8dac20SDavid S. Miller 1254db8dac20SDavid S. Miller /* 1255db8dac20SDavid S. Miller * If checksum is needed at all, try to do it while copying the 1256db8dac20SDavid S. Miller * data. If the data is truncated, or if we only want a partial 1257db8dac20SDavid S. Miller * coverage checksum (UDP-Lite), do it before the copy. 1258db8dac20SDavid S. Miller */ 1259db8dac20SDavid S. Miller 126059c2cdaeSDavid S. Miller if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { 1261db8dac20SDavid S. Miller if (udp_lib_checksum_complete(skb)) 1262db8dac20SDavid S. Miller goto csum_copy_err; 1263db8dac20SDavid S. Miller } 1264db8dac20SDavid S. Miller 1265db8dac20SDavid S. Miller if (skb_csum_unnecessary(skb)) 1266db8dac20SDavid S. Miller err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 126759c2cdaeSDavid S. Miller msg->msg_iov, copied); 1268db8dac20SDavid S. Miller else { 1269c482c568SEric Dumazet err = skb_copy_and_csum_datagram_iovec(skb, 1270c482c568SEric Dumazet sizeof(struct udphdr), 1271c482c568SEric Dumazet msg->msg_iov); 1272db8dac20SDavid S. Miller 1273db8dac20SDavid S. Miller if (err == -EINVAL) 1274db8dac20SDavid S. Miller goto csum_copy_err; 1275db8dac20SDavid S. Miller } 1276db8dac20SDavid S. Miller 127722911fc5SEric Dumazet if (unlikely(err)) { 127822911fc5SEric Dumazet trace_kfree_skb(skb, udp_recvmsg); 1279979402b1SEric Dumazet if (!peeked) { 1280979402b1SEric Dumazet atomic_inc(&sk->sk_drops); 1281979402b1SEric Dumazet UDP_INC_STATS_USER(sock_net(sk), 1282979402b1SEric Dumazet UDP_MIB_INERRORS, is_udplite); 1283979402b1SEric Dumazet } 1284db8dac20SDavid S. Miller goto out_free; 128522911fc5SEric Dumazet } 1286db8dac20SDavid S. Miller 1287db8dac20SDavid S. 
Miller if (!peeked) 1288629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), 1289629ca23cSPavel Emelyanov UDP_MIB_INDATAGRAMS, is_udplite); 1290db8dac20SDavid S. Miller 12913b885787SNeil Horman sock_recv_ts_and_drops(msg, sk, skb); 1292db8dac20SDavid S. Miller 1293db8dac20SDavid S. Miller /* Copy the address. */ 1294c482c568SEric Dumazet if (sin) { 1295db8dac20SDavid S. Miller sin->sin_family = AF_INET; 1296db8dac20SDavid S. Miller sin->sin_port = udp_hdr(skb)->source; 1297db8dac20SDavid S. Miller sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 1298db8dac20SDavid S. Miller memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 1299bceaa902SHannes Frederic Sowa *addr_len = sizeof(*sin); 1300db8dac20SDavid S. Miller } 1301db8dac20SDavid S. Miller if (inet->cmsg_flags) 1302db8dac20SDavid S. Miller ip_cmsg_recv(msg, skb); 1303db8dac20SDavid S. Miller 130459c2cdaeSDavid S. Miller err = copied; 1305db8dac20SDavid S. Miller if (flags & MSG_TRUNC) 1306db8dac20SDavid S. Miller err = ulen; 1307db8dac20SDavid S. Miller 1308db8dac20SDavid S. Miller out_free: 13099d410c79SEric Dumazet skb_free_datagram_locked(sk, skb); 1310db8dac20SDavid S. Miller out: 1311db8dac20SDavid S. Miller return err; 1312db8dac20SDavid S. Miller 1313db8dac20SDavid S. Miller csum_copy_err: 13148a74ad60SEric Dumazet slow = lock_sock_fast(sk); 13156a5dc9e5SEric Dumazet if (!skb_kill_datagram(sk, skb, flags)) { 13166a5dc9e5SEric Dumazet UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1317629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 13186a5dc9e5SEric Dumazet } 13198a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 1320db8dac20SDavid S. Miller 1321db8dac20SDavid S. Miller if (noblock) 1322db8dac20SDavid S. Miller return -EAGAIN; 13239cfaa8deSXufeng Zhang 13249cfaa8deSXufeng Zhang /* starting over for a new packet */ 13259cfaa8deSXufeng Zhang msg->msg_flags &= ~MSG_TRUNC; 1326db8dac20SDavid S. Miller goto try_again; 1327db8dac20SDavid S. 
Miller } 1328db8dac20SDavid S. Miller 1329db8dac20SDavid S. Miller 13301da177e4SLinus Torvalds int udp_disconnect(struct sock *sk, int flags) 13311da177e4SLinus Torvalds { 13321da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 13331da177e4SLinus Torvalds /* 13341da177e4SLinus Torvalds * 1003.1g - break association. 13351da177e4SLinus Torvalds */ 13361da177e4SLinus Torvalds 13371da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 1338c720c7e8SEric Dumazet inet->inet_daddr = 0; 1339c720c7e8SEric Dumazet inet->inet_dport = 0; 1340bdeab991STom Herbert sock_rps_reset_rxhash(sk); 13411da177e4SLinus Torvalds sk->sk_bound_dev_if = 0; 13421da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 13431da177e4SLinus Torvalds inet_reset_saddr(sk); 13441da177e4SLinus Torvalds 13451da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 13461da177e4SLinus Torvalds sk->sk_prot->unhash(sk); 1347c720c7e8SEric Dumazet inet->inet_sport = 0; 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds sk_dst_reset(sk); 13501da177e4SLinus Torvalds return 0; 13511da177e4SLinus Torvalds } 1352c482c568SEric Dumazet EXPORT_SYMBOL(udp_disconnect); 13531da177e4SLinus Torvalds 1354645ca708SEric Dumazet void udp_lib_unhash(struct sock *sk) 1355645ca708SEric Dumazet { 1356723b4610SEric Dumazet if (sk_hashed(sk)) { 1357645ca708SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1358512615b6SEric Dumazet struct udp_hslot *hslot, *hslot2; 1359512615b6SEric Dumazet 1360512615b6SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1361d4cada4aSEric Dumazet udp_sk(sk)->udp_port_hash); 1362512615b6SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1363645ca708SEric Dumazet 1364c8db3fecSEric Dumazet spin_lock_bh(&hslot->lock); 136588ab1932SEric Dumazet if (sk_nulls_del_node_init_rcu(sk)) { 1366fdcc8aa9SEric Dumazet hslot->count--; 1367c720c7e8SEric Dumazet inet_sk(sk)->inet_num = 0; 1368645ca708SEric Dumazet 
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 1369512615b6SEric Dumazet 1370512615b6SEric Dumazet spin_lock(&hslot2->lock); 1371512615b6SEric Dumazet hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1372512615b6SEric Dumazet hslot2->count--; 1373512615b6SEric Dumazet spin_unlock(&hslot2->lock); 1374645ca708SEric Dumazet } 1375c8db3fecSEric Dumazet spin_unlock_bh(&hslot->lock); 1376645ca708SEric Dumazet } 1377723b4610SEric Dumazet } 1378645ca708SEric Dumazet EXPORT_SYMBOL(udp_lib_unhash); 1379645ca708SEric Dumazet 1380719f8358SEric Dumazet /* 1381719f8358SEric Dumazet * inet_rcv_saddr was changed, we must rehash secondary hash 1382719f8358SEric Dumazet */ 1383719f8358SEric Dumazet void udp_lib_rehash(struct sock *sk, u16 newhash) 1384719f8358SEric Dumazet { 1385719f8358SEric Dumazet if (sk_hashed(sk)) { 1386719f8358SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1387719f8358SEric Dumazet struct udp_hslot *hslot, *hslot2, *nhslot2; 1388719f8358SEric Dumazet 1389719f8358SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1390719f8358SEric Dumazet nhslot2 = udp_hashslot2(udptable, newhash); 1391719f8358SEric Dumazet udp_sk(sk)->udp_portaddr_hash = newhash; 1392719f8358SEric Dumazet if (hslot2 != nhslot2) { 1393719f8358SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1394719f8358SEric Dumazet udp_sk(sk)->udp_port_hash); 1395719f8358SEric Dumazet /* we must lock primary chain too */ 1396719f8358SEric Dumazet spin_lock_bh(&hslot->lock); 1397719f8358SEric Dumazet 1398719f8358SEric Dumazet spin_lock(&hslot2->lock); 1399719f8358SEric Dumazet hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1400719f8358SEric Dumazet hslot2->count--; 1401719f8358SEric Dumazet spin_unlock(&hslot2->lock); 1402719f8358SEric Dumazet 1403719f8358SEric Dumazet spin_lock(&nhslot2->lock); 1404719f8358SEric Dumazet hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 1405719f8358SEric Dumazet &nhslot2->head); 
1406719f8358SEric Dumazet nhslot2->count++; 1407719f8358SEric Dumazet spin_unlock(&nhslot2->lock); 1408719f8358SEric Dumazet 1409719f8358SEric Dumazet spin_unlock_bh(&hslot->lock); 1410719f8358SEric Dumazet } 1411719f8358SEric Dumazet } 1412719f8358SEric Dumazet } 1413719f8358SEric Dumazet EXPORT_SYMBOL(udp_lib_rehash); 1414719f8358SEric Dumazet 1415719f8358SEric Dumazet static void udp_v4_rehash(struct sock *sk) 1416719f8358SEric Dumazet { 1417719f8358SEric Dumazet u16 new_hash = udp4_portaddr_hash(sock_net(sk), 1418719f8358SEric Dumazet inet_sk(sk)->inet_rcv_saddr, 1419719f8358SEric Dumazet inet_sk(sk)->inet_num); 1420719f8358SEric Dumazet udp_lib_rehash(sk, new_hash); 1421719f8358SEric Dumazet } 1422719f8358SEric Dumazet 142393821778SHerbert Xu static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 142493821778SHerbert Xu { 1425fec5e652STom Herbert int rc; 142693821778SHerbert Xu 1427005ec974SShawn Bohrer if (inet_sk(sk)->inet_daddr) { 1428bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1429005ec974SShawn Bohrer sk_mark_napi_id(sk, skb); 1430005ec974SShawn Bohrer } 1431fec5e652STom Herbert 1432d826eb14SEric Dumazet rc = sock_queue_rcv_skb(sk, skb); 1433766e9037SEric Dumazet if (rc < 0) { 1434766e9037SEric Dumazet int is_udplite = IS_UDPLITE(sk); 1435766e9037SEric Dumazet 143693821778SHerbert Xu /* Note that an ENOMEM error is charged twice */ 1437766e9037SEric Dumazet if (rc == -ENOMEM) 143893821778SHerbert Xu UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 143993821778SHerbert Xu is_udplite); 1440766e9037SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1441766e9037SEric Dumazet kfree_skb(skb); 1442296f7ea7SSatoru Moriya trace_udp_fail_queue_rcv_skb(rc, sk); 1443766e9037SEric Dumazet return -1; 144493821778SHerbert Xu } 144593821778SHerbert Xu 144693821778SHerbert Xu return 0; 144793821778SHerbert Xu 144893821778SHerbert Xu } 144993821778SHerbert Xu 1450447167bfSEric Dumazet static struct static_key 
udp_encap_needed __read_mostly; 1451447167bfSEric Dumazet void udp_encap_enable(void) 1452447167bfSEric Dumazet { 1453447167bfSEric Dumazet if (!static_key_enabled(&udp_encap_needed)) 1454447167bfSEric Dumazet static_key_slow_inc(&udp_encap_needed); 1455447167bfSEric Dumazet } 1456447167bfSEric Dumazet EXPORT_SYMBOL(udp_encap_enable); 1457447167bfSEric Dumazet 1458db8dac20SDavid S. Miller /* returns: 1459db8dac20SDavid S. Miller * -1: error 1460db8dac20SDavid S. Miller * 0: success 1461db8dac20SDavid S. Miller * >0: "udp encap" protocol resubmission 1462db8dac20SDavid S. Miller * 1463db8dac20SDavid S. Miller * Note that in the success and error cases, the skb is assumed to 1464db8dac20SDavid S. Miller * have either been requeued or freed. 1465db8dac20SDavid S. Miller */ 1466db8dac20SDavid S. Miller int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1467db8dac20SDavid S. Miller { 1468db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 1469db8dac20SDavid S. Miller int rc; 1470db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 1471db8dac20SDavid S. Miller 1472db8dac20SDavid S. Miller /* 1473db8dac20SDavid S. Miller * Charge it to the socket, dropping if the queue is full. 1474db8dac20SDavid S. Miller */ 1475db8dac20SDavid S. Miller if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1476db8dac20SDavid S. Miller goto drop; 1477db8dac20SDavid S. Miller nf_reset(skb); 1478db8dac20SDavid S. Miller 1479447167bfSEric Dumazet if (static_key_false(&udp_encap_needed) && up->encap_type) { 14800ad92ad0SEric Dumazet int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 14810ad92ad0SEric Dumazet 1482db8dac20SDavid S. Miller /* 1483db8dac20SDavid S. Miller * This is an encapsulation socket so pass the skb to 1484db8dac20SDavid S. Miller * the socket's udp_encap_rcv() hook. Otherwise, just 1485db8dac20SDavid S. Miller * fall through and pass this up the UDP socket. 1486db8dac20SDavid S. 
Miller * up->encap_rcv() returns the following value: 1487db8dac20SDavid S. Miller * =0 if skb was successfully passed to the encap 1488db8dac20SDavid S. Miller * handler or was discarded by it. 1489db8dac20SDavid S. Miller * >0 if skb should be passed on to UDP. 1490db8dac20SDavid S. Miller * <0 if skb should be resubmitted as proto -N 1491db8dac20SDavid S. Miller */ 1492db8dac20SDavid S. Miller 1493db8dac20SDavid S. Miller /* if we're overly short, let UDP handle it */ 14940ad92ad0SEric Dumazet encap_rcv = ACCESS_ONCE(up->encap_rcv); 14950ad92ad0SEric Dumazet if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { 1496db8dac20SDavid S. Miller int ret; 1497db8dac20SDavid S. Miller 14980a80966bSTom Herbert /* Verify checksum before giving to encap */ 14990a80966bSTom Herbert if (udp_lib_checksum_complete(skb)) 15000a80966bSTom Herbert goto csum_error; 15010a80966bSTom Herbert 15020ad92ad0SEric Dumazet ret = encap_rcv(sk, skb); 1503db8dac20SDavid S. Miller if (ret <= 0) { 15040283328eSPavel Emelyanov UDP_INC_STATS_BH(sock_net(sk), 15050283328eSPavel Emelyanov UDP_MIB_INDATAGRAMS, 1506db8dac20SDavid S. Miller is_udplite); 1507db8dac20SDavid S. Miller return -ret; 1508db8dac20SDavid S. Miller } 1509db8dac20SDavid S. Miller } 1510db8dac20SDavid S. Miller 1511db8dac20SDavid S. Miller /* FALLTHROUGH -- it's a UDP Packet */ 1512db8dac20SDavid S. Miller } 1513db8dac20SDavid S. Miller 1514db8dac20SDavid S. Miller /* 1515db8dac20SDavid S. Miller * UDP-Lite specific tests, ignored on UDP sockets 1516db8dac20SDavid S. Miller */ 1517db8dac20SDavid S. Miller if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 1518db8dac20SDavid S. Miller 1519db8dac20SDavid S. Miller /* 1520db8dac20SDavid S. Miller * MIB statistics other than incrementing the error count are 1521db8dac20SDavid S. Miller * disabled for the following two types of errors: these depend 1522db8dac20SDavid S. 
Miller * on the application settings, not on the functioning of the 1523db8dac20SDavid S. Miller * protocol stack as such. 1524db8dac20SDavid S. Miller * 1525db8dac20SDavid S. Miller * RFC 3828 here recommends (sec 3.3): "There should also be a 1526db8dac20SDavid S. Miller * way ... to ... at least let the receiving application block 1527db8dac20SDavid S. Miller * delivery of packets with coverage values less than a value 1528db8dac20SDavid S. Miller * provided by the application." 1529db8dac20SDavid S. Miller */ 1530db8dac20SDavid S. Miller if (up->pcrlen == 0) { /* full coverage was set */ 1531afd46503SJoe Perches LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n", 1532db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, skb->len); 1533db8dac20SDavid S. Miller goto drop; 1534db8dac20SDavid S. Miller } 1535db8dac20SDavid S. Miller /* The next case involves violating the min. coverage requested 1536db8dac20SDavid S. Miller * by the receiver. This is subtle: if receiver wants x and x is 1537db8dac20SDavid S. Miller * greater than the buffersize/MTU then receiver will complain 1538db8dac20SDavid S. Miller * that it wants x while sender emits packets of smaller size y. 1539db8dac20SDavid S. Miller * Therefore the above ...()->partial_cov statement is essential. 1540db8dac20SDavid S. Miller */ 1541db8dac20SDavid S. Miller if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { 1542afd46503SJoe Perches LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n", 1543db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, up->pcrlen); 1544db8dac20SDavid S. Miller goto drop; 1545db8dac20SDavid S. Miller } 1546db8dac20SDavid S. Miller } 1547db8dac20SDavid S. Miller 154833d480ceSEric Dumazet if (rcu_access_pointer(sk->sk_filter) && 154933d480ceSEric Dumazet udp_lib_checksum_complete(skb)) 15506a5dc9e5SEric Dumazet goto csum_error; 1551db8dac20SDavid S. 
Miller 1552c377411fSEric Dumazet 1553f545a38fSEric Dumazet if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) 1554c377411fSEric Dumazet goto drop; 1555c377411fSEric Dumazet 155693821778SHerbert Xu rc = 0; 1557db8dac20SDavid S. Miller 1558fbf8866dSShawn Bohrer ipv4_pktinfo_prepare(sk, skb); 155993821778SHerbert Xu bh_lock_sock(sk); 156093821778SHerbert Xu if (!sock_owned_by_user(sk)) 156193821778SHerbert Xu rc = __udp_queue_rcv_skb(sk, skb); 1562f545a38fSEric Dumazet else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { 156355349790SZhu Yi bh_unlock_sock(sk); 156455349790SZhu Yi goto drop; 156555349790SZhu Yi } 156693821778SHerbert Xu bh_unlock_sock(sk); 156793821778SHerbert Xu 156893821778SHerbert Xu return rc; 1569db8dac20SDavid S. Miller 15706a5dc9e5SEric Dumazet csum_error: 15716a5dc9e5SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1572db8dac20SDavid S. Miller drop: 15730283328eSPavel Emelyanov UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 15748edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 1575db8dac20SDavid S. Miller kfree_skb(skb); 1576db8dac20SDavid S. Miller return -1; 1577db8dac20SDavid S. Miller } 1578db8dac20SDavid S. Miller 15791240d137SEric Dumazet 15801240d137SEric Dumazet static void flush_stack(struct sock **stack, unsigned int count, 15811240d137SEric Dumazet struct sk_buff *skb, unsigned int final) 15821240d137SEric Dumazet { 15831240d137SEric Dumazet unsigned int i; 15841240d137SEric Dumazet struct sk_buff *skb1 = NULL; 1585f6b8f32cSEric Dumazet struct sock *sk; 15861240d137SEric Dumazet 15871240d137SEric Dumazet for (i = 0; i < count; i++) { 1588f6b8f32cSEric Dumazet sk = stack[i]; 15891240d137SEric Dumazet if (likely(skb1 == NULL)) 15901240d137SEric Dumazet skb1 = (i == final) ? 
skb : skb_clone(skb, GFP_ATOMIC); 15911240d137SEric Dumazet 1592f6b8f32cSEric Dumazet if (!skb1) { 1593f6b8f32cSEric Dumazet atomic_inc(&sk->sk_drops); 1594f6b8f32cSEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1595f6b8f32cSEric Dumazet IS_UDPLITE(sk)); 1596f6b8f32cSEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 1597f6b8f32cSEric Dumazet IS_UDPLITE(sk)); 1598f6b8f32cSEric Dumazet } 1599f6b8f32cSEric Dumazet 1600f6b8f32cSEric Dumazet if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) 16011240d137SEric Dumazet skb1 = NULL; 16021240d137SEric Dumazet } 16031240d137SEric Dumazet if (unlikely(skb1)) 16041240d137SEric Dumazet kfree_skb(skb1); 16051240d137SEric Dumazet } 16061240d137SEric Dumazet 160797502231SEric Dumazet /* For TCP sockets, sk_rx_dst is protected by socket lock 1608e47eb5dfSEric Dumazet * For UDP, we use xchg() to guard against concurrent changes. 160997502231SEric Dumazet */ 161097502231SEric Dumazet static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) 1611421b3885SShawn Bohrer { 161297502231SEric Dumazet struct dst_entry *old; 1613421b3885SShawn Bohrer 1614421b3885SShawn Bohrer dst_hold(dst); 1615e47eb5dfSEric Dumazet old = xchg(&sk->sk_rx_dst, dst); 161697502231SEric Dumazet dst_release(old); 161797502231SEric Dumazet } 1618421b3885SShawn Bohrer 1619db8dac20SDavid S. Miller /* 1620db8dac20SDavid S. Miller * Multicasts and broadcasts go to each listener. 1621db8dac20SDavid S. Miller * 16221240d137SEric Dumazet * Note: called only from the BH handler context. 1623db8dac20SDavid S. Miller */ 1624e3163493SPavel Emelyanov static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1625db8dac20SDavid S. Miller struct udphdr *uh, 1626db8dac20SDavid S. Miller __be32 saddr, __be32 daddr, 1627645ca708SEric Dumazet struct udp_table *udptable) 1628db8dac20SDavid S. 
Miller { 16291240d137SEric Dumazet struct sock *sk, *stack[256 / sizeof(struct sock *)]; 1630f86dcc5aSEric Dumazet struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); 1631db8dac20SDavid S. Miller int dif; 16321240d137SEric Dumazet unsigned int i, count = 0; 1633db8dac20SDavid S. Miller 1634645ca708SEric Dumazet spin_lock(&hslot->lock); 163588ab1932SEric Dumazet sk = sk_nulls_head(&hslot->head); 1636db8dac20SDavid S. Miller dif = skb->dev->ifindex; 1637920a4611SEric Dumazet sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 16381240d137SEric Dumazet while (sk) { 16391240d137SEric Dumazet stack[count++] = sk; 16401240d137SEric Dumazet sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, 16411240d137SEric Dumazet daddr, uh->source, saddr, dif); 16421240d137SEric Dumazet if (unlikely(count == ARRAY_SIZE(stack))) { 16431240d137SEric Dumazet if (!sk) 16441240d137SEric Dumazet break; 16451240d137SEric Dumazet flush_stack(stack, count, skb, ~0); 16461240d137SEric Dumazet count = 0; 1647db8dac20SDavid S. Miller } 16481240d137SEric Dumazet } 16491240d137SEric Dumazet /* 16501240d137SEric Dumazet * before releasing chain lock, we must take a reference on sockets 16511240d137SEric Dumazet */ 16521240d137SEric Dumazet for (i = 0; i < count; i++) 16531240d137SEric Dumazet sock_hold(stack[i]); 16541240d137SEric Dumazet 1655645ca708SEric Dumazet spin_unlock(&hslot->lock); 16561240d137SEric Dumazet 16571240d137SEric Dumazet /* 16581240d137SEric Dumazet * do the slow work with no lock held 16591240d137SEric Dumazet */ 16601240d137SEric Dumazet if (count) { 16611240d137SEric Dumazet flush_stack(stack, count, skb, count - 1); 16621240d137SEric Dumazet 16631240d137SEric Dumazet for (i = 0; i < count; i++) 16641240d137SEric Dumazet sock_put(stack[i]); 16651240d137SEric Dumazet } else { 16661240d137SEric Dumazet kfree_skb(skb); 16671240d137SEric Dumazet } 1668db8dac20SDavid S. Miller return 0; 1669db8dac20SDavid S. 
Miller } 1670db8dac20SDavid S. Miller 1671db8dac20SDavid S. Miller /* Initialize UDP checksum. If exited with zero value (success), 1672db8dac20SDavid S. Miller * CHECKSUM_UNNECESSARY means, that no more checks are required. 1673db8dac20SDavid S. Miller * Otherwise, csum completion requires chacksumming packet body, 1674db8dac20SDavid S. Miller * including udp header and folding it to skb->csum. 1675db8dac20SDavid S. Miller */ 1676db8dac20SDavid S. Miller static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, 1677db8dac20SDavid S. Miller int proto) 1678db8dac20SDavid S. Miller { 1679db8dac20SDavid S. Miller int err; 1680db8dac20SDavid S. Miller 1681db8dac20SDavid S. Miller UDP_SKB_CB(skb)->partial_cov = 0; 1682db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov = skb->len; 1683db8dac20SDavid S. Miller 1684db8dac20SDavid S. Miller if (proto == IPPROTO_UDPLITE) { 1685db8dac20SDavid S. Miller err = udplite_checksum_init(skb, uh); 1686db8dac20SDavid S. Miller if (err) 1687db8dac20SDavid S. Miller return err; 1688db8dac20SDavid S. Miller } 1689db8dac20SDavid S. Miller 1690ed70fcfcSTom Herbert return skb_checksum_init_zero_check(skb, proto, uh->check, 1691ed70fcfcSTom Herbert inet_compute_pseudo); 1692db8dac20SDavid S. Miller } 1693db8dac20SDavid S. Miller 1694db8dac20SDavid S. Miller /* 1695db8dac20SDavid S. Miller * All we need to do is get the socket, and then do a checksum. 1696db8dac20SDavid S. Miller */ 1697db8dac20SDavid S. Miller 1698645ca708SEric Dumazet int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, 1699db8dac20SDavid S. Miller int proto) 1700db8dac20SDavid S. Miller { 1701db8dac20SDavid S. Miller struct sock *sk; 17027b5e56f9SJesper Dangaard Brouer struct udphdr *uh; 1703db8dac20SDavid S. Miller unsigned short ulen; 1704adf30907SEric Dumazet struct rtable *rt = skb_rtable(skb); 17052783ef23SJesper Dangaard Brouer __be32 saddr, daddr; 17060283328eSPavel Emelyanov struct net *net = dev_net(skb->dev); 1707db8dac20SDavid S. 
Miller 1708db8dac20SDavid S. Miller /* 1709db8dac20SDavid S. Miller * Validate the packet. 1710db8dac20SDavid S. Miller */ 1711db8dac20SDavid S. Miller if (!pskb_may_pull(skb, sizeof(struct udphdr))) 1712db8dac20SDavid S. Miller goto drop; /* No space for header. */ 1713db8dac20SDavid S. Miller 17147b5e56f9SJesper Dangaard Brouer uh = udp_hdr(skb); 1715db8dac20SDavid S. Miller ulen = ntohs(uh->len); 1716ccc2d97cSBjørn Mork saddr = ip_hdr(skb)->saddr; 1717ccc2d97cSBjørn Mork daddr = ip_hdr(skb)->daddr; 1718ccc2d97cSBjørn Mork 1719db8dac20SDavid S. Miller if (ulen > skb->len) 1720db8dac20SDavid S. Miller goto short_packet; 1721db8dac20SDavid S. Miller 1722db8dac20SDavid S. Miller if (proto == IPPROTO_UDP) { 1723db8dac20SDavid S. Miller /* UDP validates ulen. */ 1724db8dac20SDavid S. Miller if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) 1725db8dac20SDavid S. Miller goto short_packet; 1726db8dac20SDavid S. Miller uh = udp_hdr(skb); 1727db8dac20SDavid S. Miller } 1728db8dac20SDavid S. Miller 1729db8dac20SDavid S. Miller if (udp4_csum_init(skb, uh, proto)) 1730db8dac20SDavid S. Miller goto csum_error; 1731db8dac20SDavid S. Miller 17328afdd99aSEric Dumazet sk = skb_steal_sock(skb); 17338afdd99aSEric Dumazet if (sk) { 173497502231SEric Dumazet struct dst_entry *dst = skb_dst(skb); 1735421b3885SShawn Bohrer int ret; 1736421b3885SShawn Bohrer 173797502231SEric Dumazet if (unlikely(sk->sk_rx_dst != dst)) 173897502231SEric Dumazet udp_sk_rx_dst_set(sk, dst); 1739421b3885SShawn Bohrer 1740421b3885SShawn Bohrer ret = udp_queue_rcv_skb(sk, skb); 17418afdd99aSEric Dumazet sock_put(sk); 1742421b3885SShawn Bohrer /* a return value > 0 means to resubmit the input, but 1743421b3885SShawn Bohrer * it wants the return to be -protocol, or 0 1744421b3885SShawn Bohrer */ 1745421b3885SShawn Bohrer if (ret > 0) 1746421b3885SShawn Bohrer return -ret; 1747421b3885SShawn Bohrer return 0; 1748421b3885SShawn Bohrer } else { 1749db8dac20SDavid S. 
Miller if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1750e3163493SPavel Emelyanov return __udp4_lib_mcast_deliver(net, skb, uh, 1751e3163493SPavel Emelyanov saddr, daddr, udptable); 1752db8dac20SDavid S. Miller 1753607c4aafSKOVACS Krisztian sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); 1754421b3885SShawn Bohrer } 1755db8dac20SDavid S. Miller 1756db8dac20SDavid S. Miller if (sk != NULL) { 1757a5b50476SEliezer Tamir int ret; 1758a5b50476SEliezer Tamir 1759a5b50476SEliezer Tamir ret = udp_queue_rcv_skb(sk, skb); 1760db8dac20SDavid S. Miller sock_put(sk); 1761db8dac20SDavid S. Miller 1762db8dac20SDavid S. Miller /* a return value > 0 means to resubmit the input, but 1763db8dac20SDavid S. Miller * it wants the return to be -protocol, or 0 1764db8dac20SDavid S. Miller */ 1765db8dac20SDavid S. Miller if (ret > 0) 1766db8dac20SDavid S. Miller return -ret; 1767db8dac20SDavid S. Miller return 0; 1768db8dac20SDavid S. Miller } 1769db8dac20SDavid S. Miller 1770db8dac20SDavid S. Miller if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1771db8dac20SDavid S. Miller goto drop; 1772db8dac20SDavid S. Miller nf_reset(skb); 1773db8dac20SDavid S. Miller 1774db8dac20SDavid S. Miller /* No socket. Drop packet silently, if checksum is wrong */ 1775db8dac20SDavid S. Miller if (udp_lib_checksum_complete(skb)) 1776db8dac20SDavid S. Miller goto csum_error; 1777db8dac20SDavid S. Miller 17780283328eSPavel Emelyanov UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 1779db8dac20SDavid S. Miller icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1780db8dac20SDavid S. Miller 1781db8dac20SDavid S. Miller /* 1782db8dac20SDavid S. Miller * Hmm. We got an UDP packet to a port to which we 1783db8dac20SDavid S. Miller * don't wanna listen. Ignore it. 1784db8dac20SDavid S. Miller */ 1785db8dac20SDavid S. Miller kfree_skb(skb); 1786db8dac20SDavid S. Miller return 0; 1787db8dac20SDavid S. Miller 1788db8dac20SDavid S. 
Miller short_packet: 1789673d57e7SHarvey Harrison LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", 1790afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1791afd46503SJoe Perches &saddr, ntohs(uh->source), 1792afd46503SJoe Perches ulen, skb->len, 1793afd46503SJoe Perches &daddr, ntohs(uh->dest)); 1794db8dac20SDavid S. Miller goto drop; 1795db8dac20SDavid S. Miller 1796db8dac20SDavid S. Miller csum_error: 1797db8dac20SDavid S. Miller /* 1798db8dac20SDavid S. Miller * RFC1122: OK. Discards the bad packet silently (as far as 1799db8dac20SDavid S. Miller * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1800db8dac20SDavid S. Miller */ 1801673d57e7SHarvey Harrison LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", 1802afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1803afd46503SJoe Perches &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), 1804db8dac20SDavid S. Miller ulen); 18056a5dc9e5SEric Dumazet UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); 1806db8dac20SDavid S. Miller drop: 18070283328eSPavel Emelyanov UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 1808db8dac20SDavid S. Miller kfree_skb(skb); 1809db8dac20SDavid S. Miller return 0; 1810db8dac20SDavid S. Miller } 1811db8dac20SDavid S. Miller 1812421b3885SShawn Bohrer /* We can only early demux multicast if there is a single matching socket. 
1813421b3885SShawn Bohrer * If more than one socket found returns NULL 1814421b3885SShawn Bohrer */ 1815421b3885SShawn Bohrer static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, 1816421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 1817421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 1818421b3885SShawn Bohrer int dif) 1819421b3885SShawn Bohrer { 1820421b3885SShawn Bohrer struct sock *sk, *result; 1821421b3885SShawn Bohrer struct hlist_nulls_node *node; 1822421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 1823421b3885SShawn Bohrer unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); 1824421b3885SShawn Bohrer struct udp_hslot *hslot = &udp_table.hash[slot]; 1825421b3885SShawn Bohrer 1826421b3885SShawn Bohrer rcu_read_lock(); 1827421b3885SShawn Bohrer begin: 1828421b3885SShawn Bohrer count = 0; 1829421b3885SShawn Bohrer result = NULL; 1830421b3885SShawn Bohrer sk_nulls_for_each_rcu(sk, node, &hslot->head) { 1831421b3885SShawn Bohrer if (__udp_is_mcast_sock(net, sk, 1832421b3885SShawn Bohrer loc_port, loc_addr, 1833421b3885SShawn Bohrer rmt_port, rmt_addr, 1834421b3885SShawn Bohrer dif, hnum)) { 1835421b3885SShawn Bohrer result = sk; 1836421b3885SShawn Bohrer ++count; 1837421b3885SShawn Bohrer } 1838421b3885SShawn Bohrer } 1839421b3885SShawn Bohrer /* 1840421b3885SShawn Bohrer * if the nulls value we got at the end of this lookup is 1841421b3885SShawn Bohrer * not the expected one, we must restart lookup. 1842421b3885SShawn Bohrer * We probably met an item that was moved to another chain. 
1843421b3885SShawn Bohrer */ 1844421b3885SShawn Bohrer if (get_nulls_value(node) != slot) 1845421b3885SShawn Bohrer goto begin; 1846421b3885SShawn Bohrer 1847421b3885SShawn Bohrer if (result) { 1848421b3885SShawn Bohrer if (count != 1 || 1849421b3885SShawn Bohrer unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 1850421b3885SShawn Bohrer result = NULL; 1851f69b923aSEric Dumazet else if (unlikely(!__udp_is_mcast_sock(net, result, 1852421b3885SShawn Bohrer loc_port, loc_addr, 1853421b3885SShawn Bohrer rmt_port, rmt_addr, 1854421b3885SShawn Bohrer dif, hnum))) { 1855421b3885SShawn Bohrer sock_put(result); 1856421b3885SShawn Bohrer result = NULL; 1857421b3885SShawn Bohrer } 1858421b3885SShawn Bohrer } 1859421b3885SShawn Bohrer rcu_read_unlock(); 1860421b3885SShawn Bohrer return result; 1861421b3885SShawn Bohrer } 1862421b3885SShawn Bohrer 1863421b3885SShawn Bohrer /* For unicast we should only early demux connected sockets or we can 1864421b3885SShawn Bohrer * break forwarding setups. The chains here can be long so only check 1865421b3885SShawn Bohrer * if the first socket is an exact match and if not move on. 
1866421b3885SShawn Bohrer */ 1867421b3885SShawn Bohrer static struct sock *__udp4_lib_demux_lookup(struct net *net, 1868421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 1869421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 1870421b3885SShawn Bohrer int dif) 1871421b3885SShawn Bohrer { 1872421b3885SShawn Bohrer struct sock *sk, *result; 1873421b3885SShawn Bohrer struct hlist_nulls_node *node; 1874421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 1875421b3885SShawn Bohrer unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); 1876421b3885SShawn Bohrer unsigned int slot2 = hash2 & udp_table.mask; 1877421b3885SShawn Bohrer struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 1878*c7228317SJoe Perches INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); 1879421b3885SShawn Bohrer const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); 1880421b3885SShawn Bohrer 1881421b3885SShawn Bohrer rcu_read_lock(); 1882421b3885SShawn Bohrer result = NULL; 1883421b3885SShawn Bohrer udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { 1884421b3885SShawn Bohrer if (INET_MATCH(sk, net, acookie, 1885421b3885SShawn Bohrer rmt_addr, loc_addr, ports, dif)) 1886421b3885SShawn Bohrer result = sk; 1887421b3885SShawn Bohrer /* Only check first socket in chain */ 1888421b3885SShawn Bohrer break; 1889421b3885SShawn Bohrer } 1890421b3885SShawn Bohrer 1891421b3885SShawn Bohrer if (result) { 1892421b3885SShawn Bohrer if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 1893421b3885SShawn Bohrer result = NULL; 1894421b3885SShawn Bohrer else if (unlikely(!INET_MATCH(sk, net, acookie, 1895421b3885SShawn Bohrer rmt_addr, loc_addr, 1896421b3885SShawn Bohrer ports, dif))) { 1897421b3885SShawn Bohrer sock_put(result); 1898421b3885SShawn Bohrer result = NULL; 1899421b3885SShawn Bohrer } 1900421b3885SShawn Bohrer } 1901421b3885SShawn Bohrer rcu_read_unlock(); 1902421b3885SShawn Bohrer return result; 1903421b3885SShawn Bohrer } 1904421b3885SShawn Bohrer 
1905421b3885SShawn Bohrer void udp_v4_early_demux(struct sk_buff *skb) 1906421b3885SShawn Bohrer { 1907610438b7SEric Dumazet struct net *net = dev_net(skb->dev); 1908610438b7SEric Dumazet const struct iphdr *iph; 1909610438b7SEric Dumazet const struct udphdr *uh; 1910421b3885SShawn Bohrer struct sock *sk; 1911421b3885SShawn Bohrer struct dst_entry *dst; 1912421b3885SShawn Bohrer int dif = skb->dev->ifindex; 1913421b3885SShawn Bohrer 1914421b3885SShawn Bohrer /* validate the packet */ 1915421b3885SShawn Bohrer if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 1916421b3885SShawn Bohrer return; 1917421b3885SShawn Bohrer 1918610438b7SEric Dumazet iph = ip_hdr(skb); 1919610438b7SEric Dumazet uh = udp_hdr(skb); 1920610438b7SEric Dumazet 1921421b3885SShawn Bohrer if (skb->pkt_type == PACKET_BROADCAST || 1922421b3885SShawn Bohrer skb->pkt_type == PACKET_MULTICAST) 1923421b3885SShawn Bohrer sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 1924421b3885SShawn Bohrer uh->source, iph->saddr, dif); 1925421b3885SShawn Bohrer else if (skb->pkt_type == PACKET_HOST) 1926421b3885SShawn Bohrer sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, 1927421b3885SShawn Bohrer uh->source, iph->saddr, dif); 1928421b3885SShawn Bohrer else 1929421b3885SShawn Bohrer return; 1930421b3885SShawn Bohrer 1931421b3885SShawn Bohrer if (!sk) 1932421b3885SShawn Bohrer return; 1933421b3885SShawn Bohrer 1934421b3885SShawn Bohrer skb->sk = sk; 1935421b3885SShawn Bohrer skb->destructor = sock_edemux; 1936421b3885SShawn Bohrer dst = sk->sk_rx_dst; 1937421b3885SShawn Bohrer 1938421b3885SShawn Bohrer if (dst) 1939421b3885SShawn Bohrer dst = dst_check(dst, 0); 1940421b3885SShawn Bohrer if (dst) 1941421b3885SShawn Bohrer skb_dst_set_noref(skb, dst); 1942421b3885SShawn Bohrer } 1943421b3885SShawn Bohrer 1944db8dac20SDavid S. Miller int udp_rcv(struct sk_buff *skb) 1945db8dac20SDavid S. 
Miller { 1946645ca708SEric Dumazet return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); 1947db8dac20SDavid S. Miller } 1948db8dac20SDavid S. Miller 19497d06b2e0SBrian Haley void udp_destroy_sock(struct sock *sk) 1950db8dac20SDavid S. Miller { 195144046a59STom Parkin struct udp_sock *up = udp_sk(sk); 19528a74ad60SEric Dumazet bool slow = lock_sock_fast(sk); 1953db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 19548a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 195544046a59STom Parkin if (static_key_false(&udp_encap_needed) && up->encap_type) { 195644046a59STom Parkin void (*encap_destroy)(struct sock *sk); 195744046a59STom Parkin encap_destroy = ACCESS_ONCE(up->encap_destroy); 195844046a59STom Parkin if (encap_destroy) 195944046a59STom Parkin encap_destroy(sk); 196044046a59STom Parkin } 1961db8dac20SDavid S. Miller } 1962db8dac20SDavid S. Miller 19631da177e4SLinus Torvalds /* 19641da177e4SLinus Torvalds * Socket option code for UDP 19651da177e4SLinus Torvalds */ 19664c0a6cb0SGerrit Renker int udp_lib_setsockopt(struct sock *sk, int level, int optname, 1967b7058842SDavid S. 
Miller char __user *optval, unsigned int optlen, 19684c0a6cb0SGerrit Renker int (*push_pending_frames)(struct sock *)) 19691da177e4SLinus Torvalds { 19701da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 19711da177e4SLinus Torvalds int val; 19721da177e4SLinus Torvalds int err = 0; 1973b2bf1e26SWang Chen int is_udplite = IS_UDPLITE(sk); 19741da177e4SLinus Torvalds 19751da177e4SLinus Torvalds if (optlen < sizeof(int)) 19761da177e4SLinus Torvalds return -EINVAL; 19771da177e4SLinus Torvalds 19781da177e4SLinus Torvalds if (get_user(val, (int __user *)optval)) 19791da177e4SLinus Torvalds return -EFAULT; 19801da177e4SLinus Torvalds 19811da177e4SLinus Torvalds switch (optname) { 19821da177e4SLinus Torvalds case UDP_CORK: 19831da177e4SLinus Torvalds if (val != 0) { 19841da177e4SLinus Torvalds up->corkflag = 1; 19851da177e4SLinus Torvalds } else { 19861da177e4SLinus Torvalds up->corkflag = 0; 19871da177e4SLinus Torvalds lock_sock(sk); 19884c0a6cb0SGerrit Renker (*push_pending_frames)(sk); 19891da177e4SLinus Torvalds release_sock(sk); 19901da177e4SLinus Torvalds } 19911da177e4SLinus Torvalds break; 19921da177e4SLinus Torvalds 19931da177e4SLinus Torvalds case UDP_ENCAP: 19941da177e4SLinus Torvalds switch (val) { 19951da177e4SLinus Torvalds case 0: 19961da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP: 19971da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP_NON_IKE: 1998067b207bSJames Chapman up->encap_rcv = xfrm4_udp_encap_rcv; 1999067b207bSJames Chapman /* FALLTHROUGH */ 2000342f0234SJames Chapman case UDP_ENCAP_L2TPINUDP: 20011da177e4SLinus Torvalds up->encap_type = val; 2002447167bfSEric Dumazet udp_encap_enable(); 20031da177e4SLinus Torvalds break; 20041da177e4SLinus Torvalds default: 20051da177e4SLinus Torvalds err = -ENOPROTOOPT; 20061da177e4SLinus Torvalds break; 20071da177e4SLinus Torvalds } 20081da177e4SLinus Torvalds break; 20091da177e4SLinus Torvalds 2010ba4e58ecSGerrit Renker /* 2011ba4e58ecSGerrit Renker * UDP-Lite's partial checksum coverage (RFC 3828). 
2012ba4e58ecSGerrit Renker */ 2013ba4e58ecSGerrit Renker /* The sender sets actual checksum coverage length via this option. 2014ba4e58ecSGerrit Renker * The case coverage > packet length is handled by send module. */ 2015ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2016b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2017ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2018ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 2019ba4e58ecSGerrit Renker val = 8; 20204be929beSAlexey Dobriyan else if (val > USHRT_MAX) 20214be929beSAlexey Dobriyan val = USHRT_MAX; 2022ba4e58ecSGerrit Renker up->pcslen = val; 2023ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_SEND_CC; 2024ba4e58ecSGerrit Renker break; 2025ba4e58ecSGerrit Renker 2026ba4e58ecSGerrit Renker /* The receiver specifies a minimum checksum coverage value. To make 2027ba4e58ecSGerrit Renker * sense, this should be set to at least 8 (as done below). If zero is 2028ba4e58ecSGerrit Renker * used, this again means full checksum coverage. */ 2029ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2030b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2031ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2032ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Avoid silly minimal values. */ 2033ba4e58ecSGerrit Renker val = 8; 20344be929beSAlexey Dobriyan else if (val > USHRT_MAX) 20354be929beSAlexey Dobriyan val = USHRT_MAX; 2036ba4e58ecSGerrit Renker up->pcrlen = val; 2037ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_RECV_CC; 2038ba4e58ecSGerrit Renker break; 2039ba4e58ecSGerrit Renker 20401da177e4SLinus Torvalds default: 20411da177e4SLinus Torvalds err = -ENOPROTOOPT; 20421da177e4SLinus Torvalds break; 20436516c655SStephen Hemminger } 20441da177e4SLinus Torvalds 20451da177e4SLinus Torvalds return err; 20461da177e4SLinus Torvalds } 2047c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_setsockopt); 20481da177e4SLinus Torvalds 2049db8dac20SDavid S. 
Miller int udp_setsockopt(struct sock *sk, int level, int optname, 2050b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2051db8dac20SDavid S. Miller { 2052db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2053db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2054db8dac20SDavid S. Miller udp_push_pending_frames); 2055db8dac20SDavid S. Miller return ip_setsockopt(sk, level, optname, optval, optlen); 2056db8dac20SDavid S. Miller } 2057db8dac20SDavid S. Miller 2058db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2059db8dac20SDavid S. Miller int compat_udp_setsockopt(struct sock *sk, int level, int optname, 2060b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2061db8dac20SDavid S. Miller { 2062db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2063db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2064db8dac20SDavid S. Miller udp_push_pending_frames); 2065db8dac20SDavid S. Miller return compat_ip_setsockopt(sk, level, optname, optval, optlen); 2066db8dac20SDavid S. Miller } 2067db8dac20SDavid S. Miller #endif 2068db8dac20SDavid S. 
Miller 20694c0a6cb0SGerrit Renker int udp_lib_getsockopt(struct sock *sk, int level, int optname, 20701da177e4SLinus Torvalds char __user *optval, int __user *optlen) 20711da177e4SLinus Torvalds { 20721da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 20731da177e4SLinus Torvalds int val, len; 20741da177e4SLinus Torvalds 20751da177e4SLinus Torvalds if (get_user(len, optlen)) 20761da177e4SLinus Torvalds return -EFAULT; 20771da177e4SLinus Torvalds 20781da177e4SLinus Torvalds len = min_t(unsigned int, len, sizeof(int)); 20791da177e4SLinus Torvalds 20801da177e4SLinus Torvalds if (len < 0) 20811da177e4SLinus Torvalds return -EINVAL; 20821da177e4SLinus Torvalds 20831da177e4SLinus Torvalds switch (optname) { 20841da177e4SLinus Torvalds case UDP_CORK: 20851da177e4SLinus Torvalds val = up->corkflag; 20861da177e4SLinus Torvalds break; 20871da177e4SLinus Torvalds 20881da177e4SLinus Torvalds case UDP_ENCAP: 20891da177e4SLinus Torvalds val = up->encap_type; 20901da177e4SLinus Torvalds break; 20911da177e4SLinus Torvalds 2092ba4e58ecSGerrit Renker /* The following two cannot be changed on UDP sockets, the return is 2093ba4e58ecSGerrit Renker * always 0 (which corresponds to the full checksum coverage of UDP). 
*/ 2094ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2095ba4e58ecSGerrit Renker val = up->pcslen; 2096ba4e58ecSGerrit Renker break; 2097ba4e58ecSGerrit Renker 2098ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2099ba4e58ecSGerrit Renker val = up->pcrlen; 2100ba4e58ecSGerrit Renker break; 2101ba4e58ecSGerrit Renker 21021da177e4SLinus Torvalds default: 21031da177e4SLinus Torvalds return -ENOPROTOOPT; 21046516c655SStephen Hemminger } 21051da177e4SLinus Torvalds 21061da177e4SLinus Torvalds if (put_user(len, optlen)) 21071da177e4SLinus Torvalds return -EFAULT; 21081da177e4SLinus Torvalds if (copy_to_user(optval, &val, len)) 21091da177e4SLinus Torvalds return -EFAULT; 21101da177e4SLinus Torvalds return 0; 21111da177e4SLinus Torvalds } 2112c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_getsockopt); 21131da177e4SLinus Torvalds 2114db8dac20SDavid S. Miller int udp_getsockopt(struct sock *sk, int level, int optname, 2115db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2116db8dac20SDavid S. Miller { 2117db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2118db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2119db8dac20SDavid S. Miller return ip_getsockopt(sk, level, optname, optval, optlen); 2120db8dac20SDavid S. Miller } 2121db8dac20SDavid S. Miller 2122db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2123db8dac20SDavid S. Miller int compat_udp_getsockopt(struct sock *sk, int level, int optname, 2124db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2125db8dac20SDavid S. Miller { 2126db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2127db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2128db8dac20SDavid S. Miller return compat_ip_getsockopt(sk, level, optname, optval, optlen); 2129db8dac20SDavid S. Miller } 2130db8dac20SDavid S. 
Miller #endif 21311da177e4SLinus Torvalds /** 21321da177e4SLinus Torvalds * udp_poll - wait for a UDP event. 21331da177e4SLinus Torvalds * @file - file struct 21341da177e4SLinus Torvalds * @sock - socket 21351da177e4SLinus Torvalds * @wait - poll table 21361da177e4SLinus Torvalds * 21371da177e4SLinus Torvalds * This is same as datagram poll, except for the special case of 21381da177e4SLinus Torvalds * blocking sockets. If application is using a blocking fd 21391da177e4SLinus Torvalds * and a packet with checksum error is in the queue; 21401da177e4SLinus Torvalds * then it could get return from select indicating data available 21411da177e4SLinus Torvalds * but then block when reading it. Add special case code 21421da177e4SLinus Torvalds * to work around these arguably broken applications. 21431da177e4SLinus Torvalds */ 21441da177e4SLinus Torvalds unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) 21451da177e4SLinus Torvalds { 21461da177e4SLinus Torvalds unsigned int mask = datagram_poll(file, sock, wait); 21471da177e4SLinus Torvalds struct sock *sk = sock->sk; 21481da177e4SLinus Torvalds 2149c3f1dbafSDavid Majnemer sock_rps_record_flow(sk); 2150c3f1dbafSDavid Majnemer 21511da177e4SLinus Torvalds /* Check for false positives due to checksum errors */ 215285584672SEric Dumazet if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && 215385584672SEric Dumazet !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) 21541da177e4SLinus Torvalds mask &= ~(POLLIN | POLLRDNORM); 21551da177e4SLinus Torvalds 21561da177e4SLinus Torvalds return mask; 21571da177e4SLinus Torvalds 21581da177e4SLinus Torvalds } 2159c482c568SEric Dumazet EXPORT_SYMBOL(udp_poll); 21601da177e4SLinus Torvalds 2161db8dac20SDavid S. Miller struct proto udp_prot = { 2162db8dac20SDavid S. Miller .name = "UDP", 2163db8dac20SDavid S. Miller .owner = THIS_MODULE, 2164db8dac20SDavid S. Miller .close = udp_lib_close, 2165db8dac20SDavid S. 
Miller .connect = ip4_datagram_connect, 2166db8dac20SDavid S. Miller .disconnect = udp_disconnect, 2167db8dac20SDavid S. Miller .ioctl = udp_ioctl, 2168db8dac20SDavid S. Miller .destroy = udp_destroy_sock, 2169db8dac20SDavid S. Miller .setsockopt = udp_setsockopt, 2170db8dac20SDavid S. Miller .getsockopt = udp_getsockopt, 2171db8dac20SDavid S. Miller .sendmsg = udp_sendmsg, 2172db8dac20SDavid S. Miller .recvmsg = udp_recvmsg, 2173db8dac20SDavid S. Miller .sendpage = udp_sendpage, 217493821778SHerbert Xu .backlog_rcv = __udp_queue_rcv_skb, 21758141ed9fSSteffen Klassert .release_cb = ip4_datagram_release_cb, 2176db8dac20SDavid S. Miller .hash = udp_lib_hash, 2177db8dac20SDavid S. Miller .unhash = udp_lib_unhash, 2178719f8358SEric Dumazet .rehash = udp_v4_rehash, 2179db8dac20SDavid S. Miller .get_port = udp_v4_get_port, 2180db8dac20SDavid S. Miller .memory_allocated = &udp_memory_allocated, 2181db8dac20SDavid S. Miller .sysctl_mem = sysctl_udp_mem, 2182db8dac20SDavid S. Miller .sysctl_wmem = &sysctl_udp_wmem_min, 2183db8dac20SDavid S. Miller .sysctl_rmem = &sysctl_udp_rmem_min, 2184db8dac20SDavid S. Miller .obj_size = sizeof(struct udp_sock), 2185271b72c7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 2186645ca708SEric Dumazet .h.udp_table = &udp_table, 2187db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2188db8dac20SDavid S. Miller .compat_setsockopt = compat_udp_setsockopt, 2189db8dac20SDavid S. Miller .compat_getsockopt = compat_udp_getsockopt, 2190db8dac20SDavid S. Miller #endif 2191fcbdf09dSOctavian Purdila .clear_sk = sk_prot_clear_portaddr_nulls, 2192db8dac20SDavid S. 
Miller }; 2193c482c568SEric Dumazet EXPORT_SYMBOL(udp_prot); 21941da177e4SLinus Torvalds 21951da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 21961da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 21971da177e4SLinus Torvalds 2198645ca708SEric Dumazet static struct sock *udp_get_first(struct seq_file *seq, int start) 21991da177e4SLinus Torvalds { 22001da177e4SLinus Torvalds struct sock *sk; 22011da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 22026f191efeSDenis V. Lunev struct net *net = seq_file_net(seq); 22031da177e4SLinus Torvalds 2204f86dcc5aSEric Dumazet for (state->bucket = start; state->bucket <= state->udp_table->mask; 2205f86dcc5aSEric Dumazet ++state->bucket) { 220688ab1932SEric Dumazet struct hlist_nulls_node *node; 2207645ca708SEric Dumazet struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 2208f86dcc5aSEric Dumazet 2209f86dcc5aSEric Dumazet if (hlist_nulls_empty(&hslot->head)) 2210f86dcc5aSEric Dumazet continue; 2211f86dcc5aSEric Dumazet 2212645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 221388ab1932SEric Dumazet sk_nulls_for_each(sk, node, &hslot->head) { 2214878628fbSYOSHIFUJI Hideaki if (!net_eq(sock_net(sk), net)) 2215a91275efSDaniel Lezcano continue; 22161da177e4SLinus Torvalds if (sk->sk_family == state->family) 22171da177e4SLinus Torvalds goto found; 22181da177e4SLinus Torvalds } 2219645ca708SEric Dumazet spin_unlock_bh(&hslot->lock); 22201da177e4SLinus Torvalds } 22211da177e4SLinus Torvalds sk = NULL; 22221da177e4SLinus Torvalds found: 22231da177e4SLinus Torvalds return sk; 22241da177e4SLinus Torvalds } 22251da177e4SLinus Torvalds 22261da177e4SLinus Torvalds static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 22271da177e4SLinus Torvalds { 22281da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 22296f191efeSDenis V. 
Lunev struct net *net = seq_file_net(seq); 22301da177e4SLinus Torvalds 22311da177e4SLinus Torvalds do { 223288ab1932SEric Dumazet sk = sk_nulls_next(sk); 2233878628fbSYOSHIFUJI Hideaki } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 22341da177e4SLinus Torvalds 2235645ca708SEric Dumazet if (!sk) { 2236f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2237f52b5054SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 2238645ca708SEric Dumazet return udp_get_first(seq, state->bucket + 1); 22391da177e4SLinus Torvalds } 22401da177e4SLinus Torvalds return sk; 22411da177e4SLinus Torvalds } 22421da177e4SLinus Torvalds 22431da177e4SLinus Torvalds static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) 22441da177e4SLinus Torvalds { 2245645ca708SEric Dumazet struct sock *sk = udp_get_first(seq, 0); 22461da177e4SLinus Torvalds 22471da177e4SLinus Torvalds if (sk) 22481da177e4SLinus Torvalds while (pos && (sk = udp_get_next(seq, sk)) != NULL) 22491da177e4SLinus Torvalds --pos; 22501da177e4SLinus Torvalds return pos ? NULL : sk; 22511da177e4SLinus Torvalds } 22521da177e4SLinus Torvalds 22531da177e4SLinus Torvalds static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 22541da177e4SLinus Torvalds { 225530842f29SVitaly Mayatskikh struct udp_iter_state *state = seq->private; 2256f86dcc5aSEric Dumazet state->bucket = MAX_UDP_PORTS; 225730842f29SVitaly Mayatskikh 2258b50660f1SYOSHIFUJI Hideaki return *pos ? 
udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 22591da177e4SLinus Torvalds } 22601da177e4SLinus Torvalds 22611da177e4SLinus Torvalds static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 22621da177e4SLinus Torvalds { 22631da177e4SLinus Torvalds struct sock *sk; 22641da177e4SLinus Torvalds 2265b50660f1SYOSHIFUJI Hideaki if (v == SEQ_START_TOKEN) 22661da177e4SLinus Torvalds sk = udp_get_idx(seq, 0); 22671da177e4SLinus Torvalds else 22681da177e4SLinus Torvalds sk = udp_get_next(seq, v); 22691da177e4SLinus Torvalds 22701da177e4SLinus Torvalds ++*pos; 22711da177e4SLinus Torvalds return sk; 22721da177e4SLinus Torvalds } 22731da177e4SLinus Torvalds 22741da177e4SLinus Torvalds static void udp_seq_stop(struct seq_file *seq, void *v) 22751da177e4SLinus Torvalds { 2276645ca708SEric Dumazet struct udp_iter_state *state = seq->private; 2277645ca708SEric Dumazet 2278f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2279645ca708SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 22801da177e4SLinus Torvalds } 22811da177e4SLinus Torvalds 228273cb88ecSArjan van de Ven int udp_seq_open(struct inode *inode, struct file *file) 22831da177e4SLinus Torvalds { 2284d9dda78bSAl Viro struct udp_seq_afinfo *afinfo = PDE_DATA(inode); 2285a2be75c1SDenis V. Lunev struct udp_iter_state *s; 2286a2be75c1SDenis V. Lunev int err; 22871da177e4SLinus Torvalds 2288a2be75c1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 2289a2be75c1SDenis V. Lunev sizeof(struct udp_iter_state)); 2290a2be75c1SDenis V. Lunev if (err < 0) 2291a2be75c1SDenis V. Lunev return err; 2292a91275efSDaniel Lezcano 2293a2be75c1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 22941da177e4SLinus Torvalds s->family = afinfo->family; 2295645ca708SEric Dumazet s->udp_table = afinfo->udp_table; 2296a2be75c1SDenis V. 
Lunev return err; 2297a91275efSDaniel Lezcano } 229873cb88ecSArjan van de Ven EXPORT_SYMBOL(udp_seq_open); 2299a91275efSDaniel Lezcano 23001da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 23010c96d8c5SDaniel Lezcano int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) 23021da177e4SLinus Torvalds { 23031da177e4SLinus Torvalds struct proc_dir_entry *p; 23041da177e4SLinus Torvalds int rc = 0; 23051da177e4SLinus Torvalds 2306dda61925SDenis V. Lunev afinfo->seq_ops.start = udp_seq_start; 2307dda61925SDenis V. Lunev afinfo->seq_ops.next = udp_seq_next; 2308dda61925SDenis V. Lunev afinfo->seq_ops.stop = udp_seq_stop; 2309dda61925SDenis V. Lunev 231084841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 231173cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 231284841c3cSDenis V. Lunev if (!p) 23131da177e4SLinus Torvalds rc = -ENOMEM; 23141da177e4SLinus Torvalds return rc; 23151da177e4SLinus Torvalds } 2316c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_register); 23171da177e4SLinus Torvalds 23180c96d8c5SDaniel Lezcano void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 23191da177e4SLinus Torvalds { 2320ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 23211da177e4SLinus Torvalds } 2322c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_unregister); 2323db8dac20SDavid S. Miller 2324db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 23255e659e4cSPavel Emelyanov static void udp4_format_sock(struct sock *sp, struct seq_file *f, 2326652586dfSTetsuo Handa int bucket) 2327db8dac20SDavid S. Miller { 2328db8dac20SDavid S. 
Miller struct inet_sock *inet = inet_sk(sp); 2329c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2330c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2331c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2332c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 2333db8dac20SDavid S. Miller 2334f86dcc5aSEric Dumazet seq_printf(f, "%5d: %08X:%04X %08X:%04X" 2335652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", 2336db8dac20SDavid S. Miller bucket, src, srcp, dest, destp, sp->sk_state, 233731e6d363SEric Dumazet sk_wmem_alloc_get(sp), 233831e6d363SEric Dumazet sk_rmem_alloc_get(sp), 2339a7cb5a49SEric W. Biederman 0, 0L, 0, 2340a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 2341a7cb5a49SEric W. Biederman 0, sock_i_ino(sp), 2342cb61cb9bSEric Dumazet atomic_read(&sp->sk_refcnt), sp, 2343652586dfSTetsuo Handa atomic_read(&sp->sk_drops)); 2344db8dac20SDavid S. Miller } 2345db8dac20SDavid S. Miller 2346db8dac20SDavid S. Miller int udp4_seq_show(struct seq_file *seq, void *v) 2347db8dac20SDavid S. Miller { 2348652586dfSTetsuo Handa seq_setwidth(seq, 127); 2349db8dac20SDavid S. Miller if (v == SEQ_START_TOKEN) 2350652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 2351db8dac20SDavid S. Miller "rx_queue tr tm->when retrnsmt uid timeout " 2352cb61cb9bSEric Dumazet "inode ref pointer drops"); 2353db8dac20SDavid S. Miller else { 2354db8dac20SDavid S. Miller struct udp_iter_state *state = seq->private; 2355db8dac20SDavid S. Miller 2356652586dfSTetsuo Handa udp4_format_sock(v, seq, state->bucket); 2357db8dac20SDavid S. Miller } 2358652586dfSTetsuo Handa seq_pad(seq, '\n'); 2359db8dac20SDavid S. Miller return 0; 2360db8dac20SDavid S. Miller } 2361db8dac20SDavid S. 
Miller 236273cb88ecSArjan van de Ven static const struct file_operations udp_afinfo_seq_fops = { 236373cb88ecSArjan van de Ven .owner = THIS_MODULE, 236473cb88ecSArjan van de Ven .open = udp_seq_open, 236573cb88ecSArjan van de Ven .read = seq_read, 236673cb88ecSArjan van de Ven .llseek = seq_lseek, 236773cb88ecSArjan van de Ven .release = seq_release_net 236873cb88ecSArjan van de Ven }; 236973cb88ecSArjan van de Ven 2370db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 2371db8dac20SDavid S. Miller static struct udp_seq_afinfo udp4_seq_afinfo = { 2372db8dac20SDavid S. Miller .name = "udp", 2373db8dac20SDavid S. Miller .family = AF_INET, 2374645ca708SEric Dumazet .udp_table = &udp_table, 237573cb88ecSArjan van de Ven .seq_fops = &udp_afinfo_seq_fops, 2376dda61925SDenis V. Lunev .seq_ops = { 2377dda61925SDenis V. Lunev .show = udp4_seq_show, 2378dda61925SDenis V. Lunev }, 2379db8dac20SDavid S. Miller }; 2380db8dac20SDavid S. Miller 23812c8c1e72SAlexey Dobriyan static int __net_init udp4_proc_init_net(struct net *net) 238215439febSPavel Emelyanov { 238315439febSPavel Emelyanov return udp_proc_register(net, &udp4_seq_afinfo); 238415439febSPavel Emelyanov } 238515439febSPavel Emelyanov 23862c8c1e72SAlexey Dobriyan static void __net_exit udp4_proc_exit_net(struct net *net) 238715439febSPavel Emelyanov { 238815439febSPavel Emelyanov udp_proc_unregister(net, &udp4_seq_afinfo); 238915439febSPavel Emelyanov } 239015439febSPavel Emelyanov 239115439febSPavel Emelyanov static struct pernet_operations udp4_net_ops = { 239215439febSPavel Emelyanov .init = udp4_proc_init_net, 239315439febSPavel Emelyanov .exit = udp4_proc_exit_net, 239415439febSPavel Emelyanov }; 239515439febSPavel Emelyanov 2396db8dac20SDavid S. Miller int __init udp4_proc_init(void) 2397db8dac20SDavid S. Miller { 239815439febSPavel Emelyanov return register_pernet_subsys(&udp4_net_ops); 2399db8dac20SDavid S. Miller } 2400db8dac20SDavid S. 
Miller 2401db8dac20SDavid S. Miller void udp4_proc_exit(void) 2402db8dac20SDavid S. Miller { 240315439febSPavel Emelyanov unregister_pernet_subsys(&udp4_net_ops); 2404db8dac20SDavid S. Miller } 24051da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 24061da177e4SLinus Torvalds 2407f86dcc5aSEric Dumazet static __initdata unsigned long uhash_entries; 2408f86dcc5aSEric Dumazet static int __init set_uhash_entries(char *str) 2409645ca708SEric Dumazet { 2410413c27d8SEldad Zack ssize_t ret; 2411413c27d8SEldad Zack 2412f86dcc5aSEric Dumazet if (!str) 2413f86dcc5aSEric Dumazet return 0; 2414413c27d8SEldad Zack 2415413c27d8SEldad Zack ret = kstrtoul(str, 0, &uhash_entries); 2416413c27d8SEldad Zack if (ret) 2417413c27d8SEldad Zack return 0; 2418413c27d8SEldad Zack 2419f86dcc5aSEric Dumazet if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) 2420f86dcc5aSEric Dumazet uhash_entries = UDP_HTABLE_SIZE_MIN; 2421f86dcc5aSEric Dumazet return 1; 2422f86dcc5aSEric Dumazet } 2423f86dcc5aSEric Dumazet __setup("uhash_entries=", set_uhash_entries); 2424645ca708SEric Dumazet 2425f86dcc5aSEric Dumazet void __init udp_table_init(struct udp_table *table, const char *name) 2426f86dcc5aSEric Dumazet { 2427f86dcc5aSEric Dumazet unsigned int i; 2428f86dcc5aSEric Dumazet 2429f86dcc5aSEric Dumazet table->hash = alloc_large_system_hash(name, 2430512615b6SEric Dumazet 2 * sizeof(struct udp_hslot), 2431f86dcc5aSEric Dumazet uhash_entries, 2432f86dcc5aSEric Dumazet 21, /* one slot per 2 MB */ 2433f86dcc5aSEric Dumazet 0, 2434f86dcc5aSEric Dumazet &table->log, 2435f86dcc5aSEric Dumazet &table->mask, 243631fe62b9STim Bird UDP_HTABLE_SIZE_MIN, 2437f86dcc5aSEric Dumazet 64 * 1024); 243831fe62b9STim Bird 2439512615b6SEric Dumazet table->hash2 = table->hash + (table->mask + 1); 2440f86dcc5aSEric Dumazet for (i = 0; i <= table->mask; i++) { 244188ab1932SEric Dumazet INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 2442fdcc8aa9SEric Dumazet table->hash[i].count = 0; 2443645ca708SEric Dumazet 
spin_lock_init(&table->hash[i].lock); 2444645ca708SEric Dumazet } 2445512615b6SEric Dumazet for (i = 0; i <= table->mask; i++) { 2446512615b6SEric Dumazet INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); 2447512615b6SEric Dumazet table->hash2[i].count = 0; 2448512615b6SEric Dumazet spin_lock_init(&table->hash2[i].lock); 2449512615b6SEric Dumazet } 2450645ca708SEric Dumazet } 2451645ca708SEric Dumazet 245295766fffSHideo Aoki void __init udp_init(void) 245395766fffSHideo Aoki { 2454f03d78dbSEric Dumazet unsigned long limit; 245595766fffSHideo Aoki 2456f86dcc5aSEric Dumazet udp_table_init(&udp_table, "UDP"); 2457f03d78dbSEric Dumazet limit = nr_free_buffer_pages() / 8; 245895766fffSHideo Aoki limit = max(limit, 128UL); 245995766fffSHideo Aoki sysctl_udp_mem[0] = limit / 4 * 3; 246095766fffSHideo Aoki sysctl_udp_mem[1] = limit; 246195766fffSHideo Aoki sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; 246295766fffSHideo Aoki 246395766fffSHideo Aoki sysctl_udp_rmem_min = SK_MEM_QUANTUM; 246495766fffSHideo Aoki sysctl_udp_wmem_min = SK_MEM_QUANTUM; 246595766fffSHideo Aoki } 246695766fffSHideo Aoki 2467da5bab07SDaniel Borkmann struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, 246873136267SPravin B Shelar netdev_features_t features) 246973136267SPravin B Shelar { 247073136267SPravin B Shelar struct sk_buff *segs = ERR_PTR(-EINVAL); 24717a7ffbabSWei-Chun Chao u16 mac_offset = skb->mac_header; 247273136267SPravin B Shelar int mac_len = skb->mac_len; 247373136267SPravin B Shelar int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 24740d05535dSPravin B Shelar __be16 protocol = skb->protocol; 247573136267SPravin B Shelar netdev_features_t enc_features; 24760d05535dSPravin B Shelar int outer_hlen; 247773136267SPravin B Shelar 247873136267SPravin B Shelar if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 247973136267SPravin B Shelar goto out; 248073136267SPravin B Shelar 248173136267SPravin B Shelar skb->encapsulation = 0; 248273136267SPravin B Shelar 
__skb_pull(skb, tnl_hlen); 248373136267SPravin B Shelar skb_reset_mac_header(skb); 248473136267SPravin B Shelar skb_set_network_header(skb, skb_inner_network_offset(skb)); 248573136267SPravin B Shelar skb->mac_len = skb_inner_network_offset(skb); 248619acc327SPravin B Shelar skb->protocol = htons(ETH_P_TEB); 248773136267SPravin B Shelar 248873136267SPravin B Shelar /* segment inner packet. */ 248973136267SPravin B Shelar enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 249073136267SPravin B Shelar segs = skb_mac_gso_segment(skb, enc_features); 24917a7ffbabSWei-Chun Chao if (!segs || IS_ERR(segs)) { 24927a7ffbabSWei-Chun Chao skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, 24937a7ffbabSWei-Chun Chao mac_len); 249473136267SPravin B Shelar goto out; 24957a7ffbabSWei-Chun Chao } 249673136267SPravin B Shelar 249773136267SPravin B Shelar outer_hlen = skb_tnl_header_len(skb); 249873136267SPravin B Shelar skb = segs; 249973136267SPravin B Shelar do { 250073136267SPravin B Shelar struct udphdr *uh; 250173136267SPravin B Shelar int udp_offset = outer_hlen - tnl_hlen; 250273136267SPravin B Shelar 2503cdbaa0bbSAlexander Duyck skb_reset_inner_headers(skb); 2504cdbaa0bbSAlexander Duyck skb->encapsulation = 1; 2505cdbaa0bbSAlexander Duyck 250673136267SPravin B Shelar skb->mac_len = mac_len; 250773136267SPravin B Shelar 250873136267SPravin B Shelar skb_push(skb, outer_hlen); 250973136267SPravin B Shelar skb_reset_mac_header(skb); 251073136267SPravin B Shelar skb_set_network_header(skb, mac_len); 251173136267SPravin B Shelar skb_set_transport_header(skb, udp_offset); 251273136267SPravin B Shelar uh = udp_hdr(skb); 251373136267SPravin B Shelar uh->len = htons(skb->len - udp_offset); 251473136267SPravin B Shelar 251573136267SPravin B Shelar /* csum segment if tunnel sets skb with csum. 
*/ 2516eb3c0d83SCong Wang if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { 251773136267SPravin B Shelar struct iphdr *iph = ip_hdr(skb); 251873136267SPravin B Shelar 251973136267SPravin B Shelar uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 252073136267SPravin B Shelar skb->len - udp_offset, 252173136267SPravin B Shelar IPPROTO_UDP, 0); 252273136267SPravin B Shelar uh->check = csum_fold(skb_checksum(skb, udp_offset, 252373136267SPravin B Shelar skb->len - udp_offset, 0)); 252473136267SPravin B Shelar if (uh->check == 0) 252573136267SPravin B Shelar uh->check = CSUM_MANGLED_0; 252673136267SPravin B Shelar 2527eb3c0d83SCong Wang } else if (protocol == htons(ETH_P_IPV6)) { 2528eb3c0d83SCong Wang struct ipv6hdr *ipv6h = ipv6_hdr(skb); 2529eb3c0d83SCong Wang u32 len = skb->len - udp_offset; 2530eb3c0d83SCong Wang 2531eb3c0d83SCong Wang uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 2532eb3c0d83SCong Wang len, IPPROTO_UDP, 0); 2533eb3c0d83SCong Wang uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); 2534eb3c0d83SCong Wang if (uh->check == 0) 2535eb3c0d83SCong Wang uh->check = CSUM_MANGLED_0; 2536eb3c0d83SCong Wang skb->ip_summed = CHECKSUM_NONE; 253773136267SPravin B Shelar } 2538eb3c0d83SCong Wang 25390d05535dSPravin B Shelar skb->protocol = protocol; 254073136267SPravin B Shelar } while ((skb = skb->next)); 254173136267SPravin B Shelar out: 254273136267SPravin B Shelar return segs; 254373136267SPravin B Shelar } 2544