11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * INET An implementation of the TCP/IP protocol suite for the LINUX 31da177e4SLinus Torvalds * operating system. INET is implemented using the BSD Socket 41da177e4SLinus Torvalds * interface as the means of communication with the user level. 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * The User Datagram Protocol (UDP). 71da177e4SLinus Torvalds * 802c30a84SJesper Juhl * Authors: Ross Biro 91da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 101da177e4SLinus Torvalds * Arnt Gulbrandsen, <agulbra@nvg.unit.no> 11113aa838SAlan Cox * Alan Cox, <alan@lxorguk.ukuu.org.uk> 121da177e4SLinus Torvalds * Hirokazu Takahashi, <taka@valinux.co.jp> 131da177e4SLinus Torvalds * 141da177e4SLinus Torvalds * Fixes: 151da177e4SLinus Torvalds * Alan Cox : verify_area() calls 161da177e4SLinus Torvalds * Alan Cox : stopped close while in use off icmp 171da177e4SLinus Torvalds * messages. Not a fix but a botch that 181da177e4SLinus Torvalds * for udp at least is 'valid'. 191da177e4SLinus Torvalds * Alan Cox : Fixed icmp handling properly 201da177e4SLinus Torvalds * Alan Cox : Correct error for oversized datagrams 211da177e4SLinus Torvalds * Alan Cox : Tidied select() semantics. 221da177e4SLinus Torvalds * Alan Cox : udp_err() fixed properly, also now 231da177e4SLinus Torvalds * select and read wake correctly on errors 241da177e4SLinus Torvalds * Alan Cox : udp_send verify_area moved to avoid mem leak 251da177e4SLinus Torvalds * Alan Cox : UDP can count its memory 261da177e4SLinus Torvalds * Alan Cox : send to an unknown connection causes 271da177e4SLinus Torvalds * an ECONNREFUSED off the icmp, but 281da177e4SLinus Torvalds * does NOT close. 291da177e4SLinus Torvalds * Alan Cox : Switched to new sk_buff handlers. No more backlog! 301da177e4SLinus Torvalds * Alan Cox : Using generic datagram code. Even smaller and the PEEK 311da177e4SLinus Torvalds * bug no longer crashes it. 321da177e4SLinus Torvalds * Fred Van Kempen : Net2e support for sk->broadcast. 331da177e4SLinus Torvalds * Alan Cox : Uses skb_free_datagram 341da177e4SLinus Torvalds * Alan Cox : Added get/set sockopt support. 351da177e4SLinus Torvalds * Alan Cox : Broadcasting without option set returns EACCES. 361da177e4SLinus Torvalds * Alan Cox : No wakeup calls. Instead we now use the callbacks. 371da177e4SLinus Torvalds * Alan Cox : Use ip_tos and ip_ttl 381da177e4SLinus Torvalds * Alan Cox : SNMP Mibs 391da177e4SLinus Torvalds * Alan Cox : MSG_DONTROUTE, and 0.0.0.0 support. 401da177e4SLinus Torvalds * Matt Dillon : UDP length checks. 411da177e4SLinus Torvalds * Alan Cox : Smarter af_inet used properly. 421da177e4SLinus Torvalds * Alan Cox : Use new kernel side addressing. 431da177e4SLinus Torvalds * Alan Cox : Incorrect return on truncated datagram receive. 441da177e4SLinus Torvalds * Arnt Gulbrandsen : New udp_send and stuff 451da177e4SLinus Torvalds * Alan Cox : Cache last socket 461da177e4SLinus Torvalds * Alan Cox : Route cache 471da177e4SLinus Torvalds * Jon Peatfield : Minor efficiency fix to sendto(). 481da177e4SLinus Torvalds * Mike Shaver : RFC1122 checks. 491da177e4SLinus Torvalds * Alan Cox : Nonblocking error fix. 501da177e4SLinus Torvalds * Willy Konynenberg : Transparent proxying support. 511da177e4SLinus Torvalds * Mike McLagan : Routing by source 521da177e4SLinus Torvalds * David S. Miller : New socket lookup architecture. 531da177e4SLinus Torvalds * Last socket cache retained as it 541da177e4SLinus Torvalds * does have a high hit rate. 551da177e4SLinus Torvalds * Olaf Kirch : Don't linearise iovec on sendmsg. 561da177e4SLinus Torvalds * Andi Kleen : Some cleanups, cache destination entry 571da177e4SLinus Torvalds * for connect. 581da177e4SLinus Torvalds * Vitaly E. Lavrov : Transparent proxy revived after year coma. 591da177e4SLinus Torvalds * Melvin Smith : Check msg_name not msg_namelen in sendto(), 601da177e4SLinus Torvalds * return ENOTCONN for unconnected sockets (POSIX) 611da177e4SLinus Torvalds * Janos Farkas : don't deliver multi/broadcasts to a different 621da177e4SLinus Torvalds * bound-to-device socket 631da177e4SLinus Torvalds * Hirokazu Takahashi : HW checksumming for outgoing UDP 641da177e4SLinus Torvalds * datagrams. 651da177e4SLinus Torvalds * Hirokazu Takahashi : sendfile() on UDP works now. 661da177e4SLinus Torvalds * Arnaldo C. Melo : convert /proc/net/udp to seq_file 671da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 681da177e4SLinus Torvalds * Alexey Kuznetsov: allow both IPv4 and IPv6 sockets to bind 691da177e4SLinus Torvalds * a single port at the same time. 701da177e4SLinus Torvalds * Derek Atkins <derek@ihtfp.com>: Add Encapulation Support 71342f0234SJames Chapman * James Chapman : Add L2TP encapsulation type. 721da177e4SLinus Torvalds * 731da177e4SLinus Torvalds * 741da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 751da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 761da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 771da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 781da177e4SLinus Torvalds */ 791da177e4SLinus Torvalds 80afd46503SJoe Perches #define pr_fmt(fmt) "UDP: " fmt 81afd46503SJoe Perches 821da177e4SLinus Torvalds #include <asm/uaccess.h> 831da177e4SLinus Torvalds #include <asm/ioctls.h> 8495766fffSHideo Aoki #include <linux/bootmem.h> 858203efb3SEric Dumazet #include <linux/highmem.h> 868203efb3SEric Dumazet #include <linux/swap.h> 871da177e4SLinus Torvalds #include <linux/types.h> 881da177e4SLinus Torvalds #include <linux/fcntl.h> 891da177e4SLinus Torvalds #include <linux/module.h> 901da177e4SLinus Torvalds #include <linux/socket.h> 911da177e4SLinus Torvalds #include <linux/sockios.h> 9214c85021SArnaldo Carvalho de Melo #include <linux/igmp.h> 931da177e4SLinus Torvalds #include <linux/in.h> 941da177e4SLinus Torvalds #include <linux/errno.h> 951da177e4SLinus Torvalds #include <linux/timer.h> 961da177e4SLinus Torvalds #include <linux/mm.h> 971da177e4SLinus Torvalds #include <linux/inet.h> 981da177e4SLinus Torvalds #include <linux/netdevice.h> 995a0e3ad6STejun Heo #include <linux/slab.h> 100c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h> 1011da177e4SLinus Torvalds #include <linux/skbuff.h> 1021da177e4SLinus Torvalds #include <linux/proc_fs.h> 1031da177e4SLinus Torvalds #include <linux/seq_file.h> 104457c4cbcSEric W. Biederman #include <net/net_namespace.h> 1051da177e4SLinus Torvalds #include <net/icmp.h> 106421b3885SShawn Bohrer #include <net/inet_hashtables.h> 1071da177e4SLinus Torvalds #include <net/route.h> 1081da177e4SLinus Torvalds #include <net/checksum.h> 1091da177e4SLinus Torvalds #include <net/xfrm.h> 110296f7ea7SSatoru Moriya #include <trace/events/udp.h> 111447167bfSEric Dumazet #include <linux/static_key.h> 11222911fc5SEric Dumazet #include <trace/events/skb.h> 113076bb0c8SEliezer Tamir #include <net/busy_poll.h> 114ba4e58ecSGerrit Renker #include "udp_impl.h" 1151da177e4SLinus Torvalds 116f86dcc5aSEric Dumazet struct udp_table udp_table __read_mostly; 117645ca708SEric Dumazet EXPORT_SYMBOL(udp_table); 1181da177e4SLinus Torvalds 1198d987e5cSEric Dumazet long sysctl_udp_mem[3] __read_mostly; 12095766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_mem); 121c482c568SEric Dumazet 122c482c568SEric Dumazet int sysctl_udp_rmem_min __read_mostly; 12395766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_rmem_min); 124c482c568SEric Dumazet 125c482c568SEric Dumazet int sysctl_udp_wmem_min __read_mostly; 12695766fffSHideo Aoki EXPORT_SYMBOL(sysctl_udp_wmem_min); 12795766fffSHideo Aoki 1288d987e5cSEric Dumazet atomic_long_t udp_memory_allocated; 12995766fffSHideo Aoki EXPORT_SYMBOL(udp_memory_allocated); 13095766fffSHideo Aoki 131f86dcc5aSEric Dumazet #define MAX_UDP_PORTS 65536 132f86dcc5aSEric Dumazet #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) 13398322f22SEric Dumazet 134f24d43c0SEric Dumazet static int udp_lib_lport_inuse(struct net *net, __u16 num, 135645ca708SEric Dumazet const struct udp_hslot *hslot, 13698322f22SEric Dumazet unsigned long *bitmap, 137f24d43c0SEric Dumazet struct sock *sk, 138f24d43c0SEric Dumazet int (*saddr_comp)(const struct sock *sk1, 139f86dcc5aSEric Dumazet const struct sock *sk2), 140f86dcc5aSEric Dumazet unsigned int log) 14125030a7fSGerrit Renker { 142f24d43c0SEric Dumazet struct sock *sk2; 14388ab1932SEric Dumazet struct hlist_nulls_node *node; 144ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 14525030a7fSGerrit Renker 14688ab1932SEric Dumazet sk_nulls_for_each(sk2, node, &hslot->head) 147f24d43c0SEric Dumazet if (net_eq(sock_net(sk2), net) && 148f24d43c0SEric Dumazet sk2 != sk && 149d4cada4aSEric Dumazet (bitmap || udp_sk(sk2)->udp_port_hash == num) && 150f24d43c0SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1519d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1529d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 153ba418fa3STom Herbert (!sk2->sk_reuseport || !sk->sk_reuseport || 154ba418fa3STom Herbert !uid_eq(uid, sock_i_uid(sk2))) && 15598322f22SEric Dumazet (*saddr_comp)(sk, sk2)) { 15698322f22SEric Dumazet if (bitmap) 157d4cada4aSEric Dumazet __set_bit(udp_sk(sk2)->udp_port_hash >> log, 158d4cada4aSEric Dumazet bitmap); 15998322f22SEric Dumazet else 160fc038410SDavid S. Miller return 1; 16198322f22SEric Dumazet } 16225030a7fSGerrit Renker return 0; 16325030a7fSGerrit Renker } 16425030a7fSGerrit Renker 16530fff923SEric Dumazet /* 16630fff923SEric Dumazet * Note: we still hold spinlock of primary hash chain, so no other writer 16730fff923SEric Dumazet * can insert/delete a socket with local_port == num 16830fff923SEric Dumazet */ 16930fff923SEric Dumazet static int udp_lib_lport_inuse2(struct net *net, __u16 num, 17030fff923SEric Dumazet struct udp_hslot *hslot2, 17130fff923SEric Dumazet struct sock *sk, 17230fff923SEric Dumazet int (*saddr_comp)(const struct sock *sk1, 17330fff923SEric Dumazet const struct sock *sk2)) 17430fff923SEric Dumazet { 17530fff923SEric Dumazet struct sock *sk2; 17630fff923SEric Dumazet struct hlist_nulls_node *node; 177ba418fa3STom Herbert kuid_t uid = sock_i_uid(sk); 17830fff923SEric Dumazet int res = 0; 17930fff923SEric Dumazet 18030fff923SEric Dumazet spin_lock(&hslot2->lock); 18130fff923SEric Dumazet udp_portaddr_for_each_entry(sk2, node, &hslot2->head) 18230fff923SEric Dumazet if (net_eq(sock_net(sk2), net) && 18330fff923SEric Dumazet sk2 != sk && 18430fff923SEric Dumazet (udp_sk(sk2)->udp_port_hash == num) && 18530fff923SEric Dumazet (!sk2->sk_reuse || !sk->sk_reuse) && 1869d4fb27dSJoe Perches (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 1879d4fb27dSJoe Perches sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 188ba418fa3STom Herbert (!sk2->sk_reuseport || !sk->sk_reuseport || 189ba418fa3STom Herbert !uid_eq(uid, sock_i_uid(sk2))) && 19030fff923SEric Dumazet (*saddr_comp)(sk, sk2)) { 19130fff923SEric Dumazet res = 1; 19230fff923SEric Dumazet break; 19330fff923SEric Dumazet } 19430fff923SEric Dumazet spin_unlock(&hslot2->lock); 19530fff923SEric Dumazet return res; 19630fff923SEric Dumazet } 19730fff923SEric Dumazet 19825030a7fSGerrit Renker /** 1996ba5a3c5SPavel Emelyanov * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 20025030a7fSGerrit Renker * 20125030a7fSGerrit Renker * @sk: socket struct in question 20225030a7fSGerrit Renker * @snum: port number to look up 203df2bc459SDavid S. Miller * @saddr_comp: AF-dependent comparison of bound local IP addresses 20425985edcSLucas De Marchi * @hash2_nulladdr: AF-dependent hash value in secondary hash chains, 20530fff923SEric Dumazet * with NULL address 20625030a7fSGerrit Renker */ 2076ba5a3c5SPavel Emelyanov int udp_lib_get_port(struct sock *sk, unsigned short snum, 208df2bc459SDavid S. Miller int (*saddr_comp)(const struct sock *sk1, 20930fff923SEric Dumazet const struct sock *sk2), 21030fff923SEric Dumazet unsigned int hash2_nulladdr) 2111da177e4SLinus Torvalds { 212512615b6SEric Dumazet struct udp_hslot *hslot, *hslot2; 213645ca708SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 21425030a7fSGerrit Renker int error = 1; 2153b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(sk); 2161da177e4SLinus Torvalds 21732c1da70SStephen Hemminger if (!snum) { 2189088c560SEric Dumazet int low, high, remaining; 21995c96174SEric Dumazet unsigned int rand; 22098322f22SEric Dumazet unsigned short first, last; 22198322f22SEric Dumazet DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); 2221da177e4SLinus Torvalds 2230bbf87d8SEric W. Biederman inet_get_local_port_range(net, &low, &high); 224a25de534SAnton Arapov remaining = (high - low) + 1; 225227b60f5SStephen Hemminger 2269088c560SEric Dumazet rand = net_random(); 22798322f22SEric Dumazet first = (((u64)rand * remaining) >> 32) + low; 22898322f22SEric Dumazet /* 22998322f22SEric Dumazet * force rand to be an odd multiple of UDP_HTABLE_SIZE 23098322f22SEric Dumazet */ 231f86dcc5aSEric Dumazet rand = (rand | 1) * (udptable->mask + 1); 2325781b235SEric Dumazet last = first + udptable->mask + 1; 2335781b235SEric Dumazet do { 234f86dcc5aSEric Dumazet hslot = udp_hashslot(udptable, net, first); 23598322f22SEric Dumazet bitmap_zero(bitmap, PORTS_PER_CHAIN); 236645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 23798322f22SEric Dumazet udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, 238f86dcc5aSEric Dumazet saddr_comp, udptable->log); 23998322f22SEric Dumazet 24098322f22SEric Dumazet snum = first; 24198322f22SEric Dumazet /* 24298322f22SEric Dumazet * Iterate on all possible values of snum for this hash. 24398322f22SEric Dumazet * Using steps of an odd multiple of UDP_HTABLE_SIZE 24498322f22SEric Dumazet * give us randomization and full range coverage. 24598322f22SEric Dumazet */ 2469088c560SEric Dumazet do { 24798322f22SEric Dumazet if (low <= snum && snum <= high && 248e3826f1eSAmerigo Wang !test_bit(snum >> udptable->log, bitmap) && 249e3826f1eSAmerigo Wang !inet_is_reserved_local_port(snum)) 25098322f22SEric Dumazet goto found; 25198322f22SEric Dumazet snum += rand; 25298322f22SEric Dumazet } while (snum != first); 25398322f22SEric Dumazet spin_unlock_bh(&hslot->lock); 2545781b235SEric Dumazet } while (++first != last); 25598322f22SEric Dumazet goto fail; 256645ca708SEric Dumazet } else { 257f86dcc5aSEric Dumazet hslot = udp_hashslot(udptable, net, snum); 258645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 25930fff923SEric Dumazet if (hslot->count > 10) { 26030fff923SEric Dumazet int exist; 26130fff923SEric Dumazet unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; 26230fff923SEric Dumazet 26330fff923SEric Dumazet slot2 &= udptable->mask; 26430fff923SEric Dumazet hash2_nulladdr &= udptable->mask; 26530fff923SEric Dumazet 26630fff923SEric Dumazet hslot2 = udp_hashslot2(udptable, slot2); 26730fff923SEric Dumazet if (hslot->count < hslot2->count) 26830fff923SEric Dumazet goto scan_primary_hash; 26930fff923SEric Dumazet 27030fff923SEric Dumazet exist = udp_lib_lport_inuse2(net, snum, hslot2, 27130fff923SEric Dumazet sk, saddr_comp); 27230fff923SEric Dumazet if (!exist && (hash2_nulladdr != slot2)) { 27330fff923SEric Dumazet hslot2 = udp_hashslot2(udptable, hash2_nulladdr); 27430fff923SEric Dumazet exist = udp_lib_lport_inuse2(net, snum, hslot2, 27530fff923SEric Dumazet sk, saddr_comp); 27630fff923SEric Dumazet } 27730fff923SEric Dumazet if (exist) 27830fff923SEric Dumazet goto fail_unlock; 27930fff923SEric Dumazet else 28030fff923SEric Dumazet goto found; 28130fff923SEric Dumazet } 28230fff923SEric Dumazet scan_primary_hash: 283f86dcc5aSEric Dumazet if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 284f86dcc5aSEric Dumazet saddr_comp, 0)) 285645ca708SEric Dumazet goto fail_unlock; 286645ca708SEric Dumazet } 28798322f22SEric Dumazet found: 288c720c7e8SEric Dumazet inet_sk(sk)->inet_num = snum; 289d4cada4aSEric Dumazet udp_sk(sk)->udp_port_hash = snum; 290d4cada4aSEric Dumazet udp_sk(sk)->udp_portaddr_hash ^= snum; 2911da177e4SLinus Torvalds if (sk_unhashed(sk)) { 29288ab1932SEric Dumazet sk_nulls_add_node_rcu(sk, &hslot->head); 293fdcc8aa9SEric Dumazet hslot->count++; 294c29a0bc4SPavel Emelyanov sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 295512615b6SEric Dumazet 296512615b6SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 297512615b6SEric Dumazet spin_lock(&hslot2->lock); 298512615b6SEric Dumazet hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 299512615b6SEric Dumazet &hslot2->head); 300512615b6SEric Dumazet hslot2->count++; 301512615b6SEric Dumazet spin_unlock(&hslot2->lock); 3021da177e4SLinus Torvalds } 30325030a7fSGerrit Renker error = 0; 304645ca708SEric Dumazet fail_unlock: 305645ca708SEric Dumazet spin_unlock_bh(&hslot->lock); 3061da177e4SLinus Torvalds fail: 30725030a7fSGerrit Renker return error; 3081da177e4SLinus Torvalds } 309c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_get_port); 3101da177e4SLinus Torvalds 311499923c7SVlad Yasevich static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 312db8dac20SDavid S. Miller { 313db8dac20SDavid S. Miller struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 314db8dac20SDavid S. Miller 315db8dac20SDavid S. Miller return (!ipv6_only_sock(sk2) && 316c720c7e8SEric Dumazet (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr || 317c720c7e8SEric Dumazet inet1->inet_rcv_saddr == inet2->inet_rcv_saddr)); 318db8dac20SDavid S. Miller } 319db8dac20SDavid S. Miller 320d4cada4aSEric Dumazet static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, 321d4cada4aSEric Dumazet unsigned int port) 322d4cada4aSEric Dumazet { 3230eae88f3SEric Dumazet return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; 324d4cada4aSEric Dumazet } 325d4cada4aSEric Dumazet 3266ba5a3c5SPavel Emelyanov int udp_v4_get_port(struct sock *sk, unsigned short snum) 327db8dac20SDavid S. Miller { 32830fff923SEric Dumazet unsigned int hash2_nulladdr = 3290eae88f3SEric Dumazet udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); 33030fff923SEric Dumazet unsigned int hash2_partial = 33130fff923SEric Dumazet udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 33230fff923SEric Dumazet 333d4cada4aSEric Dumazet /* precompute partial secondary hash */ 33430fff923SEric Dumazet udp_sk(sk)->udp_portaddr_hash = hash2_partial; 33530fff923SEric Dumazet return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); 336db8dac20SDavid S. Miller } 337db8dac20SDavid S. Miller 338645ca708SEric Dumazet static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, 339645ca708SEric Dumazet unsigned short hnum, 340645ca708SEric Dumazet __be16 sport, __be32 daddr, __be16 dport, int dif) 341645ca708SEric Dumazet { 342645ca708SEric Dumazet int score = -1; 343645ca708SEric Dumazet 344d4cada4aSEric Dumazet if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && 345645ca708SEric Dumazet !ipv6_only_sock(sk)) { 346645ca708SEric Dumazet struct inet_sock *inet = inet_sk(sk); 347645ca708SEric Dumazet 348ba418fa3STom Herbert score = (sk->sk_family == PF_INET ? 2 : 1); 349c720c7e8SEric Dumazet if (inet->inet_rcv_saddr) { 350c720c7e8SEric Dumazet if (inet->inet_rcv_saddr != daddr) 351645ca708SEric Dumazet return -1; 352ba418fa3STom Herbert score += 4; 353645ca708SEric Dumazet } 354c720c7e8SEric Dumazet if (inet->inet_daddr) { 355c720c7e8SEric Dumazet if (inet->inet_daddr != saddr) 356645ca708SEric Dumazet return -1; 357ba418fa3STom Herbert score += 4; 358645ca708SEric Dumazet } 359c720c7e8SEric Dumazet if (inet->inet_dport) { 360c720c7e8SEric Dumazet if (inet->inet_dport != sport) 361645ca708SEric Dumazet return -1; 362ba418fa3STom Herbert score += 4; 363645ca708SEric Dumazet } 364645ca708SEric Dumazet if (sk->sk_bound_dev_if) { 365645ca708SEric Dumazet if (sk->sk_bound_dev_if != dif) 366645ca708SEric Dumazet return -1; 367ba418fa3STom Herbert score += 4; 368645ca708SEric Dumazet } 369645ca708SEric Dumazet } 370645ca708SEric Dumazet return score; 371645ca708SEric Dumazet } 372645ca708SEric Dumazet 3735051ebd2SEric Dumazet /* 3745051ebd2SEric Dumazet * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) 3755051ebd2SEric Dumazet */ 3765051ebd2SEric Dumazet static inline int compute_score2(struct sock *sk, struct net *net, 3775051ebd2SEric Dumazet __be32 saddr, __be16 sport, 3785051ebd2SEric Dumazet __be32 daddr, unsigned int hnum, int dif) 3795051ebd2SEric Dumazet { 3805051ebd2SEric Dumazet int score = -1; 3815051ebd2SEric Dumazet 3825051ebd2SEric Dumazet if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { 3835051ebd2SEric Dumazet struct inet_sock *inet = inet_sk(sk); 3845051ebd2SEric Dumazet 3855051ebd2SEric Dumazet if (inet->inet_rcv_saddr != daddr) 3865051ebd2SEric Dumazet return -1; 3875051ebd2SEric Dumazet if (inet->inet_num != hnum) 3885051ebd2SEric Dumazet return -1; 3895051ebd2SEric Dumazet 390ba418fa3STom Herbert score = (sk->sk_family == PF_INET ? 2 : 1); 3915051ebd2SEric Dumazet if (inet->inet_daddr) { 3925051ebd2SEric Dumazet if (inet->inet_daddr != saddr) 3935051ebd2SEric Dumazet return -1; 394ba418fa3STom Herbert score += 4; 3955051ebd2SEric Dumazet } 3965051ebd2SEric Dumazet if (inet->inet_dport) { 3975051ebd2SEric Dumazet if (inet->inet_dport != sport) 3985051ebd2SEric Dumazet return -1; 399ba418fa3STom Herbert score += 4; 4005051ebd2SEric Dumazet } 4015051ebd2SEric Dumazet if (sk->sk_bound_dev_if) { 4025051ebd2SEric Dumazet if (sk->sk_bound_dev_if != dif) 4035051ebd2SEric Dumazet return -1; 404ba418fa3STom Herbert score += 4; 4055051ebd2SEric Dumazet } 4065051ebd2SEric Dumazet } 4075051ebd2SEric Dumazet return score; 4085051ebd2SEric Dumazet } 4095051ebd2SEric Dumazet 41065cd8033SHannes Frederic Sowa static unsigned int udp_ehashfn(struct net *net, const __be32 laddr, 41165cd8033SHannes Frederic Sowa const __u16 lport, const __be32 faddr, 41265cd8033SHannes Frederic Sowa const __be16 fport) 41365cd8033SHannes Frederic Sowa { 4141bbdceefSHannes Frederic Sowa static u32 udp_ehash_secret __read_mostly; 4151bbdceefSHannes Frederic Sowa 4161bbdceefSHannes Frederic Sowa net_get_random_once(&udp_ehash_secret, sizeof(udp_ehash_secret)); 4171bbdceefSHannes Frederic Sowa 41865cd8033SHannes Frederic Sowa return __inet_ehashfn(laddr, lport, faddr, fport, 4191bbdceefSHannes Frederic Sowa udp_ehash_secret + net_hash_mix(net)); 42065cd8033SHannes Frederic Sowa } 42165cd8033SHannes Frederic Sowa 4225051ebd2SEric Dumazet 4235051ebd2SEric Dumazet /* called with read_rcu_lock() */ 4245051ebd2SEric Dumazet static struct sock *udp4_lib_lookup2(struct net *net, 4255051ebd2SEric Dumazet __be32 saddr, __be16 sport, 4265051ebd2SEric Dumazet __be32 daddr, unsigned int hnum, int dif, 4275051ebd2SEric Dumazet struct udp_hslot *hslot2, unsigned int slot2) 4285051ebd2SEric Dumazet { 4295051ebd2SEric Dumazet struct sock *sk, *result; 4305051ebd2SEric Dumazet struct hlist_nulls_node *node; 431ba418fa3STom Herbert int score, badness, matches = 0, reuseport = 0; 432ba418fa3STom Herbert u32 hash = 0; 4335051ebd2SEric Dumazet 4345051ebd2SEric Dumazet begin: 4355051ebd2SEric Dumazet result = NULL; 436ba418fa3STom Herbert badness = 0; 4375051ebd2SEric Dumazet udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { 4385051ebd2SEric Dumazet score = compute_score2(sk, net, saddr, sport, 4395051ebd2SEric Dumazet daddr, hnum, dif); 4405051ebd2SEric Dumazet if (score > badness) { 4415051ebd2SEric Dumazet result = sk; 4425051ebd2SEric Dumazet badness = score; 443ba418fa3STom Herbert reuseport = sk->sk_reuseport; 444ba418fa3STom Herbert if (reuseport) { 44565cd8033SHannes Frederic Sowa hash = udp_ehashfn(net, daddr, hnum, 4467c0cadc6SEric Dumazet saddr, sport); 447ba418fa3STom Herbert matches = 1; 448ba418fa3STom Herbert } 449ba418fa3STom Herbert } else if (score == badness && reuseport) { 450ba418fa3STom Herbert matches++; 451ba418fa3STom Herbert if (((u64)hash * matches) >> 32 == 0) 452ba418fa3STom Herbert result = sk; 453ba418fa3STom Herbert hash = next_pseudo_random32(hash); 4545051ebd2SEric Dumazet } 4555051ebd2SEric Dumazet } 4565051ebd2SEric Dumazet /* 4575051ebd2SEric Dumazet * if the nulls value we got at the end of this lookup is 4585051ebd2SEric Dumazet * not the expected one, we must restart lookup. 4595051ebd2SEric Dumazet * We probably met an item that was moved to another chain. 4605051ebd2SEric Dumazet */ 4615051ebd2SEric Dumazet if (get_nulls_value(node) != slot2) 4625051ebd2SEric Dumazet goto begin; 4635051ebd2SEric Dumazet if (result) { 464c31504dcSEric Dumazet if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 4655051ebd2SEric Dumazet result = NULL; 4665051ebd2SEric Dumazet else if (unlikely(compute_score2(result, net, saddr, sport, 4675051ebd2SEric Dumazet daddr, hnum, dif) < badness)) { 4685051ebd2SEric Dumazet sock_put(result); 4695051ebd2SEric Dumazet goto begin; 4705051ebd2SEric Dumazet } 4715051ebd2SEric Dumazet } 4725051ebd2SEric Dumazet return result; 4735051ebd2SEric Dumazet } 4745051ebd2SEric Dumazet 475db8dac20SDavid S. Miller /* UDP is nearly always wildcards out the wazoo, it makes no sense to try 476db8dac20SDavid S. Miller * harder than this. -DaveM 477db8dac20SDavid S. Miller */ 478fce82338SPavel Emelyanov struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, 479db8dac20SDavid S. Miller __be16 sport, __be32 daddr, __be16 dport, 480645ca708SEric Dumazet int dif, struct udp_table *udptable) 481db8dac20SDavid S. Miller { 482271b72c7SEric Dumazet struct sock *sk, *result; 48388ab1932SEric Dumazet struct hlist_nulls_node *node; 484db8dac20SDavid S. Miller unsigned short hnum = ntohs(dport); 4855051ebd2SEric Dumazet unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); 4865051ebd2SEric Dumazet struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; 487ba418fa3STom Herbert int score, badness, matches = 0, reuseport = 0; 488ba418fa3STom Herbert u32 hash = 0; 489db8dac20SDavid S. Miller 490271b72c7SEric Dumazet rcu_read_lock(); 4915051ebd2SEric Dumazet if (hslot->count > 10) { 4925051ebd2SEric Dumazet hash2 = udp4_portaddr_hash(net, daddr, hnum); 4935051ebd2SEric Dumazet slot2 = hash2 & udptable->mask; 4945051ebd2SEric Dumazet hslot2 = &udptable->hash2[slot2]; 4955051ebd2SEric Dumazet if (hslot->count < hslot2->count) 4965051ebd2SEric Dumazet goto begin; 4975051ebd2SEric Dumazet 4985051ebd2SEric Dumazet result = udp4_lib_lookup2(net, saddr, sport, 4995051ebd2SEric Dumazet daddr, hnum, dif, 5005051ebd2SEric Dumazet hslot2, slot2); 5015051ebd2SEric Dumazet if (!result) { 5020eae88f3SEric Dumazet hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); 5035051ebd2SEric Dumazet slot2 = hash2 & udptable->mask; 5045051ebd2SEric Dumazet hslot2 = &udptable->hash2[slot2]; 5055051ebd2SEric Dumazet if (hslot->count < hslot2->count) 5065051ebd2SEric Dumazet goto begin; 5075051ebd2SEric Dumazet 5081223c67cSJorge Boncompte [DTI2] result = udp4_lib_lookup2(net, saddr, sport, 5090eae88f3SEric Dumazet htonl(INADDR_ANY), hnum, dif, 5105051ebd2SEric Dumazet hslot2, slot2); 5115051ebd2SEric Dumazet } 5125051ebd2SEric Dumazet rcu_read_unlock(); 5135051ebd2SEric Dumazet return result; 5145051ebd2SEric Dumazet } 515271b72c7SEric Dumazet begin: 516271b72c7SEric Dumazet result = NULL; 517ba418fa3STom Herbert badness = 0; 51888ab1932SEric Dumazet sk_nulls_for_each_rcu(sk, node, &hslot->head) { 519645ca708SEric Dumazet score = compute_score(sk, net, saddr, hnum, sport, 520645ca708SEric Dumazet daddr, dport, dif); 521645ca708SEric Dumazet if (score > badness) { 522db8dac20SDavid S. Miller result = sk; 523db8dac20SDavid S. Miller badness = score; 524ba418fa3STom Herbert reuseport = sk->sk_reuseport; 525ba418fa3STom Herbert if (reuseport) { 52665cd8033SHannes Frederic Sowa hash = udp_ehashfn(net, daddr, hnum, 5277c0cadc6SEric Dumazet saddr, sport); 528ba418fa3STom Herbert matches = 1; 529ba418fa3STom Herbert } 530ba418fa3STom Herbert } else if (score == badness && reuseport) { 531ba418fa3STom Herbert matches++; 532ba418fa3STom Herbert if (((u64)hash * matches) >> 32 == 0) 533ba418fa3STom Herbert result = sk; 534ba418fa3STom Herbert hash = next_pseudo_random32(hash); 535db8dac20SDavid S. Miller } 536db8dac20SDavid S. Miller } 53788ab1932SEric Dumazet /* 53888ab1932SEric Dumazet * if the nulls value we got at the end of this lookup is 53988ab1932SEric Dumazet * not the expected one, we must restart lookup. 54088ab1932SEric Dumazet * We probably met an item that was moved to another chain. 54188ab1932SEric Dumazet */ 5425051ebd2SEric Dumazet if (get_nulls_value(node) != slot) 54388ab1932SEric Dumazet goto begin; 54488ab1932SEric Dumazet 545271b72c7SEric Dumazet if (result) { 546c31504dcSEric Dumazet if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 547271b72c7SEric Dumazet result = NULL; 548271b72c7SEric Dumazet else if (unlikely(compute_score(result, net, saddr, hnum, sport, 549271b72c7SEric Dumazet daddr, dport, dif) < badness)) { 550271b72c7SEric Dumazet sock_put(result); 551271b72c7SEric Dumazet goto begin; 552271b72c7SEric Dumazet } 553271b72c7SEric Dumazet } 554271b72c7SEric Dumazet rcu_read_unlock(); 555db8dac20SDavid S. Miller return result; 556db8dac20SDavid S. Miller } 557fce82338SPavel Emelyanov EXPORT_SYMBOL_GPL(__udp4_lib_lookup); 558db8dac20SDavid S. Miller 559607c4aafSKOVACS Krisztian static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, 560607c4aafSKOVACS Krisztian __be16 sport, __be16 dport, 561645ca708SEric Dumazet struct udp_table *udptable) 562607c4aafSKOVACS Krisztian { 56323542618SKOVACS Krisztian struct sock *sk; 564607c4aafSKOVACS Krisztian const struct iphdr *iph = ip_hdr(skb); 565607c4aafSKOVACS Krisztian 56623542618SKOVACS Krisztian if (unlikely(sk = skb_steal_sock(skb))) 56723542618SKOVACS Krisztian return sk; 56823542618SKOVACS Krisztian else 569adf30907SEric Dumazet return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, 570607c4aafSKOVACS Krisztian iph->daddr, dport, inet_iif(skb), 571607c4aafSKOVACS Krisztian udptable); 572607c4aafSKOVACS Krisztian } 573607c4aafSKOVACS Krisztian 574bcd41303SKOVACS Krisztian struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, 575bcd41303SKOVACS Krisztian __be32 daddr, __be16 dport, int dif) 576bcd41303SKOVACS Krisztian { 577645ca708SEric Dumazet return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); 578bcd41303SKOVACS Krisztian } 579bcd41303SKOVACS Krisztian EXPORT_SYMBOL_GPL(udp4_lib_lookup); 580bcd41303SKOVACS Krisztian 581421b3885SShawn Bohrer static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, 582421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 583421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 584421b3885SShawn Bohrer int dif, unsigned short hnum) 585421b3885SShawn Bohrer { 586421b3885SShawn Bohrer struct inet_sock *inet = inet_sk(sk); 587421b3885SShawn Bohrer 588421b3885SShawn Bohrer if (!net_eq(sock_net(sk), net) || 589421b3885SShawn Bohrer udp_sk(sk)->udp_port_hash != hnum || 590421b3885SShawn Bohrer (inet->inet_daddr && inet->inet_daddr != rmt_addr) || 591421b3885SShawn Bohrer (inet->inet_dport != rmt_port && inet->inet_dport) || 592421b3885SShawn Bohrer (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || 593421b3885SShawn Bohrer ipv6_only_sock(sk) || 594421b3885SShawn Bohrer (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 595421b3885SShawn Bohrer return false; 596421b3885SShawn Bohrer if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) 597421b3885SShawn Bohrer return false; 598421b3885SShawn Bohrer return true; 599421b3885SShawn Bohrer } 600421b3885SShawn Bohrer 601920a4611SEric Dumazet static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, 602db8dac20SDavid S. Miller __be16 loc_port, __be32 loc_addr, 603db8dac20SDavid S. Miller __be16 rmt_port, __be32 rmt_addr, 604db8dac20SDavid S. Miller int dif) 605db8dac20SDavid S. Miller { 60688ab1932SEric Dumazet struct hlist_nulls_node *node; 607db8dac20SDavid S. Miller struct sock *s = sk; 608db8dac20SDavid S. Miller unsigned short hnum = ntohs(loc_port); 609db8dac20SDavid S. Miller 61088ab1932SEric Dumazet sk_nulls_for_each_from(s, node) { 611421b3885SShawn Bohrer if (__udp_is_mcast_sock(net, s, 612421b3885SShawn Bohrer loc_port, loc_addr, 613421b3885SShawn Bohrer rmt_port, rmt_addr, 614421b3885SShawn Bohrer dif, hnum)) 615db8dac20SDavid S. Miller goto found; 616db8dac20SDavid S. Miller } 617db8dac20SDavid S. Miller s = NULL; 618db8dac20SDavid S. Miller found: 619db8dac20SDavid S. Miller return s; 620db8dac20SDavid S. Miller } 621db8dac20SDavid S. Miller 622db8dac20SDavid S. Miller /* 623db8dac20SDavid S. Miller * This routine is called by the ICMP module when it gets some 624db8dac20SDavid S. Miller * sort of error condition. If err < 0 then the socket should 625db8dac20SDavid S. Miller * be closed and the error returned to the user. If err > 0 626db8dac20SDavid S. Miller * it's just the icmp type << 8 | icmp code. 627db8dac20SDavid S. Miller * Header points to the ip header of the error packet. We move 628db8dac20SDavid S. Miller * on past this. Then (as it used to claim before adjustment) 629db8dac20SDavid S. Miller * header points to the first 8 bytes of the udp header. We need 630db8dac20SDavid S. Miller * to find the appropriate port. 631db8dac20SDavid S. Miller */ 632db8dac20SDavid S. Miller 633645ca708SEric Dumazet void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) 634db8dac20SDavid S. Miller { 635db8dac20SDavid S. Miller struct inet_sock *inet; 636b71d1d42SEric Dumazet const struct iphdr *iph = (const struct iphdr *)skb->data; 637db8dac20SDavid S. Miller struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 638db8dac20SDavid S. Miller const int type = icmp_hdr(skb)->type; 639db8dac20SDavid S. Miller const int code = icmp_hdr(skb)->code; 640db8dac20SDavid S. Miller struct sock *sk; 641db8dac20SDavid S. Miller int harderr; 642db8dac20SDavid S. Miller int err; 643fd54d716SPavel Emelyanov struct net *net = dev_net(skb->dev); 644db8dac20SDavid S. Miller 645fd54d716SPavel Emelyanov sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, 646db8dac20SDavid S. Miller iph->saddr, uh->source, skb->dev->ifindex, udptable); 647db8dac20SDavid S. Miller if (sk == NULL) { 648dcfc23caSPavel Emelyanov ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 649db8dac20SDavid S. Miller return; /* No socket for error */ 650db8dac20SDavid S. Miller } 651db8dac20SDavid S. Miller 652db8dac20SDavid S. Miller err = 0; 653db8dac20SDavid S. Miller harderr = 0; 654db8dac20SDavid S. Miller inet = inet_sk(sk); 655db8dac20SDavid S. Miller 656db8dac20SDavid S. Miller switch (type) { 657db8dac20SDavid S. Miller default: 658db8dac20SDavid S. Miller case ICMP_TIME_EXCEEDED: 659db8dac20SDavid S. Miller err = EHOSTUNREACH; 660db8dac20SDavid S. Miller break; 661db8dac20SDavid S. Miller case ICMP_SOURCE_QUENCH: 662db8dac20SDavid S. Miller goto out; 663db8dac20SDavid S. Miller case ICMP_PARAMETERPROB: 664db8dac20SDavid S. Miller err = EPROTO; 665db8dac20SDavid S. Miller harderr = 1; 666db8dac20SDavid S. Miller break; 667db8dac20SDavid S. Miller case ICMP_DEST_UNREACH: 668db8dac20SDavid S. Miller if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ 66936393395SDavid S. Miller ipv4_sk_update_pmtu(skb, sk, info); 670db8dac20SDavid S. Miller if (inet->pmtudisc != IP_PMTUDISC_DONT) { 671db8dac20SDavid S. Miller err = EMSGSIZE; 672db8dac20SDavid S. Miller harderr = 1; 673db8dac20SDavid S. Miller break; 674db8dac20SDavid S. Miller } 675db8dac20SDavid S. Miller goto out; 676db8dac20SDavid S. Miller } 677db8dac20SDavid S. Miller err = EHOSTUNREACH; 678db8dac20SDavid S. Miller if (code <= NR_ICMP_UNREACH) { 679db8dac20SDavid S. Miller harderr = icmp_err_convert[code].fatal; 680db8dac20SDavid S. Miller err = icmp_err_convert[code].errno; 681db8dac20SDavid S. Miller } 682db8dac20SDavid S. Miller break; 68355be7a9cSDavid S. Miller case ICMP_REDIRECT: 68455be7a9cSDavid S. Miller ipv4_sk_redirect(skb, sk); 6851a462d18SDuan Jiong goto out; 686db8dac20SDavid S. Miller } 687db8dac20SDavid S. Miller 688db8dac20SDavid S. Miller /* 689db8dac20SDavid S. Miller * RFC1122: OK. Passes ICMP errors back to application, as per 690db8dac20SDavid S. Miller * 4.1.3.3. 691db8dac20SDavid S. Miller */ 692db8dac20SDavid S. Miller if (!inet->recverr) { 693db8dac20SDavid S. Miller if (!harderr || sk->sk_state != TCP_ESTABLISHED) 694db8dac20SDavid S. Miller goto out; 695b1faf566SEric Dumazet } else 696db8dac20SDavid S. Miller ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); 697b1faf566SEric Dumazet 698db8dac20SDavid S. Miller sk->sk_err = err; 699db8dac20SDavid S. Miller sk->sk_error_report(sk); 700db8dac20SDavid S. Miller out: 701db8dac20SDavid S. Miller sock_put(sk); 702db8dac20SDavid S. Miller } 703db8dac20SDavid S. Miller 704db8dac20SDavid S. Miller void udp_err(struct sk_buff *skb, u32 info) 705db8dac20SDavid S. Miller { 706645ca708SEric Dumazet __udp4_lib_err(skb, info, &udp_table); 707db8dac20SDavid S. Miller } 708db8dac20SDavid S. Miller 709db8dac20SDavid S. Miller /* 710db8dac20SDavid S. Miller * Throw away all pending data and cancel the corking. Socket is locked. 711db8dac20SDavid S. Miller */ 71236d926b9SDenis V. Lunev void udp_flush_pending_frames(struct sock *sk) 713db8dac20SDavid S. Miller { 714db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 715db8dac20SDavid S. Miller 716db8dac20SDavid S. Miller if (up->pending) { 717db8dac20SDavid S. Miller up->len = 0; 718db8dac20SDavid S. Miller up->pending = 0; 719db8dac20SDavid S. Miller ip_flush_pending_frames(sk); 720db8dac20SDavid S. Miller } 721db8dac20SDavid S. Miller } 72236d926b9SDenis V. Lunev EXPORT_SYMBOL(udp_flush_pending_frames); 723db8dac20SDavid S. Miller 724db8dac20SDavid S. Miller /** 725f6b9664fSHerbert Xu * udp4_hwcsum - handle outgoing HW checksumming 726db8dac20SDavid S. Miller * @skb: sk_buff containing the filled-in UDP header 727db8dac20SDavid S. Miller * (checksum field must be zeroed out) 728f6b9664fSHerbert Xu * @src: source IP address 729f6b9664fSHerbert Xu * @dst: destination IP address 730db8dac20SDavid S. Miller */ 731c26bf4a5SThomas Graf void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst) 732db8dac20SDavid S. Miller { 733db8dac20SDavid S. Miller struct udphdr *uh = udp_hdr(skb); 734f6b9664fSHerbert Xu struct sk_buff *frags = skb_shinfo(skb)->frag_list; 735f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 736f6b9664fSHerbert Xu int len = skb->len - offset; 737f6b9664fSHerbert Xu int hlen = len; 738db8dac20SDavid S. Miller __wsum csum = 0; 739db8dac20SDavid S. Miller 740f6b9664fSHerbert Xu if (!frags) { 741db8dac20SDavid S. Miller /* 742db8dac20SDavid S. Miller * Only one fragment on the socket. 743db8dac20SDavid S. Miller */ 744db8dac20SDavid S. Miller skb->csum_start = skb_transport_header(skb) - skb->head; 745db8dac20SDavid S. Miller skb->csum_offset = offsetof(struct udphdr, check); 746f6b9664fSHerbert Xu uh->check = ~csum_tcpudp_magic(src, dst, len, 747f6b9664fSHerbert Xu IPPROTO_UDP, 0); 748db8dac20SDavid S. Miller } else { 749db8dac20SDavid S. Miller /* 750db8dac20SDavid S. Miller * HW-checksum won't work as there are two or more 751db8dac20SDavid S. Miller * fragments on the socket so that all csums of sk_buffs 752db8dac20SDavid S. Miller * should be together 753db8dac20SDavid S. Miller */ 754f6b9664fSHerbert Xu do { 755f6b9664fSHerbert Xu csum = csum_add(csum, frags->csum); 756f6b9664fSHerbert Xu hlen -= frags->len; 757f6b9664fSHerbert Xu } while ((frags = frags->next)); 758db8dac20SDavid S. Miller 759f6b9664fSHerbert Xu csum = skb_checksum(skb, offset, hlen, csum); 760db8dac20SDavid S. Miller skb->ip_summed = CHECKSUM_NONE; 761db8dac20SDavid S. Miller 762db8dac20SDavid S. Miller uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); 763db8dac20SDavid S. Miller if (uh->check == 0) 764db8dac20SDavid S. Miller uh->check = CSUM_MANGLED_0; 765db8dac20SDavid S. Miller } 766db8dac20SDavid S. Miller } 767c26bf4a5SThomas Graf EXPORT_SYMBOL_GPL(udp4_hwcsum); 768db8dac20SDavid S. Miller 76979ab0531SDavid S. Miller static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) 770f6b9664fSHerbert Xu { 771f6b9664fSHerbert Xu struct sock *sk = skb->sk; 772f6b9664fSHerbert Xu struct inet_sock *inet = inet_sk(sk); 773f6b9664fSHerbert Xu struct udphdr *uh; 774f6b9664fSHerbert Xu int err = 0; 775f6b9664fSHerbert Xu int is_udplite = IS_UDPLITE(sk); 776f6b9664fSHerbert Xu int offset = skb_transport_offset(skb); 777f6b9664fSHerbert Xu int len = skb->len - offset; 778f6b9664fSHerbert Xu __wsum csum = 0; 779f6b9664fSHerbert Xu 780f6b9664fSHerbert Xu /* 781f6b9664fSHerbert Xu * Create a UDP header 782f6b9664fSHerbert Xu */ 783f6b9664fSHerbert Xu uh = udp_hdr(skb); 784f6b9664fSHerbert Xu uh->source = inet->inet_sport; 78579ab0531SDavid S. Miller uh->dest = fl4->fl4_dport; 786f6b9664fSHerbert Xu uh->len = htons(len); 787f6b9664fSHerbert Xu uh->check = 0; 788f6b9664fSHerbert Xu 789f6b9664fSHerbert Xu if (is_udplite) /* UDP-Lite */ 790f6b9664fSHerbert Xu csum = udplite_csum(skb); 791f6b9664fSHerbert Xu 792f6b9664fSHerbert Xu else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ 793f6b9664fSHerbert Xu 794f6b9664fSHerbert Xu skb->ip_summed = CHECKSUM_NONE; 795f6b9664fSHerbert Xu goto send; 796f6b9664fSHerbert Xu 797f6b9664fSHerbert Xu } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ 798f6b9664fSHerbert Xu 79979ab0531SDavid S. Miller udp4_hwcsum(skb, fl4->saddr, fl4->daddr); 800f6b9664fSHerbert Xu goto send; 801f6b9664fSHerbert Xu 802f6b9664fSHerbert Xu } else 803f6b9664fSHerbert Xu csum = udp_csum(skb); 804f6b9664fSHerbert Xu 805f6b9664fSHerbert Xu /* add protocol-dependent pseudo-header */ 80679ab0531SDavid S. Miller uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, 807f6b9664fSHerbert Xu sk->sk_protocol, csum); 808f6b9664fSHerbert Xu if (uh->check == 0) 809f6b9664fSHerbert Xu uh->check = CSUM_MANGLED_0; 810f6b9664fSHerbert Xu 811f6b9664fSHerbert Xu send: 812b5ec8eeaSEric Dumazet err = ip_send_skb(sock_net(sk), skb); 813f6b9664fSHerbert Xu if (err) { 814f6b9664fSHerbert Xu if (err == -ENOBUFS && !inet->recverr) { 815f6b9664fSHerbert Xu UDP_INC_STATS_USER(sock_net(sk), 816f6b9664fSHerbert Xu UDP_MIB_SNDBUFERRORS, is_udplite); 817f6b9664fSHerbert Xu err = 0; 818f6b9664fSHerbert Xu } 819f6b9664fSHerbert Xu } else 820f6b9664fSHerbert Xu UDP_INC_STATS_USER(sock_net(sk), 821f6b9664fSHerbert Xu UDP_MIB_OUTDATAGRAMS, is_udplite); 822f6b9664fSHerbert Xu return err; 823f6b9664fSHerbert Xu } 824f6b9664fSHerbert Xu 825db8dac20SDavid S. Miller /* 826db8dac20SDavid S. Miller * Push out all pending data as one UDP datagram. Socket is locked. 827db8dac20SDavid S. Miller */ 8288822b64aSHannes Frederic Sowa int udp_push_pending_frames(struct sock *sk) 829db8dac20SDavid S. Miller { 830db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 831db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 832b6f21b26SDavid S. Miller struct flowi4 *fl4 = &inet->cork.fl.u.ip4; 833db8dac20SDavid S. Miller struct sk_buff *skb; 834db8dac20SDavid S. Miller int err = 0; 835db8dac20SDavid S. Miller 83677968b78SDavid S. Miller skb = ip_finish_skb(sk, fl4); 837f6b9664fSHerbert Xu if (!skb) 838db8dac20SDavid S. Miller goto out; 839db8dac20SDavid S. Miller 84079ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 841db8dac20SDavid S. Miller 842db8dac20SDavid S. Miller out: 843db8dac20SDavid S. Miller up->len = 0; 844db8dac20SDavid S. Miller up->pending = 0; 845db8dac20SDavid S. Miller return err; 846db8dac20SDavid S. Miller } 8478822b64aSHannes Frederic Sowa EXPORT_SYMBOL(udp_push_pending_frames); 848db8dac20SDavid S. Miller 849db8dac20SDavid S. Miller int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 850db8dac20SDavid S. Miller size_t len) 851db8dac20SDavid S. Miller { 852db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 853db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 854e474995fSDavid S. Miller struct flowi4 fl4_stack; 855b6f21b26SDavid S. Miller struct flowi4 *fl4; 856db8dac20SDavid S. Miller int ulen = len; 857db8dac20SDavid S. Miller struct ipcm_cookie ipc; 858db8dac20SDavid S. Miller struct rtable *rt = NULL; 859db8dac20SDavid S. Miller int free = 0; 860db8dac20SDavid S. Miller int connected = 0; 861db8dac20SDavid S. Miller __be32 daddr, faddr, saddr; 862db8dac20SDavid S. Miller __be16 dport; 863db8dac20SDavid S. Miller u8 tos; 864db8dac20SDavid S. Miller int err, is_udplite = IS_UDPLITE(sk); 865db8dac20SDavid S. Miller int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 866db8dac20SDavid S. Miller int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); 867903ab86dSHerbert Xu struct sk_buff *skb; 868f6d8bd05SEric Dumazet struct ip_options_data opt_copy; 869db8dac20SDavid S. Miller 870db8dac20SDavid S. Miller if (len > 0xFFFF) 871db8dac20SDavid S. Miller return -EMSGSIZE; 872db8dac20SDavid S. Miller 873db8dac20SDavid S. Miller /* 874db8dac20SDavid S. Miller * Check the flags. 875db8dac20SDavid S. Miller */ 876db8dac20SDavid S. Miller 877db8dac20SDavid S. Miller if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ 878db8dac20SDavid S. Miller return -EOPNOTSUPP; 879db8dac20SDavid S. Miller 880db8dac20SDavid S. Miller ipc.opt = NULL; 8812244d07bSOliver Hartkopp ipc.tx_flags = 0; 882aa661581SFrancesco Fusco ipc.ttl = 0; 883aa661581SFrancesco Fusco ipc.tos = -1; 884db8dac20SDavid S. Miller 885903ab86dSHerbert Xu getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; 886903ab86dSHerbert Xu 887f5fca608SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 888db8dac20SDavid S. Miller if (up->pending) { 889db8dac20SDavid S. Miller /* 890db8dac20SDavid S. Miller * There are pending frames. 891db8dac20SDavid S. Miller * The socket lock must be held while it's corked. 892db8dac20SDavid S. Miller */ 893db8dac20SDavid S. Miller lock_sock(sk); 894db8dac20SDavid S. Miller if (likely(up->pending)) { 895db8dac20SDavid S. Miller if (unlikely(up->pending != AF_INET)) { 896db8dac20SDavid S. Miller release_sock(sk); 897db8dac20SDavid S. Miller return -EINVAL; 898db8dac20SDavid S. Miller } 899db8dac20SDavid S. Miller goto do_append_data; 900db8dac20SDavid S. Miller } 901db8dac20SDavid S. Miller release_sock(sk); 902db8dac20SDavid S. Miller } 903db8dac20SDavid S. Miller ulen += sizeof(struct udphdr); 904db8dac20SDavid S. Miller 905db8dac20SDavid S. Miller /* 906db8dac20SDavid S. Miller * Get and verify the address. 907db8dac20SDavid S. Miller */ 908db8dac20SDavid S. Miller if (msg->msg_name) { 909db8dac20SDavid S. Miller struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; 910db8dac20SDavid S. Miller if (msg->msg_namelen < sizeof(*usin)) 911db8dac20SDavid S. Miller return -EINVAL; 912db8dac20SDavid S. Miller if (usin->sin_family != AF_INET) { 913db8dac20SDavid S. Miller if (usin->sin_family != AF_UNSPEC) 914db8dac20SDavid S. Miller return -EAFNOSUPPORT; 915db8dac20SDavid S. Miller } 916db8dac20SDavid S. Miller 917db8dac20SDavid S. Miller daddr = usin->sin_addr.s_addr; 918db8dac20SDavid S. Miller dport = usin->sin_port; 919db8dac20SDavid S. Miller if (dport == 0) 920db8dac20SDavid S. Miller return -EINVAL; 921db8dac20SDavid S. Miller } else { 922db8dac20SDavid S. Miller if (sk->sk_state != TCP_ESTABLISHED) 923db8dac20SDavid S. Miller return -EDESTADDRREQ; 924c720c7e8SEric Dumazet daddr = inet->inet_daddr; 925c720c7e8SEric Dumazet dport = inet->inet_dport; 926db8dac20SDavid S. Miller /* Open fast path for connected socket. 927db8dac20SDavid S. Miller Route will not be used, if at least one option is set. 928db8dac20SDavid S. Miller */ 929db8dac20SDavid S. Miller connected = 1; 930db8dac20SDavid S. Miller } 931c720c7e8SEric Dumazet ipc.addr = inet->inet_saddr; 932db8dac20SDavid S. Miller 933db8dac20SDavid S. Miller ipc.oif = sk->sk_bound_dev_if; 934bf84a010SDaniel Borkmann 935bf84a010SDaniel Borkmann sock_tx_timestamp(sk, &ipc.tx_flags); 936bf84a010SDaniel Borkmann 937db8dac20SDavid S. Miller if (msg->msg_controllen) { 9383b1e0a65SYOSHIFUJI Hideaki err = ip_cmsg_send(sock_net(sk), msg, &ipc); 939db8dac20SDavid S. Miller if (err) 940db8dac20SDavid S. Miller return err; 941db8dac20SDavid S. Miller if (ipc.opt) 942db8dac20SDavid S. Miller free = 1; 943db8dac20SDavid S. Miller connected = 0; 944db8dac20SDavid S. Miller } 945f6d8bd05SEric Dumazet if (!ipc.opt) { 946f6d8bd05SEric Dumazet struct ip_options_rcu *inet_opt; 947f6d8bd05SEric Dumazet 948f6d8bd05SEric Dumazet rcu_read_lock(); 949f6d8bd05SEric Dumazet inet_opt = rcu_dereference(inet->inet_opt); 950f6d8bd05SEric Dumazet if (inet_opt) { 951f6d8bd05SEric Dumazet memcpy(&opt_copy, inet_opt, 952f6d8bd05SEric Dumazet sizeof(*inet_opt) + inet_opt->opt.optlen); 953f6d8bd05SEric Dumazet ipc.opt = &opt_copy.opt; 954f6d8bd05SEric Dumazet } 955f6d8bd05SEric Dumazet rcu_read_unlock(); 956f6d8bd05SEric Dumazet } 957db8dac20SDavid S. Miller 958db8dac20SDavid S. Miller saddr = ipc.addr; 959db8dac20SDavid S. Miller ipc.addr = faddr = daddr; 960db8dac20SDavid S. Miller 961f6d8bd05SEric Dumazet if (ipc.opt && ipc.opt->opt.srr) { 962db8dac20SDavid S. Miller if (!daddr) 963db8dac20SDavid S. Miller return -EINVAL; 964f6d8bd05SEric Dumazet faddr = ipc.opt->opt.faddr; 965db8dac20SDavid S. Miller connected = 0; 966db8dac20SDavid S. Miller } 967aa661581SFrancesco Fusco tos = get_rttos(&ipc, inet); 968db8dac20SDavid S. Miller if (sock_flag(sk, SOCK_LOCALROUTE) || 969db8dac20SDavid S. Miller (msg->msg_flags & MSG_DONTROUTE) || 970f6d8bd05SEric Dumazet (ipc.opt && ipc.opt->opt.is_strictroute)) { 971db8dac20SDavid S. Miller tos |= RTO_ONLINK; 972db8dac20SDavid S. Miller connected = 0; 973db8dac20SDavid S. Miller } 974db8dac20SDavid S. Miller 975db8dac20SDavid S. Miller if (ipv4_is_multicast(daddr)) { 976db8dac20SDavid S. Miller if (!ipc.oif) 977db8dac20SDavid S. Miller ipc.oif = inet->mc_index; 978db8dac20SDavid S. Miller if (!saddr) 979db8dac20SDavid S. Miller saddr = inet->mc_addr; 980db8dac20SDavid S. Miller connected = 0; 98176e21053SErich E. Hoover } else if (!ipc.oif) 98276e21053SErich E. Hoover ipc.oif = inet->uc_index; 983db8dac20SDavid S. Miller 984db8dac20SDavid S. Miller if (connected) 985db8dac20SDavid S. Miller rt = (struct rtable *)sk_dst_check(sk, 0); 986db8dac20SDavid S. Miller 987db8dac20SDavid S. Miller if (rt == NULL) { 98884a3aa00SPavel Emelyanov struct net *net = sock_net(sk); 98984a3aa00SPavel Emelyanov 990e474995fSDavid S. Miller fl4 = &fl4_stack; 991e474995fSDavid S. Miller flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, 992c0951cbcSDavid S. Miller RT_SCOPE_UNIVERSE, sk->sk_protocol, 993c0951cbcSDavid S. Miller inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, 994c0951cbcSDavid S. Miller faddr, saddr, dport, inet->inet_sport); 995c0951cbcSDavid S. Miller 996e474995fSDavid S. Miller security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); 997e474995fSDavid S. Miller rt = ip_route_output_flow(net, fl4, sk); 998b23dd4feSDavid S. Miller if (IS_ERR(rt)) { 999b23dd4feSDavid S. Miller err = PTR_ERR(rt); 100006dc94b1SDavid S. Miller rt = NULL; 1001db8dac20SDavid S. Miller if (err == -ENETUNREACH) 10027c73a6faSPavel Emelyanov IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 1003db8dac20SDavid S. Miller goto out; 1004db8dac20SDavid S. Miller } 1005db8dac20SDavid S. Miller 1006db8dac20SDavid S. Miller err = -EACCES; 1007db8dac20SDavid S. Miller if ((rt->rt_flags & RTCF_BROADCAST) && 1008db8dac20SDavid S. Miller !sock_flag(sk, SOCK_BROADCAST)) 1009db8dac20SDavid S. Miller goto out; 1010db8dac20SDavid S. Miller if (connected) 1011d8d1f30bSChangli Gao sk_dst_set(sk, dst_clone(&rt->dst)); 1012db8dac20SDavid S. Miller } 1013db8dac20SDavid S. Miller 1014db8dac20SDavid S. Miller if (msg->msg_flags&MSG_CONFIRM) 1015db8dac20SDavid S. Miller goto do_confirm; 1016db8dac20SDavid S. Miller back_from_confirm: 1017db8dac20SDavid S. Miller 1018e474995fSDavid S. Miller saddr = fl4->saddr; 1019db8dac20SDavid S. Miller if (!ipc.addr) 1020e474995fSDavid S. Miller daddr = ipc.addr = fl4->daddr; 1021db8dac20SDavid S. Miller 1022903ab86dSHerbert Xu /* Lockless fast path for the non-corking case. */ 1023903ab86dSHerbert Xu if (!corkreq) { 102477968b78SDavid S. Miller skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen, 1025903ab86dSHerbert Xu sizeof(struct udphdr), &ipc, &rt, 1026903ab86dSHerbert Xu msg->msg_flags); 1027903ab86dSHerbert Xu err = PTR_ERR(skb); 102850c3a487SYOSHIFUJI Hideaki / 吉藤英明 if (!IS_ERR_OR_NULL(skb)) 102979ab0531SDavid S. Miller err = udp_send_skb(skb, fl4); 1030903ab86dSHerbert Xu goto out; 1031903ab86dSHerbert Xu } 1032903ab86dSHerbert Xu 1033db8dac20SDavid S. Miller lock_sock(sk); 1034db8dac20SDavid S. Miller if (unlikely(up->pending)) { 1035db8dac20SDavid S. Miller /* The socket is already corked while preparing it. */ 1036db8dac20SDavid S. Miller /* ... which is an evident application bug. --ANK */ 1037db8dac20SDavid S. Miller release_sock(sk); 1038db8dac20SDavid S. Miller 1039afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n")); 1040db8dac20SDavid S. Miller err = -EINVAL; 1041db8dac20SDavid S. Miller goto out; 1042db8dac20SDavid S. Miller } 1043db8dac20SDavid S. Miller /* 1044db8dac20SDavid S. Miller * Now cork the socket to pend data. 1045db8dac20SDavid S. Miller */ 1046b6f21b26SDavid S. Miller fl4 = &inet->cork.fl.u.ip4; 1047b6f21b26SDavid S. Miller fl4->daddr = daddr; 1048b6f21b26SDavid S. Miller fl4->saddr = saddr; 10499cce96dfSDavid S. Miller fl4->fl4_dport = dport; 10509cce96dfSDavid S. Miller fl4->fl4_sport = inet->inet_sport; 1051db8dac20SDavid S. Miller up->pending = AF_INET; 1052db8dac20SDavid S. Miller 1053db8dac20SDavid S. Miller do_append_data: 1054db8dac20SDavid S. Miller up->len += ulen; 1055f5fca608SDavid S. Miller err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen, 10562e77d89bSEric Dumazet sizeof(struct udphdr), &ipc, &rt, 1057db8dac20SDavid S. Miller corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 1058db8dac20SDavid S. Miller if (err) 1059db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1060db8dac20SDavid S. Miller else if (!corkreq) 1061db8dac20SDavid S. Miller err = udp_push_pending_frames(sk); 1062db8dac20SDavid S. Miller else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) 1063db8dac20SDavid S. Miller up->pending = 0; 1064db8dac20SDavid S. Miller release_sock(sk); 1065db8dac20SDavid S. Miller 1066db8dac20SDavid S. Miller out: 1067db8dac20SDavid S. Miller ip_rt_put(rt); 1068db8dac20SDavid S. Miller if (free) 1069db8dac20SDavid S. Miller kfree(ipc.opt); 1070db8dac20SDavid S. Miller if (!err) 1071db8dac20SDavid S. Miller return len; 1072db8dac20SDavid S. Miller /* 1073db8dac20SDavid S. Miller * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting 1074db8dac20SDavid S. Miller * ENOBUFS might not be good (it's not tunable per se), but otherwise 1075db8dac20SDavid S. Miller * we don't have a good statistic (IpOutDiscards but it can be too many 1076db8dac20SDavid S. Miller * things). We could add another new stat but at least for now that 1077db8dac20SDavid S. Miller * seems like overkill. 1078db8dac20SDavid S. Miller */ 1079db8dac20SDavid S. Miller if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { 1080629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), 1081629ca23cSPavel Emelyanov UDP_MIB_SNDBUFERRORS, is_udplite); 1082db8dac20SDavid S. Miller } 1083db8dac20SDavid S. Miller return err; 1084db8dac20SDavid S. Miller 1085db8dac20SDavid S. Miller do_confirm: 1086d8d1f30bSChangli Gao dst_confirm(&rt->dst); 1087db8dac20SDavid S. Miller if (!(msg->msg_flags&MSG_PROBE) || len) 1088db8dac20SDavid S. Miller goto back_from_confirm; 1089db8dac20SDavid S. Miller err = 0; 1090db8dac20SDavid S. Miller goto out; 1091db8dac20SDavid S. Miller } 1092c482c568SEric Dumazet EXPORT_SYMBOL(udp_sendmsg); 1093db8dac20SDavid S. Miller 1094db8dac20SDavid S. Miller int udp_sendpage(struct sock *sk, struct page *page, int offset, 1095db8dac20SDavid S. Miller size_t size, int flags) 1096db8dac20SDavid S. Miller { 1097f5fca608SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 1098db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 1099db8dac20SDavid S. Miller int ret; 1100db8dac20SDavid S. Miller 1101db8dac20SDavid S. Miller if (!up->pending) { 1102db8dac20SDavid S. Miller struct msghdr msg = { .msg_flags = flags|MSG_MORE }; 1103db8dac20SDavid S. Miller 1104db8dac20SDavid S. Miller /* Call udp_sendmsg to specify destination address which 1105db8dac20SDavid S. Miller * sendpage interface can't pass. 1106db8dac20SDavid S. Miller * This will succeed only when the socket is connected. 1107db8dac20SDavid S. Miller */ 1108db8dac20SDavid S. Miller ret = udp_sendmsg(NULL, sk, &msg, 0); 1109db8dac20SDavid S. Miller if (ret < 0) 1110db8dac20SDavid S. Miller return ret; 1111db8dac20SDavid S. Miller } 1112db8dac20SDavid S. Miller 1113db8dac20SDavid S. Miller lock_sock(sk); 1114db8dac20SDavid S. Miller 1115db8dac20SDavid S. Miller if (unlikely(!up->pending)) { 1116db8dac20SDavid S. Miller release_sock(sk); 1117db8dac20SDavid S. Miller 1118afd46503SJoe Perches LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n")); 1119db8dac20SDavid S. Miller return -EINVAL; 1120db8dac20SDavid S. Miller } 1121db8dac20SDavid S. Miller 1122f5fca608SDavid S. Miller ret = ip_append_page(sk, &inet->cork.fl.u.ip4, 1123f5fca608SDavid S. Miller page, offset, size, flags); 1124db8dac20SDavid S. Miller if (ret == -EOPNOTSUPP) { 1125db8dac20SDavid S. Miller release_sock(sk); 1126db8dac20SDavid S. Miller return sock_no_sendpage(sk->sk_socket, page, offset, 1127db8dac20SDavid S. Miller size, flags); 1128db8dac20SDavid S. Miller } 1129db8dac20SDavid S. Miller if (ret < 0) { 1130db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 1131db8dac20SDavid S. Miller goto out; 1132db8dac20SDavid S. Miller } 1133db8dac20SDavid S. Miller 1134db8dac20SDavid S. Miller up->len += size; 1135db8dac20SDavid S. Miller if (!(up->corkflag || (flags&MSG_MORE))) 1136db8dac20SDavid S. Miller ret = udp_push_pending_frames(sk); 1137db8dac20SDavid S. Miller if (!ret) 1138db8dac20SDavid S. Miller ret = size; 1139db8dac20SDavid S. Miller out: 1140db8dac20SDavid S. Miller release_sock(sk); 1141db8dac20SDavid S. Miller return ret; 1142db8dac20SDavid S. Miller } 1143db8dac20SDavid S. Miller 114485584672SEric Dumazet 114585584672SEric Dumazet /** 114685584672SEric Dumazet * first_packet_length - return length of first packet in receive queue 114785584672SEric Dumazet * @sk: socket 114885584672SEric Dumazet * 114985584672SEric Dumazet * Drops all bad checksum frames, until a valid one is found. 115085584672SEric Dumazet * Returns the length of found skb, or 0 if none is found. 115185584672SEric Dumazet */ 115285584672SEric Dumazet static unsigned int first_packet_length(struct sock *sk) 115385584672SEric Dumazet { 115485584672SEric Dumazet struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; 115585584672SEric Dumazet struct sk_buff *skb; 115685584672SEric Dumazet unsigned int res; 115785584672SEric Dumazet 115885584672SEric Dumazet __skb_queue_head_init(&list_kill); 115985584672SEric Dumazet 116085584672SEric Dumazet spin_lock_bh(&rcvq->lock); 116185584672SEric Dumazet while ((skb = skb_peek(rcvq)) != NULL && 116285584672SEric Dumazet udp_lib_checksum_complete(skb)) { 11636a5dc9e5SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, 11646a5dc9e5SEric Dumazet IS_UDPLITE(sk)); 116585584672SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 116685584672SEric Dumazet IS_UDPLITE(sk)); 11678edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 116885584672SEric Dumazet __skb_unlink(skb, rcvq); 116985584672SEric Dumazet __skb_queue_tail(&list_kill, skb); 117085584672SEric Dumazet } 117185584672SEric Dumazet res = skb ? skb->len : 0; 117285584672SEric Dumazet spin_unlock_bh(&rcvq->lock); 117385584672SEric Dumazet 117485584672SEric Dumazet if (!skb_queue_empty(&list_kill)) { 11758a74ad60SEric Dumazet bool slow = lock_sock_fast(sk); 11768a74ad60SEric Dumazet 117785584672SEric Dumazet __skb_queue_purge(&list_kill); 117885584672SEric Dumazet sk_mem_reclaim_partial(sk); 11798a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 118085584672SEric Dumazet } 118185584672SEric Dumazet return res; 118285584672SEric Dumazet } 118385584672SEric Dumazet 11841da177e4SLinus Torvalds /* 11851da177e4SLinus Torvalds * IOCTL requests applicable to the UDP protocol 11861da177e4SLinus Torvalds */ 11871da177e4SLinus Torvalds 11881da177e4SLinus Torvalds int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) 11891da177e4SLinus Torvalds { 11906516c655SStephen Hemminger switch (cmd) { 11911da177e4SLinus Torvalds case SIOCOUTQ: 11921da177e4SLinus Torvalds { 119331e6d363SEric Dumazet int amount = sk_wmem_alloc_get(sk); 119431e6d363SEric Dumazet 11951da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 11961da177e4SLinus Torvalds } 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds case SIOCINQ: 11991da177e4SLinus Torvalds { 120085584672SEric Dumazet unsigned int amount = first_packet_length(sk); 12011da177e4SLinus Torvalds 120285584672SEric Dumazet if (amount) 12031da177e4SLinus Torvalds /* 12041da177e4SLinus Torvalds * We will only return the amount 12051da177e4SLinus Torvalds * of this packet since that is all 12061da177e4SLinus Torvalds * that will be read. 12071da177e4SLinus Torvalds */ 120885584672SEric Dumazet amount -= sizeof(struct udphdr); 120985584672SEric Dumazet 12101da177e4SLinus Torvalds return put_user(amount, (int __user *)arg); 12111da177e4SLinus Torvalds } 12121da177e4SLinus Torvalds 12131da177e4SLinus Torvalds default: 12141da177e4SLinus Torvalds return -ENOIOCTLCMD; 12151da177e4SLinus Torvalds } 12166516c655SStephen Hemminger 12176516c655SStephen Hemminger return 0; 12181da177e4SLinus Torvalds } 1219c482c568SEric Dumazet EXPORT_SYMBOL(udp_ioctl); 12201da177e4SLinus Torvalds 1221db8dac20SDavid S. Miller /* 1222db8dac20SDavid S. Miller * This should be easy, if there is something there we 1223db8dac20SDavid S. Miller * return it, otherwise we block. 1224db8dac20SDavid S. Miller */ 1225db8dac20SDavid S. Miller 1226db8dac20SDavid S. Miller int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 1227db8dac20SDavid S. Miller size_t len, int noblock, int flags, int *addr_len) 1228db8dac20SDavid S. Miller { 1229db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sk); 1230db8dac20SDavid S. Miller struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; 1231db8dac20SDavid S. Miller struct sk_buff *skb; 123259c2cdaeSDavid S. Miller unsigned int ulen, copied; 12333f518bf7SPavel Emelyanov int peeked, off = 0; 1234db8dac20SDavid S. Miller int err; 1235db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 12368a74ad60SEric Dumazet bool slow; 1237db8dac20SDavid S. Miller 1238db8dac20SDavid S. Miller if (flags & MSG_ERRQUEUE) 1239*85fbaa75SHannes Frederic Sowa return ip_recv_error(sk, msg, len, addr_len); 1240db8dac20SDavid S. Miller 1241db8dac20SDavid S. Miller try_again: 1242db8dac20SDavid S. Miller skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 12433f518bf7SPavel Emelyanov &peeked, &off, &err); 1244db8dac20SDavid S. Miller if (!skb) 1245db8dac20SDavid S. Miller goto out; 1246db8dac20SDavid S. Miller 1247db8dac20SDavid S. Miller ulen = skb->len - sizeof(struct udphdr); 124859c2cdaeSDavid S. Miller copied = len; 124959c2cdaeSDavid S. Miller if (copied > ulen) 125059c2cdaeSDavid S. Miller copied = ulen; 125159c2cdaeSDavid S. Miller else if (copied < ulen) 1252db8dac20SDavid S. Miller msg->msg_flags |= MSG_TRUNC; 1253db8dac20SDavid S. Miller 1254db8dac20SDavid S. Miller /* 1255db8dac20SDavid S. Miller * If checksum is needed at all, try to do it while copying the 1256db8dac20SDavid S. Miller * data. If the data is truncated, or if we only want a partial 1257db8dac20SDavid S. Miller * coverage checksum (UDP-Lite), do it before the copy. 1258db8dac20SDavid S. Miller */ 1259db8dac20SDavid S. Miller 126059c2cdaeSDavid S. Miller if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { 1261db8dac20SDavid S. Miller if (udp_lib_checksum_complete(skb)) 1262db8dac20SDavid S. Miller goto csum_copy_err; 1263db8dac20SDavid S. Miller } 1264db8dac20SDavid S. Miller 1265db8dac20SDavid S. Miller if (skb_csum_unnecessary(skb)) 1266db8dac20SDavid S. Miller err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), 126759c2cdaeSDavid S. Miller msg->msg_iov, copied); 1268db8dac20SDavid S. Miller else { 1269c482c568SEric Dumazet err = skb_copy_and_csum_datagram_iovec(skb, 1270c482c568SEric Dumazet sizeof(struct udphdr), 1271c482c568SEric Dumazet msg->msg_iov); 1272db8dac20SDavid S. Miller 1273db8dac20SDavid S. Miller if (err == -EINVAL) 1274db8dac20SDavid S. Miller goto csum_copy_err; 1275db8dac20SDavid S. Miller } 1276db8dac20SDavid S. Miller 127722911fc5SEric Dumazet if (unlikely(err)) { 127822911fc5SEric Dumazet trace_kfree_skb(skb, udp_recvmsg); 1279979402b1SEric Dumazet if (!peeked) { 1280979402b1SEric Dumazet atomic_inc(&sk->sk_drops); 1281979402b1SEric Dumazet UDP_INC_STATS_USER(sock_net(sk), 1282979402b1SEric Dumazet UDP_MIB_INERRORS, is_udplite); 1283979402b1SEric Dumazet } 1284db8dac20SDavid S. Miller goto out_free; 128522911fc5SEric Dumazet } 1286db8dac20SDavid S. Miller 1287db8dac20SDavid S. Miller if (!peeked) 1288629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), 1289629ca23cSPavel Emelyanov UDP_MIB_INDATAGRAMS, is_udplite); 1290db8dac20SDavid S. Miller 12913b885787SNeil Horman sock_recv_ts_and_drops(msg, sk, skb); 1292db8dac20SDavid S. Miller 1293db8dac20SDavid S. Miller /* Copy the address. */ 1294c482c568SEric Dumazet if (sin) { 1295db8dac20SDavid S. Miller sin->sin_family = AF_INET; 1296db8dac20SDavid S. Miller sin->sin_port = udp_hdr(skb)->source; 1297db8dac20SDavid S. Miller sin->sin_addr.s_addr = ip_hdr(skb)->saddr; 1298db8dac20SDavid S. Miller memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 1299bceaa902SHannes Frederic Sowa *addr_len = sizeof(*sin); 1300db8dac20SDavid S. Miller } 1301db8dac20SDavid S. Miller if (inet->cmsg_flags) 1302db8dac20SDavid S. Miller ip_cmsg_recv(msg, skb); 1303db8dac20SDavid S. Miller 130459c2cdaeSDavid S. Miller err = copied; 1305db8dac20SDavid S. Miller if (flags & MSG_TRUNC) 1306db8dac20SDavid S. Miller err = ulen; 1307db8dac20SDavid S. Miller 1308db8dac20SDavid S. Miller out_free: 13099d410c79SEric Dumazet skb_free_datagram_locked(sk, skb); 1310db8dac20SDavid S. Miller out: 1311db8dac20SDavid S. Miller return err; 1312db8dac20SDavid S. Miller 1313db8dac20SDavid S. Miller csum_copy_err: 13148a74ad60SEric Dumazet slow = lock_sock_fast(sk); 13156a5dc9e5SEric Dumazet if (!skb_kill_datagram(sk, skb, flags)) { 13166a5dc9e5SEric Dumazet UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1317629ca23cSPavel Emelyanov UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 13186a5dc9e5SEric Dumazet } 13198a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 1320db8dac20SDavid S. Miller 1321db8dac20SDavid S. Miller if (noblock) 1322db8dac20SDavid S. Miller return -EAGAIN; 13239cfaa8deSXufeng Zhang 13249cfaa8deSXufeng Zhang /* starting over for a new packet */ 13259cfaa8deSXufeng Zhang msg->msg_flags &= ~MSG_TRUNC; 1326db8dac20SDavid S. Miller goto try_again; 1327db8dac20SDavid S. Miller } 1328db8dac20SDavid S. Miller 1329db8dac20SDavid S. Miller 13301da177e4SLinus Torvalds int udp_disconnect(struct sock *sk, int flags) 13311da177e4SLinus Torvalds { 13321da177e4SLinus Torvalds struct inet_sock *inet = inet_sk(sk); 13331da177e4SLinus Torvalds /* 13341da177e4SLinus Torvalds * 1003.1g - break association. 13351da177e4SLinus Torvalds */ 13361da177e4SLinus Torvalds 13371da177e4SLinus Torvalds sk->sk_state = TCP_CLOSE; 1338c720c7e8SEric Dumazet inet->inet_daddr = 0; 1339c720c7e8SEric Dumazet inet->inet_dport = 0; 1340bdeab991STom Herbert sock_rps_reset_rxhash(sk); 13411da177e4SLinus Torvalds sk->sk_bound_dev_if = 0; 13421da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 13431da177e4SLinus Torvalds inet_reset_saddr(sk); 13441da177e4SLinus Torvalds 13451da177e4SLinus Torvalds if (!(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) { 13461da177e4SLinus Torvalds sk->sk_prot->unhash(sk); 1347c720c7e8SEric Dumazet inet->inet_sport = 0; 13481da177e4SLinus Torvalds } 13491da177e4SLinus Torvalds sk_dst_reset(sk); 13501da177e4SLinus Torvalds return 0; 13511da177e4SLinus Torvalds } 1352c482c568SEric Dumazet EXPORT_SYMBOL(udp_disconnect); 13531da177e4SLinus Torvalds 1354645ca708SEric Dumazet void udp_lib_unhash(struct sock *sk) 1355645ca708SEric Dumazet { 1356723b4610SEric Dumazet if (sk_hashed(sk)) { 1357645ca708SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1358512615b6SEric Dumazet struct udp_hslot *hslot, *hslot2; 1359512615b6SEric Dumazet 1360512615b6SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1361d4cada4aSEric Dumazet udp_sk(sk)->udp_port_hash); 1362512615b6SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1363645ca708SEric Dumazet 1364c8db3fecSEric Dumazet spin_lock_bh(&hslot->lock); 136588ab1932SEric Dumazet if (sk_nulls_del_node_init_rcu(sk)) { 1366fdcc8aa9SEric Dumazet hslot->count--; 1367c720c7e8SEric Dumazet inet_sk(sk)->inet_num = 0; 1368645ca708SEric Dumazet sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 1369512615b6SEric Dumazet 1370512615b6SEric Dumazet spin_lock(&hslot2->lock); 1371512615b6SEric Dumazet hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1372512615b6SEric Dumazet hslot2->count--; 1373512615b6SEric Dumazet spin_unlock(&hslot2->lock); 1374645ca708SEric Dumazet } 1375c8db3fecSEric Dumazet spin_unlock_bh(&hslot->lock); 1376645ca708SEric Dumazet } 1377723b4610SEric Dumazet } 1378645ca708SEric Dumazet EXPORT_SYMBOL(udp_lib_unhash); 1379645ca708SEric Dumazet 1380719f8358SEric Dumazet /* 1381719f8358SEric Dumazet * inet_rcv_saddr was changed, we must rehash secondary hash 1382719f8358SEric Dumazet */ 1383719f8358SEric Dumazet void udp_lib_rehash(struct sock *sk, u16 newhash) 1384719f8358SEric Dumazet { 1385719f8358SEric Dumazet if (sk_hashed(sk)) { 1386719f8358SEric Dumazet struct udp_table *udptable = sk->sk_prot->h.udp_table; 1387719f8358SEric Dumazet struct udp_hslot *hslot, *hslot2, *nhslot2; 1388719f8358SEric Dumazet 1389719f8358SEric Dumazet hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1390719f8358SEric Dumazet nhslot2 = udp_hashslot2(udptable, newhash); 1391719f8358SEric Dumazet udp_sk(sk)->udp_portaddr_hash = newhash; 1392719f8358SEric Dumazet if (hslot2 != nhslot2) { 1393719f8358SEric Dumazet hslot = udp_hashslot(udptable, sock_net(sk), 1394719f8358SEric Dumazet udp_sk(sk)->udp_port_hash); 1395719f8358SEric Dumazet /* we must lock primary chain too */ 1396719f8358SEric Dumazet spin_lock_bh(&hslot->lock); 1397719f8358SEric Dumazet 1398719f8358SEric Dumazet spin_lock(&hslot2->lock); 1399719f8358SEric Dumazet hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1400719f8358SEric Dumazet hslot2->count--; 1401719f8358SEric Dumazet spin_unlock(&hslot2->lock); 1402719f8358SEric Dumazet 1403719f8358SEric Dumazet spin_lock(&nhslot2->lock); 1404719f8358SEric Dumazet hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 1405719f8358SEric Dumazet &nhslot2->head); 1406719f8358SEric Dumazet nhslot2->count++; 1407719f8358SEric Dumazet spin_unlock(&nhslot2->lock); 1408719f8358SEric Dumazet 1409719f8358SEric Dumazet spin_unlock_bh(&hslot->lock); 1410719f8358SEric Dumazet } 1411719f8358SEric Dumazet } 1412719f8358SEric Dumazet } 1413719f8358SEric Dumazet EXPORT_SYMBOL(udp_lib_rehash); 1414719f8358SEric Dumazet 1415719f8358SEric Dumazet static void udp_v4_rehash(struct sock *sk) 1416719f8358SEric Dumazet { 1417719f8358SEric Dumazet u16 new_hash = udp4_portaddr_hash(sock_net(sk), 1418719f8358SEric Dumazet inet_sk(sk)->inet_rcv_saddr, 1419719f8358SEric Dumazet inet_sk(sk)->inet_num); 1420719f8358SEric Dumazet udp_lib_rehash(sk, new_hash); 1421719f8358SEric Dumazet } 1422719f8358SEric Dumazet 142393821778SHerbert Xu static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 142493821778SHerbert Xu { 1425fec5e652STom Herbert int rc; 142693821778SHerbert Xu 1427005ec974SShawn Bohrer if (inet_sk(sk)->inet_daddr) { 1428bdeab991STom Herbert sock_rps_save_rxhash(sk, skb); 1429005ec974SShawn Bohrer sk_mark_napi_id(sk, skb); 1430005ec974SShawn Bohrer } 1431fec5e652STom Herbert 1432d826eb14SEric Dumazet rc = sock_queue_rcv_skb(sk, skb); 1433766e9037SEric Dumazet if (rc < 0) { 1434766e9037SEric Dumazet int is_udplite = IS_UDPLITE(sk); 1435766e9037SEric Dumazet 143693821778SHerbert Xu /* Note that an ENOMEM error is charged twice */ 1437766e9037SEric Dumazet if (rc == -ENOMEM) 143893821778SHerbert Xu UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 143993821778SHerbert Xu is_udplite); 1440766e9037SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1441766e9037SEric Dumazet kfree_skb(skb); 1442296f7ea7SSatoru Moriya trace_udp_fail_queue_rcv_skb(rc, sk); 1443766e9037SEric Dumazet return -1; 144493821778SHerbert Xu } 144593821778SHerbert Xu 144693821778SHerbert Xu return 0; 144793821778SHerbert Xu 144893821778SHerbert Xu } 144993821778SHerbert Xu 1450447167bfSEric Dumazet static struct static_key udp_encap_needed __read_mostly; 1451447167bfSEric Dumazet void udp_encap_enable(void) 1452447167bfSEric Dumazet { 1453447167bfSEric Dumazet if (!static_key_enabled(&udp_encap_needed)) 1454447167bfSEric Dumazet static_key_slow_inc(&udp_encap_needed); 1455447167bfSEric Dumazet } 1456447167bfSEric Dumazet EXPORT_SYMBOL(udp_encap_enable); 1457447167bfSEric Dumazet 1458db8dac20SDavid S. Miller /* returns: 1459db8dac20SDavid S. Miller * -1: error 1460db8dac20SDavid S. Miller * 0: success 1461db8dac20SDavid S. Miller * >0: "udp encap" protocol resubmission 1462db8dac20SDavid S. Miller * 1463db8dac20SDavid S. Miller * Note that in the success and error cases, the skb is assumed to 1464db8dac20SDavid S. Miller * have either been requeued or freed. 1465db8dac20SDavid S. Miller */ 1466db8dac20SDavid S. Miller int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1467db8dac20SDavid S. Miller { 1468db8dac20SDavid S. Miller struct udp_sock *up = udp_sk(sk); 1469db8dac20SDavid S. Miller int rc; 1470db8dac20SDavid S. Miller int is_udplite = IS_UDPLITE(sk); 1471db8dac20SDavid S. Miller 1472db8dac20SDavid S. Miller /* 1473db8dac20SDavid S. Miller * Charge it to the socket, dropping if the queue is full. 1474db8dac20SDavid S. Miller */ 1475db8dac20SDavid S. Miller if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1476db8dac20SDavid S. Miller goto drop; 1477db8dac20SDavid S. Miller nf_reset(skb); 1478db8dac20SDavid S. Miller 1479447167bfSEric Dumazet if (static_key_false(&udp_encap_needed) && up->encap_type) { 14800ad92ad0SEric Dumazet int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 14810ad92ad0SEric Dumazet 1482db8dac20SDavid S. Miller /* 1483db8dac20SDavid S. Miller * This is an encapsulation socket so pass the skb to 1484db8dac20SDavid S. Miller * the socket's udp_encap_rcv() hook. Otherwise, just 1485db8dac20SDavid S. Miller * fall through and pass this up the UDP socket. 1486db8dac20SDavid S. Miller * up->encap_rcv() returns the following value: 1487db8dac20SDavid S. Miller * =0 if skb was successfully passed to the encap 1488db8dac20SDavid S. Miller * handler or was discarded by it. 1489db8dac20SDavid S. Miller * >0 if skb should be passed on to UDP. 1490db8dac20SDavid S. Miller * <0 if skb should be resubmitted as proto -N 1491db8dac20SDavid S. Miller */ 1492db8dac20SDavid S. Miller 1493db8dac20SDavid S. Miller /* if we're overly short, let UDP handle it */ 14940ad92ad0SEric Dumazet encap_rcv = ACCESS_ONCE(up->encap_rcv); 14950ad92ad0SEric Dumazet if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { 1496db8dac20SDavid S. Miller int ret; 1497db8dac20SDavid S. Miller 14980ad92ad0SEric Dumazet ret = encap_rcv(sk, skb); 1499db8dac20SDavid S. Miller if (ret <= 0) { 15000283328eSPavel Emelyanov UDP_INC_STATS_BH(sock_net(sk), 15010283328eSPavel Emelyanov UDP_MIB_INDATAGRAMS, 1502db8dac20SDavid S. Miller is_udplite); 1503db8dac20SDavid S. Miller return -ret; 1504db8dac20SDavid S. Miller } 1505db8dac20SDavid S. Miller } 1506db8dac20SDavid S. Miller 1507db8dac20SDavid S. Miller /* FALLTHROUGH -- it's a UDP Packet */ 1508db8dac20SDavid S. Miller } 1509db8dac20SDavid S. Miller 1510db8dac20SDavid S. Miller /* 1511db8dac20SDavid S. Miller * UDP-Lite specific tests, ignored on UDP sockets 1512db8dac20SDavid S. Miller */ 1513db8dac20SDavid S. Miller if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { 1514db8dac20SDavid S. Miller 1515db8dac20SDavid S. Miller /* 1516db8dac20SDavid S. Miller * MIB statistics other than incrementing the error count are 1517db8dac20SDavid S. Miller * disabled for the following two types of errors: these depend 1518db8dac20SDavid S. Miller * on the application settings, not on the functioning of the 1519db8dac20SDavid S. Miller * protocol stack as such. 1520db8dac20SDavid S. Miller * 1521db8dac20SDavid S. Miller * RFC 3828 here recommends (sec 3.3): "There should also be a 1522db8dac20SDavid S. Miller * way ... to ... at least let the receiving application block 1523db8dac20SDavid S. Miller * delivery of packets with coverage values less than a value 1524db8dac20SDavid S. Miller * provided by the application." 1525db8dac20SDavid S. Miller */ 1526db8dac20SDavid S. Miller if (up->pcrlen == 0) { /* full coverage was set */ 1527afd46503SJoe Perches LIMIT_NETDEBUG(KERN_WARNING "UDPLite: partial coverage %d while full coverage %d requested\n", 1528db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, skb->len); 1529db8dac20SDavid S. Miller goto drop; 1530db8dac20SDavid S. Miller } 1531db8dac20SDavid S. Miller /* The next case involves violating the min. coverage requested 1532db8dac20SDavid S. Miller * by the receiver. This is subtle: if receiver wants x and x is 1533db8dac20SDavid S. Miller * greater than the buffersize/MTU then receiver will complain 1534db8dac20SDavid S. Miller * that it wants x while sender emits packets of smaller size y. 1535db8dac20SDavid S. Miller * Therefore the above ...()->partial_cov statement is essential. 1536db8dac20SDavid S. Miller */ 1537db8dac20SDavid S. Miller if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { 1538afd46503SJoe Perches LIMIT_NETDEBUG(KERN_WARNING "UDPLite: coverage %d too small, need min %d\n", 1539db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov, up->pcrlen); 1540db8dac20SDavid S. Miller goto drop; 1541db8dac20SDavid S. Miller } 1542db8dac20SDavid S. Miller } 1543db8dac20SDavid S. Miller 154433d480ceSEric Dumazet if (rcu_access_pointer(sk->sk_filter) && 154533d480ceSEric Dumazet udp_lib_checksum_complete(skb)) 15466a5dc9e5SEric Dumazet goto csum_error; 1547db8dac20SDavid S. Miller 1548c377411fSEric Dumazet 1549f545a38fSEric Dumazet if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) 1550c377411fSEric Dumazet goto drop; 1551c377411fSEric Dumazet 155293821778SHerbert Xu rc = 0; 1553db8dac20SDavid S. Miller 1554fbf8866dSShawn Bohrer ipv4_pktinfo_prepare(sk, skb); 155593821778SHerbert Xu bh_lock_sock(sk); 155693821778SHerbert Xu if (!sock_owned_by_user(sk)) 155793821778SHerbert Xu rc = __udp_queue_rcv_skb(sk, skb); 1558f545a38fSEric Dumazet else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { 155955349790SZhu Yi bh_unlock_sock(sk); 156055349790SZhu Yi goto drop; 156155349790SZhu Yi } 156293821778SHerbert Xu bh_unlock_sock(sk); 156393821778SHerbert Xu 156493821778SHerbert Xu return rc; 1565db8dac20SDavid S. Miller 15666a5dc9e5SEric Dumazet csum_error: 15676a5dc9e5SEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); 1568db8dac20SDavid S. Miller drop: 15690283328eSPavel Emelyanov UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 15708edf19c2SEric Dumazet atomic_inc(&sk->sk_drops); 1571db8dac20SDavid S. Miller kfree_skb(skb); 1572db8dac20SDavid S. Miller return -1; 1573db8dac20SDavid S. Miller } 1574db8dac20SDavid S. Miller 15751240d137SEric Dumazet 15761240d137SEric Dumazet static void flush_stack(struct sock **stack, unsigned int count, 15771240d137SEric Dumazet struct sk_buff *skb, unsigned int final) 15781240d137SEric Dumazet { 15791240d137SEric Dumazet unsigned int i; 15801240d137SEric Dumazet struct sk_buff *skb1 = NULL; 1581f6b8f32cSEric Dumazet struct sock *sk; 15821240d137SEric Dumazet 15831240d137SEric Dumazet for (i = 0; i < count; i++) { 1584f6b8f32cSEric Dumazet sk = stack[i]; 15851240d137SEric Dumazet if (likely(skb1 == NULL)) 15861240d137SEric Dumazet skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); 15871240d137SEric Dumazet 1588f6b8f32cSEric Dumazet if (!skb1) { 1589f6b8f32cSEric Dumazet atomic_inc(&sk->sk_drops); 1590f6b8f32cSEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, 1591f6b8f32cSEric Dumazet IS_UDPLITE(sk)); 1592f6b8f32cSEric Dumazet UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, 1593f6b8f32cSEric Dumazet IS_UDPLITE(sk)); 1594f6b8f32cSEric Dumazet } 1595f6b8f32cSEric Dumazet 1596f6b8f32cSEric Dumazet if (skb1 && udp_queue_rcv_skb(sk, skb1) <= 0) 15971240d137SEric Dumazet skb1 = NULL; 15981240d137SEric Dumazet } 15991240d137SEric Dumazet if (unlikely(skb1)) 16001240d137SEric Dumazet kfree_skb(skb1); 16011240d137SEric Dumazet } 16021240d137SEric Dumazet 1603421b3885SShawn Bohrer static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 1604421b3885SShawn Bohrer { 1605421b3885SShawn Bohrer struct dst_entry *dst = skb_dst(skb); 1606421b3885SShawn Bohrer 1607421b3885SShawn Bohrer dst_hold(dst); 1608421b3885SShawn Bohrer sk->sk_rx_dst = dst; 1609421b3885SShawn Bohrer } 1610421b3885SShawn Bohrer 1611db8dac20SDavid S. Miller /* 1612db8dac20SDavid S. Miller * Multicasts and broadcasts go to each listener. 1613db8dac20SDavid S. Miller * 16141240d137SEric Dumazet * Note: called only from the BH handler context. 1615db8dac20SDavid S. Miller */ 1616e3163493SPavel Emelyanov static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, 1617db8dac20SDavid S. Miller struct udphdr *uh, 1618db8dac20SDavid S. Miller __be32 saddr, __be32 daddr, 1619645ca708SEric Dumazet struct udp_table *udptable) 1620db8dac20SDavid S. Miller { 16211240d137SEric Dumazet struct sock *sk, *stack[256 / sizeof(struct sock *)]; 1622f86dcc5aSEric Dumazet struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest)); 1623db8dac20SDavid S. Miller int dif; 16241240d137SEric Dumazet unsigned int i, count = 0; 1625db8dac20SDavid S. Miller 1626645ca708SEric Dumazet spin_lock(&hslot->lock); 162788ab1932SEric Dumazet sk = sk_nulls_head(&hslot->head); 1628db8dac20SDavid S. Miller dif = skb->dev->ifindex; 1629920a4611SEric Dumazet sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); 16301240d137SEric Dumazet while (sk) { 16311240d137SEric Dumazet stack[count++] = sk; 16321240d137SEric Dumazet sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, 16331240d137SEric Dumazet daddr, uh->source, saddr, dif); 16341240d137SEric Dumazet if (unlikely(count == ARRAY_SIZE(stack))) { 16351240d137SEric Dumazet if (!sk) 16361240d137SEric Dumazet break; 16371240d137SEric Dumazet flush_stack(stack, count, skb, ~0); 16381240d137SEric Dumazet count = 0; 1639db8dac20SDavid S. Miller } 16401240d137SEric Dumazet } 16411240d137SEric Dumazet /* 16421240d137SEric Dumazet * before releasing chain lock, we must take a reference on sockets 16431240d137SEric Dumazet */ 16441240d137SEric Dumazet for (i = 0; i < count; i++) 16451240d137SEric Dumazet sock_hold(stack[i]); 16461240d137SEric Dumazet 1647645ca708SEric Dumazet spin_unlock(&hslot->lock); 16481240d137SEric Dumazet 16491240d137SEric Dumazet /* 16501240d137SEric Dumazet * do the slow work with no lock held 16511240d137SEric Dumazet */ 16521240d137SEric Dumazet if (count) { 16531240d137SEric Dumazet flush_stack(stack, count, skb, count - 1); 16541240d137SEric Dumazet 16551240d137SEric Dumazet for (i = 0; i < count; i++) 16561240d137SEric Dumazet sock_put(stack[i]); 16571240d137SEric Dumazet } else { 16581240d137SEric Dumazet kfree_skb(skb); 16591240d137SEric Dumazet } 1660db8dac20SDavid S. Miller return 0; 1661db8dac20SDavid S. Miller } 1662db8dac20SDavid S. Miller 1663db8dac20SDavid S. Miller /* Initialize UDP checksum. If exited with zero value (success), 1664db8dac20SDavid S. Miller * CHECKSUM_UNNECESSARY means, that no more checks are required. 1665db8dac20SDavid S. Miller * Otherwise, csum completion requires chacksumming packet body, 1666db8dac20SDavid S. Miller * including udp header and folding it to skb->csum. 1667db8dac20SDavid S. Miller */ 1668db8dac20SDavid S. Miller static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, 1669db8dac20SDavid S. Miller int proto) 1670db8dac20SDavid S. Miller { 1671db8dac20SDavid S. Miller const struct iphdr *iph; 1672db8dac20SDavid S. Miller int err; 1673db8dac20SDavid S. Miller 1674db8dac20SDavid S. Miller UDP_SKB_CB(skb)->partial_cov = 0; 1675db8dac20SDavid S. Miller UDP_SKB_CB(skb)->cscov = skb->len; 1676db8dac20SDavid S. Miller 1677db8dac20SDavid S. Miller if (proto == IPPROTO_UDPLITE) { 1678db8dac20SDavid S. Miller err = udplite_checksum_init(skb, uh); 1679db8dac20SDavid S. Miller if (err) 1680db8dac20SDavid S. Miller return err; 1681db8dac20SDavid S. Miller } 1682db8dac20SDavid S. Miller 1683db8dac20SDavid S. Miller iph = ip_hdr(skb); 1684db8dac20SDavid S. Miller if (uh->check == 0) { 1685db8dac20SDavid S. Miller skb->ip_summed = CHECKSUM_UNNECESSARY; 1686db8dac20SDavid S. Miller } else if (skb->ip_summed == CHECKSUM_COMPLETE) { 1687db8dac20SDavid S. Miller if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 1688db8dac20SDavid S. Miller proto, skb->csum)) 1689db8dac20SDavid S. Miller skb->ip_summed = CHECKSUM_UNNECESSARY; 1690db8dac20SDavid S. Miller } 1691db8dac20SDavid S. Miller if (!skb_csum_unnecessary(skb)) 1692db8dac20SDavid S. Miller skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 1693db8dac20SDavid S. Miller skb->len, proto, 0); 1694db8dac20SDavid S. Miller /* Probably, we should checksum udp header (it should be in cache 1695db8dac20SDavid S. Miller * in any case) and data in tiny packets (< rx copybreak). 1696db8dac20SDavid S. Miller */ 1697db8dac20SDavid S. Miller 1698db8dac20SDavid S. Miller return 0; 1699db8dac20SDavid S. Miller } 1700db8dac20SDavid S. Miller 1701db8dac20SDavid S. Miller /* 1702db8dac20SDavid S. Miller * All we need to do is get the socket, and then do a checksum. 1703db8dac20SDavid S. Miller */ 1704db8dac20SDavid S. Miller 1705645ca708SEric Dumazet int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, 1706db8dac20SDavid S. Miller int proto) 1707db8dac20SDavid S. Miller { 1708db8dac20SDavid S. Miller struct sock *sk; 17097b5e56f9SJesper Dangaard Brouer struct udphdr *uh; 1710db8dac20SDavid S. Miller unsigned short ulen; 1711adf30907SEric Dumazet struct rtable *rt = skb_rtable(skb); 17122783ef23SJesper Dangaard Brouer __be32 saddr, daddr; 17130283328eSPavel Emelyanov struct net *net = dev_net(skb->dev); 1714db8dac20SDavid S. Miller 1715db8dac20SDavid S. Miller /* 1716db8dac20SDavid S. Miller * Validate the packet. 1717db8dac20SDavid S. Miller */ 1718db8dac20SDavid S. Miller if (!pskb_may_pull(skb, sizeof(struct udphdr))) 1719db8dac20SDavid S. Miller goto drop; /* No space for header. */ 1720db8dac20SDavid S. Miller 17217b5e56f9SJesper Dangaard Brouer uh = udp_hdr(skb); 1722db8dac20SDavid S. Miller ulen = ntohs(uh->len); 1723ccc2d97cSBjørn Mork saddr = ip_hdr(skb)->saddr; 1724ccc2d97cSBjørn Mork daddr = ip_hdr(skb)->daddr; 1725ccc2d97cSBjørn Mork 1726db8dac20SDavid S. Miller if (ulen > skb->len) 1727db8dac20SDavid S. Miller goto short_packet; 1728db8dac20SDavid S. Miller 1729db8dac20SDavid S. Miller if (proto == IPPROTO_UDP) { 1730db8dac20SDavid S. Miller /* UDP validates ulen. */ 1731db8dac20SDavid S. Miller if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) 1732db8dac20SDavid S. Miller goto short_packet; 1733db8dac20SDavid S. Miller uh = udp_hdr(skb); 1734db8dac20SDavid S. Miller } 1735db8dac20SDavid S. Miller 1736db8dac20SDavid S. Miller if (udp4_csum_init(skb, uh, proto)) 1737db8dac20SDavid S. Miller goto csum_error; 1738db8dac20SDavid S. Miller 1739421b3885SShawn Bohrer if (skb->sk) { 1740421b3885SShawn Bohrer int ret; 1741421b3885SShawn Bohrer sk = skb->sk; 1742421b3885SShawn Bohrer 1743421b3885SShawn Bohrer if (unlikely(sk->sk_rx_dst == NULL)) 1744421b3885SShawn Bohrer udp_sk_rx_dst_set(sk, skb); 1745421b3885SShawn Bohrer 1746421b3885SShawn Bohrer ret = udp_queue_rcv_skb(sk, skb); 1747421b3885SShawn Bohrer 1748421b3885SShawn Bohrer /* a return value > 0 means to resubmit the input, but 1749421b3885SShawn Bohrer * it wants the return to be -protocol, or 0 1750421b3885SShawn Bohrer */ 1751421b3885SShawn Bohrer if (ret > 0) 1752421b3885SShawn Bohrer return -ret; 1753421b3885SShawn Bohrer return 0; 1754421b3885SShawn Bohrer } else { 1755db8dac20SDavid S. Miller if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1756e3163493SPavel Emelyanov return __udp4_lib_mcast_deliver(net, skb, uh, 1757e3163493SPavel Emelyanov saddr, daddr, udptable); 1758db8dac20SDavid S. Miller 1759607c4aafSKOVACS Krisztian sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); 1760421b3885SShawn Bohrer } 1761db8dac20SDavid S. Miller 1762db8dac20SDavid S. Miller if (sk != NULL) { 1763a5b50476SEliezer Tamir int ret; 1764a5b50476SEliezer Tamir 1765a5b50476SEliezer Tamir ret = udp_queue_rcv_skb(sk, skb); 1766db8dac20SDavid S. Miller sock_put(sk); 1767db8dac20SDavid S. Miller 1768db8dac20SDavid S. Miller /* a return value > 0 means to resubmit the input, but 1769db8dac20SDavid S. Miller * it wants the return to be -protocol, or 0 1770db8dac20SDavid S. Miller */ 1771db8dac20SDavid S. Miller if (ret > 0) 1772db8dac20SDavid S. Miller return -ret; 1773db8dac20SDavid S. Miller return 0; 1774db8dac20SDavid S. Miller } 1775db8dac20SDavid S. Miller 1776db8dac20SDavid S. Miller if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1777db8dac20SDavid S. Miller goto drop; 1778db8dac20SDavid S. Miller nf_reset(skb); 1779db8dac20SDavid S. Miller 1780db8dac20SDavid S. Miller /* No socket. Drop packet silently, if checksum is wrong */ 1781db8dac20SDavid S. Miller if (udp_lib_checksum_complete(skb)) 1782db8dac20SDavid S. Miller goto csum_error; 1783db8dac20SDavid S. Miller 17840283328eSPavel Emelyanov UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 1785db8dac20SDavid S. Miller icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 1786db8dac20SDavid S. Miller 1787db8dac20SDavid S. Miller /* 1788db8dac20SDavid S. Miller * Hmm. We got an UDP packet to a port to which we 1789db8dac20SDavid S. Miller * don't wanna listen. Ignore it. 1790db8dac20SDavid S. Miller */ 1791db8dac20SDavid S. Miller kfree_skb(skb); 1792db8dac20SDavid S. Miller return 0; 1793db8dac20SDavid S. Miller 1794db8dac20SDavid S. Miller short_packet: 1795673d57e7SHarvey Harrison LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", 1796afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1797afd46503SJoe Perches &saddr, ntohs(uh->source), 1798afd46503SJoe Perches ulen, skb->len, 1799afd46503SJoe Perches &daddr, ntohs(uh->dest)); 1800db8dac20SDavid S. Miller goto drop; 1801db8dac20SDavid S. Miller 1802db8dac20SDavid S. Miller csum_error: 1803db8dac20SDavid S. Miller /* 1804db8dac20SDavid S. Miller * RFC1122: OK. Discards the bad packet silently (as far as 1805db8dac20SDavid S. Miller * the network is concerned, anyway) as per 4.1.3.4 (MUST). 1806db8dac20SDavid S. Miller */ 1807673d57e7SHarvey Harrison LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", 1808afd46503SJoe Perches proto == IPPROTO_UDPLITE ? "Lite" : "", 1809afd46503SJoe Perches &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), 1810db8dac20SDavid S. Miller ulen); 18116a5dc9e5SEric Dumazet UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); 1812db8dac20SDavid S. Miller drop: 18130283328eSPavel Emelyanov UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 1814db8dac20SDavid S. Miller kfree_skb(skb); 1815db8dac20SDavid S. Miller return 0; 1816db8dac20SDavid S. Miller } 1817db8dac20SDavid S. Miller 1818421b3885SShawn Bohrer /* We can only early demux multicast if there is a single matching socket. 1819421b3885SShawn Bohrer * If more than one socket found returns NULL 1820421b3885SShawn Bohrer */ 1821421b3885SShawn Bohrer static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, 1822421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 1823421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 1824421b3885SShawn Bohrer int dif) 1825421b3885SShawn Bohrer { 1826421b3885SShawn Bohrer struct sock *sk, *result; 1827421b3885SShawn Bohrer struct hlist_nulls_node *node; 1828421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 1829421b3885SShawn Bohrer unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask); 1830421b3885SShawn Bohrer struct udp_hslot *hslot = &udp_table.hash[slot]; 1831421b3885SShawn Bohrer 1832421b3885SShawn Bohrer rcu_read_lock(); 1833421b3885SShawn Bohrer begin: 1834421b3885SShawn Bohrer count = 0; 1835421b3885SShawn Bohrer result = NULL; 1836421b3885SShawn Bohrer sk_nulls_for_each_rcu(sk, node, &hslot->head) { 1837421b3885SShawn Bohrer if (__udp_is_mcast_sock(net, sk, 1838421b3885SShawn Bohrer loc_port, loc_addr, 1839421b3885SShawn Bohrer rmt_port, rmt_addr, 1840421b3885SShawn Bohrer dif, hnum)) { 1841421b3885SShawn Bohrer result = sk; 1842421b3885SShawn Bohrer ++count; 1843421b3885SShawn Bohrer } 1844421b3885SShawn Bohrer } 1845421b3885SShawn Bohrer /* 1846421b3885SShawn Bohrer * if the nulls value we got at the end of this lookup is 1847421b3885SShawn Bohrer * not the expected one, we must restart lookup. 1848421b3885SShawn Bohrer * We probably met an item that was moved to another chain. 1849421b3885SShawn Bohrer */ 1850421b3885SShawn Bohrer if (get_nulls_value(node) != slot) 1851421b3885SShawn Bohrer goto begin; 1852421b3885SShawn Bohrer 1853421b3885SShawn Bohrer if (result) { 1854421b3885SShawn Bohrer if (count != 1 || 1855421b3885SShawn Bohrer unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 1856421b3885SShawn Bohrer result = NULL; 1857f69b923aSEric Dumazet else if (unlikely(!__udp_is_mcast_sock(net, result, 1858421b3885SShawn Bohrer loc_port, loc_addr, 1859421b3885SShawn Bohrer rmt_port, rmt_addr, 1860421b3885SShawn Bohrer dif, hnum))) { 1861421b3885SShawn Bohrer sock_put(result); 1862421b3885SShawn Bohrer result = NULL; 1863421b3885SShawn Bohrer } 1864421b3885SShawn Bohrer } 1865421b3885SShawn Bohrer rcu_read_unlock(); 1866421b3885SShawn Bohrer return result; 1867421b3885SShawn Bohrer } 1868421b3885SShawn Bohrer 1869421b3885SShawn Bohrer /* For unicast we should only early demux connected sockets or we can 1870421b3885SShawn Bohrer * break forwarding setups. The chains here can be long so only check 1871421b3885SShawn Bohrer * if the first socket is an exact match and if not move on. 1872421b3885SShawn Bohrer */ 1873421b3885SShawn Bohrer static struct sock *__udp4_lib_demux_lookup(struct net *net, 1874421b3885SShawn Bohrer __be16 loc_port, __be32 loc_addr, 1875421b3885SShawn Bohrer __be16 rmt_port, __be32 rmt_addr, 1876421b3885SShawn Bohrer int dif) 1877421b3885SShawn Bohrer { 1878421b3885SShawn Bohrer struct sock *sk, *result; 1879421b3885SShawn Bohrer struct hlist_nulls_node *node; 1880421b3885SShawn Bohrer unsigned short hnum = ntohs(loc_port); 1881421b3885SShawn Bohrer unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); 1882421b3885SShawn Bohrer unsigned int slot2 = hash2 & udp_table.mask; 1883421b3885SShawn Bohrer struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; 1884421b3885SShawn Bohrer INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr) 1885421b3885SShawn Bohrer const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum); 1886421b3885SShawn Bohrer 1887421b3885SShawn Bohrer rcu_read_lock(); 1888421b3885SShawn Bohrer result = NULL; 1889421b3885SShawn Bohrer udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) { 1890421b3885SShawn Bohrer if (INET_MATCH(sk, net, acookie, 1891421b3885SShawn Bohrer rmt_addr, loc_addr, ports, dif)) 1892421b3885SShawn Bohrer result = sk; 1893421b3885SShawn Bohrer /* Only check first socket in chain */ 1894421b3885SShawn Bohrer break; 1895421b3885SShawn Bohrer } 1896421b3885SShawn Bohrer 1897421b3885SShawn Bohrer if (result) { 1898421b3885SShawn Bohrer if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) 1899421b3885SShawn Bohrer result = NULL; 1900421b3885SShawn Bohrer else if (unlikely(!INET_MATCH(sk, net, acookie, 1901421b3885SShawn Bohrer rmt_addr, loc_addr, 1902421b3885SShawn Bohrer ports, dif))) { 1903421b3885SShawn Bohrer sock_put(result); 1904421b3885SShawn Bohrer result = NULL; 1905421b3885SShawn Bohrer } 1906421b3885SShawn Bohrer } 1907421b3885SShawn Bohrer rcu_read_unlock(); 1908421b3885SShawn Bohrer return result; 1909421b3885SShawn Bohrer } 1910421b3885SShawn Bohrer 1911421b3885SShawn Bohrer void udp_v4_early_demux(struct sk_buff *skb) 1912421b3885SShawn Bohrer { 1913421b3885SShawn Bohrer const struct iphdr *iph = ip_hdr(skb); 1914421b3885SShawn Bohrer const struct udphdr *uh = udp_hdr(skb); 1915421b3885SShawn Bohrer struct sock *sk; 1916421b3885SShawn Bohrer struct dst_entry *dst; 1917421b3885SShawn Bohrer struct net *net = dev_net(skb->dev); 1918421b3885SShawn Bohrer int dif = skb->dev->ifindex; 1919421b3885SShawn Bohrer 1920421b3885SShawn Bohrer /* validate the packet */ 1921421b3885SShawn Bohrer if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) 1922421b3885SShawn Bohrer return; 1923421b3885SShawn Bohrer 1924421b3885SShawn Bohrer if (skb->pkt_type == PACKET_BROADCAST || 1925421b3885SShawn Bohrer skb->pkt_type == PACKET_MULTICAST) 1926421b3885SShawn Bohrer sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, 1927421b3885SShawn Bohrer uh->source, iph->saddr, dif); 1928421b3885SShawn Bohrer else if (skb->pkt_type == PACKET_HOST) 1929421b3885SShawn Bohrer sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, 1930421b3885SShawn Bohrer uh->source, iph->saddr, dif); 1931421b3885SShawn Bohrer else 1932421b3885SShawn Bohrer return; 1933421b3885SShawn Bohrer 1934421b3885SShawn Bohrer if (!sk) 1935421b3885SShawn Bohrer return; 1936421b3885SShawn Bohrer 1937421b3885SShawn Bohrer skb->sk = sk; 1938421b3885SShawn Bohrer skb->destructor = sock_edemux; 1939421b3885SShawn Bohrer dst = sk->sk_rx_dst; 1940421b3885SShawn Bohrer 1941421b3885SShawn Bohrer if (dst) 1942421b3885SShawn Bohrer dst = dst_check(dst, 0); 1943421b3885SShawn Bohrer if (dst) 1944421b3885SShawn Bohrer skb_dst_set_noref(skb, dst); 1945421b3885SShawn Bohrer } 1946421b3885SShawn Bohrer 1947db8dac20SDavid S. Miller int udp_rcv(struct sk_buff *skb) 1948db8dac20SDavid S. Miller { 1949645ca708SEric Dumazet return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); 1950db8dac20SDavid S. Miller } 1951db8dac20SDavid S. Miller 19527d06b2e0SBrian Haley void udp_destroy_sock(struct sock *sk) 1953db8dac20SDavid S. Miller { 195444046a59STom Parkin struct udp_sock *up = udp_sk(sk); 19558a74ad60SEric Dumazet bool slow = lock_sock_fast(sk); 1956db8dac20SDavid S. Miller udp_flush_pending_frames(sk); 19578a74ad60SEric Dumazet unlock_sock_fast(sk, slow); 195844046a59STom Parkin if (static_key_false(&udp_encap_needed) && up->encap_type) { 195944046a59STom Parkin void (*encap_destroy)(struct sock *sk); 196044046a59STom Parkin encap_destroy = ACCESS_ONCE(up->encap_destroy); 196144046a59STom Parkin if (encap_destroy) 196244046a59STom Parkin encap_destroy(sk); 196344046a59STom Parkin } 1964db8dac20SDavid S. Miller } 1965db8dac20SDavid S. Miller 19661da177e4SLinus Torvalds /* 19671da177e4SLinus Torvalds * Socket option code for UDP 19681da177e4SLinus Torvalds */ 19694c0a6cb0SGerrit Renker int udp_lib_setsockopt(struct sock *sk, int level, int optname, 1970b7058842SDavid S. Miller char __user *optval, unsigned int optlen, 19714c0a6cb0SGerrit Renker int (*push_pending_frames)(struct sock *)) 19721da177e4SLinus Torvalds { 19731da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 19741da177e4SLinus Torvalds int val; 19751da177e4SLinus Torvalds int err = 0; 1976b2bf1e26SWang Chen int is_udplite = IS_UDPLITE(sk); 19771da177e4SLinus Torvalds 19781da177e4SLinus Torvalds if (optlen < sizeof(int)) 19791da177e4SLinus Torvalds return -EINVAL; 19801da177e4SLinus Torvalds 19811da177e4SLinus Torvalds if (get_user(val, (int __user *)optval)) 19821da177e4SLinus Torvalds return -EFAULT; 19831da177e4SLinus Torvalds 19841da177e4SLinus Torvalds switch (optname) { 19851da177e4SLinus Torvalds case UDP_CORK: 19861da177e4SLinus Torvalds if (val != 0) { 19871da177e4SLinus Torvalds up->corkflag = 1; 19881da177e4SLinus Torvalds } else { 19891da177e4SLinus Torvalds up->corkflag = 0; 19901da177e4SLinus Torvalds lock_sock(sk); 19914c0a6cb0SGerrit Renker (*push_pending_frames)(sk); 19921da177e4SLinus Torvalds release_sock(sk); 19931da177e4SLinus Torvalds } 19941da177e4SLinus Torvalds break; 19951da177e4SLinus Torvalds 19961da177e4SLinus Torvalds case UDP_ENCAP: 19971da177e4SLinus Torvalds switch (val) { 19981da177e4SLinus Torvalds case 0: 19991da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP: 20001da177e4SLinus Torvalds case UDP_ENCAP_ESPINUDP_NON_IKE: 2001067b207bSJames Chapman up->encap_rcv = xfrm4_udp_encap_rcv; 2002067b207bSJames Chapman /* FALLTHROUGH */ 2003342f0234SJames Chapman case UDP_ENCAP_L2TPINUDP: 20041da177e4SLinus Torvalds up->encap_type = val; 2005447167bfSEric Dumazet udp_encap_enable(); 20061da177e4SLinus Torvalds break; 20071da177e4SLinus Torvalds default: 20081da177e4SLinus Torvalds err = -ENOPROTOOPT; 20091da177e4SLinus Torvalds break; 20101da177e4SLinus Torvalds } 20111da177e4SLinus Torvalds break; 20121da177e4SLinus Torvalds 2013ba4e58ecSGerrit Renker /* 2014ba4e58ecSGerrit Renker * UDP-Lite's partial checksum coverage (RFC 3828). 2015ba4e58ecSGerrit Renker */ 2016ba4e58ecSGerrit Renker /* The sender sets actual checksum coverage length via this option. 2017ba4e58ecSGerrit Renker * The case coverage > packet length is handled by send module. */ 2018ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2019b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2020ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2021ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 2022ba4e58ecSGerrit Renker val = 8; 20234be929beSAlexey Dobriyan else if (val > USHRT_MAX) 20244be929beSAlexey Dobriyan val = USHRT_MAX; 2025ba4e58ecSGerrit Renker up->pcslen = val; 2026ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_SEND_CC; 2027ba4e58ecSGerrit Renker break; 2028ba4e58ecSGerrit Renker 2029ba4e58ecSGerrit Renker /* The receiver specifies a minimum checksum coverage value. To make 2030ba4e58ecSGerrit Renker * sense, this should be set to at least 8 (as done below). If zero is 2031ba4e58ecSGerrit Renker * used, this again means full checksum coverage. */ 2032ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2033b2bf1e26SWang Chen if (!is_udplite) /* Disable the option on UDP sockets */ 2034ba4e58ecSGerrit Renker return -ENOPROTOOPT; 2035ba4e58ecSGerrit Renker if (val != 0 && val < 8) /* Avoid silly minimal values. */ 2036ba4e58ecSGerrit Renker val = 8; 20374be929beSAlexey Dobriyan else if (val > USHRT_MAX) 20384be929beSAlexey Dobriyan val = USHRT_MAX; 2039ba4e58ecSGerrit Renker up->pcrlen = val; 2040ba4e58ecSGerrit Renker up->pcflag |= UDPLITE_RECV_CC; 2041ba4e58ecSGerrit Renker break; 2042ba4e58ecSGerrit Renker 20431da177e4SLinus Torvalds default: 20441da177e4SLinus Torvalds err = -ENOPROTOOPT; 20451da177e4SLinus Torvalds break; 20466516c655SStephen Hemminger } 20471da177e4SLinus Torvalds 20481da177e4SLinus Torvalds return err; 20491da177e4SLinus Torvalds } 2050c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_setsockopt); 20511da177e4SLinus Torvalds 2052db8dac20SDavid S. Miller int udp_setsockopt(struct sock *sk, int level, int optname, 2053b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2054db8dac20SDavid S. Miller { 2055db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2056db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2057db8dac20SDavid S. Miller udp_push_pending_frames); 2058db8dac20SDavid S. Miller return ip_setsockopt(sk, level, optname, optval, optlen); 2059db8dac20SDavid S. Miller } 2060db8dac20SDavid S. Miller 2061db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2062db8dac20SDavid S. Miller int compat_udp_setsockopt(struct sock *sk, int level, int optname, 2063b7058842SDavid S. Miller char __user *optval, unsigned int optlen) 2064db8dac20SDavid S. Miller { 2065db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2066db8dac20SDavid S. Miller return udp_lib_setsockopt(sk, level, optname, optval, optlen, 2067db8dac20SDavid S. Miller udp_push_pending_frames); 2068db8dac20SDavid S. Miller return compat_ip_setsockopt(sk, level, optname, optval, optlen); 2069db8dac20SDavid S. Miller } 2070db8dac20SDavid S. Miller #endif 2071db8dac20SDavid S. Miller 20724c0a6cb0SGerrit Renker int udp_lib_getsockopt(struct sock *sk, int level, int optname, 20731da177e4SLinus Torvalds char __user *optval, int __user *optlen) 20741da177e4SLinus Torvalds { 20751da177e4SLinus Torvalds struct udp_sock *up = udp_sk(sk); 20761da177e4SLinus Torvalds int val, len; 20771da177e4SLinus Torvalds 20781da177e4SLinus Torvalds if (get_user(len, optlen)) 20791da177e4SLinus Torvalds return -EFAULT; 20801da177e4SLinus Torvalds 20811da177e4SLinus Torvalds len = min_t(unsigned int, len, sizeof(int)); 20821da177e4SLinus Torvalds 20831da177e4SLinus Torvalds if (len < 0) 20841da177e4SLinus Torvalds return -EINVAL; 20851da177e4SLinus Torvalds 20861da177e4SLinus Torvalds switch (optname) { 20871da177e4SLinus Torvalds case UDP_CORK: 20881da177e4SLinus Torvalds val = up->corkflag; 20891da177e4SLinus Torvalds break; 20901da177e4SLinus Torvalds 20911da177e4SLinus Torvalds case UDP_ENCAP: 20921da177e4SLinus Torvalds val = up->encap_type; 20931da177e4SLinus Torvalds break; 20941da177e4SLinus Torvalds 2095ba4e58ecSGerrit Renker /* The following two cannot be changed on UDP sockets, the return is 2096ba4e58ecSGerrit Renker * always 0 (which corresponds to the full checksum coverage of UDP). */ 2097ba4e58ecSGerrit Renker case UDPLITE_SEND_CSCOV: 2098ba4e58ecSGerrit Renker val = up->pcslen; 2099ba4e58ecSGerrit Renker break; 2100ba4e58ecSGerrit Renker 2101ba4e58ecSGerrit Renker case UDPLITE_RECV_CSCOV: 2102ba4e58ecSGerrit Renker val = up->pcrlen; 2103ba4e58ecSGerrit Renker break; 2104ba4e58ecSGerrit Renker 21051da177e4SLinus Torvalds default: 21061da177e4SLinus Torvalds return -ENOPROTOOPT; 21076516c655SStephen Hemminger } 21081da177e4SLinus Torvalds 21091da177e4SLinus Torvalds if (put_user(len, optlen)) 21101da177e4SLinus Torvalds return -EFAULT; 21111da177e4SLinus Torvalds if (copy_to_user(optval, &val, len)) 21121da177e4SLinus Torvalds return -EFAULT; 21131da177e4SLinus Torvalds return 0; 21141da177e4SLinus Torvalds } 2115c482c568SEric Dumazet EXPORT_SYMBOL(udp_lib_getsockopt); 21161da177e4SLinus Torvalds 2117db8dac20SDavid S. Miller int udp_getsockopt(struct sock *sk, int level, int optname, 2118db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2119db8dac20SDavid S. Miller { 2120db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2121db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2122db8dac20SDavid S. Miller return ip_getsockopt(sk, level, optname, optval, optlen); 2123db8dac20SDavid S. Miller } 2124db8dac20SDavid S. Miller 2125db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2126db8dac20SDavid S. Miller int compat_udp_getsockopt(struct sock *sk, int level, int optname, 2127db8dac20SDavid S. Miller char __user *optval, int __user *optlen) 2128db8dac20SDavid S. Miller { 2129db8dac20SDavid S. Miller if (level == SOL_UDP || level == SOL_UDPLITE) 2130db8dac20SDavid S. Miller return udp_lib_getsockopt(sk, level, optname, optval, optlen); 2131db8dac20SDavid S. Miller return compat_ip_getsockopt(sk, level, optname, optval, optlen); 2132db8dac20SDavid S. Miller } 2133db8dac20SDavid S. Miller #endif 21341da177e4SLinus Torvalds /** 21351da177e4SLinus Torvalds * udp_poll - wait for a UDP event. 21361da177e4SLinus Torvalds * @file - file struct 21371da177e4SLinus Torvalds * @sock - socket 21381da177e4SLinus Torvalds * @wait - poll table 21391da177e4SLinus Torvalds * 21401da177e4SLinus Torvalds * This is same as datagram poll, except for the special case of 21411da177e4SLinus Torvalds * blocking sockets. If application is using a blocking fd 21421da177e4SLinus Torvalds * and a packet with checksum error is in the queue; 21431da177e4SLinus Torvalds * then it could get return from select indicating data available 21441da177e4SLinus Torvalds * but then block when reading it. Add special case code 21451da177e4SLinus Torvalds * to work around these arguably broken applications. 21461da177e4SLinus Torvalds */ 21471da177e4SLinus Torvalds unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) 21481da177e4SLinus Torvalds { 21491da177e4SLinus Torvalds unsigned int mask = datagram_poll(file, sock, wait); 21501da177e4SLinus Torvalds struct sock *sk = sock->sk; 21511da177e4SLinus Torvalds 2152c3f1dbafSDavid Majnemer sock_rps_record_flow(sk); 2153c3f1dbafSDavid Majnemer 21541da177e4SLinus Torvalds /* Check for false positives due to checksum errors */ 215585584672SEric Dumazet if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && 215685584672SEric Dumazet !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) 21571da177e4SLinus Torvalds mask &= ~(POLLIN | POLLRDNORM); 21581da177e4SLinus Torvalds 21591da177e4SLinus Torvalds return mask; 21601da177e4SLinus Torvalds 21611da177e4SLinus Torvalds } 2162c482c568SEric Dumazet EXPORT_SYMBOL(udp_poll); 21631da177e4SLinus Torvalds 2164db8dac20SDavid S. Miller struct proto udp_prot = { 2165db8dac20SDavid S. Miller .name = "UDP", 2166db8dac20SDavid S. Miller .owner = THIS_MODULE, 2167db8dac20SDavid S. Miller .close = udp_lib_close, 2168db8dac20SDavid S. Miller .connect = ip4_datagram_connect, 2169db8dac20SDavid S. Miller .disconnect = udp_disconnect, 2170db8dac20SDavid S. Miller .ioctl = udp_ioctl, 2171db8dac20SDavid S. Miller .destroy = udp_destroy_sock, 2172db8dac20SDavid S. Miller .setsockopt = udp_setsockopt, 2173db8dac20SDavid S. Miller .getsockopt = udp_getsockopt, 2174db8dac20SDavid S. Miller .sendmsg = udp_sendmsg, 2175db8dac20SDavid S. Miller .recvmsg = udp_recvmsg, 2176db8dac20SDavid S. Miller .sendpage = udp_sendpage, 217793821778SHerbert Xu .backlog_rcv = __udp_queue_rcv_skb, 21788141ed9fSSteffen Klassert .release_cb = ip4_datagram_release_cb, 2179db8dac20SDavid S. Miller .hash = udp_lib_hash, 2180db8dac20SDavid S. Miller .unhash = udp_lib_unhash, 2181719f8358SEric Dumazet .rehash = udp_v4_rehash, 2182db8dac20SDavid S. Miller .get_port = udp_v4_get_port, 2183db8dac20SDavid S. Miller .memory_allocated = &udp_memory_allocated, 2184db8dac20SDavid S. Miller .sysctl_mem = sysctl_udp_mem, 2185db8dac20SDavid S. Miller .sysctl_wmem = &sysctl_udp_wmem_min, 2186db8dac20SDavid S. Miller .sysctl_rmem = &sysctl_udp_rmem_min, 2187db8dac20SDavid S. Miller .obj_size = sizeof(struct udp_sock), 2188271b72c7SEric Dumazet .slab_flags = SLAB_DESTROY_BY_RCU, 2189645ca708SEric Dumazet .h.udp_table = &udp_table, 2190db8dac20SDavid S. Miller #ifdef CONFIG_COMPAT 2191db8dac20SDavid S. Miller .compat_setsockopt = compat_udp_setsockopt, 2192db8dac20SDavid S. Miller .compat_getsockopt = compat_udp_getsockopt, 2193db8dac20SDavid S. Miller #endif 2194fcbdf09dSOctavian Purdila .clear_sk = sk_prot_clear_portaddr_nulls, 2195db8dac20SDavid S. Miller }; 2196c482c568SEric Dumazet EXPORT_SYMBOL(udp_prot); 21971da177e4SLinus Torvalds 21981da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 21991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 22001da177e4SLinus Torvalds 2201645ca708SEric Dumazet static struct sock *udp_get_first(struct seq_file *seq, int start) 22021da177e4SLinus Torvalds { 22031da177e4SLinus Torvalds struct sock *sk; 22041da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 22056f191efeSDenis V. Lunev struct net *net = seq_file_net(seq); 22061da177e4SLinus Torvalds 2207f86dcc5aSEric Dumazet for (state->bucket = start; state->bucket <= state->udp_table->mask; 2208f86dcc5aSEric Dumazet ++state->bucket) { 220988ab1932SEric Dumazet struct hlist_nulls_node *node; 2210645ca708SEric Dumazet struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; 2211f86dcc5aSEric Dumazet 2212f86dcc5aSEric Dumazet if (hlist_nulls_empty(&hslot->head)) 2213f86dcc5aSEric Dumazet continue; 2214f86dcc5aSEric Dumazet 2215645ca708SEric Dumazet spin_lock_bh(&hslot->lock); 221688ab1932SEric Dumazet sk_nulls_for_each(sk, node, &hslot->head) { 2217878628fbSYOSHIFUJI Hideaki if (!net_eq(sock_net(sk), net)) 2218a91275efSDaniel Lezcano continue; 22191da177e4SLinus Torvalds if (sk->sk_family == state->family) 22201da177e4SLinus Torvalds goto found; 22211da177e4SLinus Torvalds } 2222645ca708SEric Dumazet spin_unlock_bh(&hslot->lock); 22231da177e4SLinus Torvalds } 22241da177e4SLinus Torvalds sk = NULL; 22251da177e4SLinus Torvalds found: 22261da177e4SLinus Torvalds return sk; 22271da177e4SLinus Torvalds } 22281da177e4SLinus Torvalds 22291da177e4SLinus Torvalds static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) 22301da177e4SLinus Torvalds { 22311da177e4SLinus Torvalds struct udp_iter_state *state = seq->private; 22326f191efeSDenis V. Lunev struct net *net = seq_file_net(seq); 22331da177e4SLinus Torvalds 22341da177e4SLinus Torvalds do { 223588ab1932SEric Dumazet sk = sk_nulls_next(sk); 2236878628fbSYOSHIFUJI Hideaki } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 22371da177e4SLinus Torvalds 2238645ca708SEric Dumazet if (!sk) { 2239f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2240f52b5054SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 2241645ca708SEric Dumazet return udp_get_first(seq, state->bucket + 1); 22421da177e4SLinus Torvalds } 22431da177e4SLinus Torvalds return sk; 22441da177e4SLinus Torvalds } 22451da177e4SLinus Torvalds 22461da177e4SLinus Torvalds static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) 22471da177e4SLinus Torvalds { 2248645ca708SEric Dumazet struct sock *sk = udp_get_first(seq, 0); 22491da177e4SLinus Torvalds 22501da177e4SLinus Torvalds if (sk) 22511da177e4SLinus Torvalds while (pos && (sk = udp_get_next(seq, sk)) != NULL) 22521da177e4SLinus Torvalds --pos; 22531da177e4SLinus Torvalds return pos ? NULL : sk; 22541da177e4SLinus Torvalds } 22551da177e4SLinus Torvalds 22561da177e4SLinus Torvalds static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 22571da177e4SLinus Torvalds { 225830842f29SVitaly Mayatskikh struct udp_iter_state *state = seq->private; 2259f86dcc5aSEric Dumazet state->bucket = MAX_UDP_PORTS; 226030842f29SVitaly Mayatskikh 2261b50660f1SYOSHIFUJI Hideaki return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 22621da177e4SLinus Torvalds } 22631da177e4SLinus Torvalds 22641da177e4SLinus Torvalds static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 22651da177e4SLinus Torvalds { 22661da177e4SLinus Torvalds struct sock *sk; 22671da177e4SLinus Torvalds 2268b50660f1SYOSHIFUJI Hideaki if (v == SEQ_START_TOKEN) 22691da177e4SLinus Torvalds sk = udp_get_idx(seq, 0); 22701da177e4SLinus Torvalds else 22711da177e4SLinus Torvalds sk = udp_get_next(seq, v); 22721da177e4SLinus Torvalds 22731da177e4SLinus Torvalds ++*pos; 22741da177e4SLinus Torvalds return sk; 22751da177e4SLinus Torvalds } 22761da177e4SLinus Torvalds 22771da177e4SLinus Torvalds static void udp_seq_stop(struct seq_file *seq, void *v) 22781da177e4SLinus Torvalds { 2279645ca708SEric Dumazet struct udp_iter_state *state = seq->private; 2280645ca708SEric Dumazet 2281f86dcc5aSEric Dumazet if (state->bucket <= state->udp_table->mask) 2282645ca708SEric Dumazet spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 22831da177e4SLinus Torvalds } 22841da177e4SLinus Torvalds 228573cb88ecSArjan van de Ven int udp_seq_open(struct inode *inode, struct file *file) 22861da177e4SLinus Torvalds { 2287d9dda78bSAl Viro struct udp_seq_afinfo *afinfo = PDE_DATA(inode); 2288a2be75c1SDenis V. Lunev struct udp_iter_state *s; 2289a2be75c1SDenis V. Lunev int err; 22901da177e4SLinus Torvalds 2291a2be75c1SDenis V. Lunev err = seq_open_net(inode, file, &afinfo->seq_ops, 2292a2be75c1SDenis V. Lunev sizeof(struct udp_iter_state)); 2293a2be75c1SDenis V. Lunev if (err < 0) 2294a2be75c1SDenis V. Lunev return err; 2295a91275efSDaniel Lezcano 2296a2be75c1SDenis V. Lunev s = ((struct seq_file *)file->private_data)->private; 22971da177e4SLinus Torvalds s->family = afinfo->family; 2298645ca708SEric Dumazet s->udp_table = afinfo->udp_table; 2299a2be75c1SDenis V. Lunev return err; 2300a91275efSDaniel Lezcano } 230173cb88ecSArjan van de Ven EXPORT_SYMBOL(udp_seq_open); 2302a91275efSDaniel Lezcano 23031da177e4SLinus Torvalds /* ------------------------------------------------------------------------ */ 23040c96d8c5SDaniel Lezcano int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) 23051da177e4SLinus Torvalds { 23061da177e4SLinus Torvalds struct proc_dir_entry *p; 23071da177e4SLinus Torvalds int rc = 0; 23081da177e4SLinus Torvalds 2309dda61925SDenis V. Lunev afinfo->seq_ops.start = udp_seq_start; 2310dda61925SDenis V. Lunev afinfo->seq_ops.next = udp_seq_next; 2311dda61925SDenis V. Lunev afinfo->seq_ops.stop = udp_seq_stop; 2312dda61925SDenis V. Lunev 231384841c3cSDenis V. Lunev p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, 231473cb88ecSArjan van de Ven afinfo->seq_fops, afinfo); 231584841c3cSDenis V. Lunev if (!p) 23161da177e4SLinus Torvalds rc = -ENOMEM; 23171da177e4SLinus Torvalds return rc; 23181da177e4SLinus Torvalds } 2319c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_register); 23201da177e4SLinus Torvalds 23210c96d8c5SDaniel Lezcano void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) 23221da177e4SLinus Torvalds { 2323ece31ffdSGao feng remove_proc_entry(afinfo->name, net->proc_net); 23241da177e4SLinus Torvalds } 2325c482c568SEric Dumazet EXPORT_SYMBOL(udp_proc_unregister); 2326db8dac20SDavid S. Miller 2327db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 23285e659e4cSPavel Emelyanov static void udp4_format_sock(struct sock *sp, struct seq_file *f, 2329652586dfSTetsuo Handa int bucket) 2330db8dac20SDavid S. Miller { 2331db8dac20SDavid S. Miller struct inet_sock *inet = inet_sk(sp); 2332c720c7e8SEric Dumazet __be32 dest = inet->inet_daddr; 2333c720c7e8SEric Dumazet __be32 src = inet->inet_rcv_saddr; 2334c720c7e8SEric Dumazet __u16 destp = ntohs(inet->inet_dport); 2335c720c7e8SEric Dumazet __u16 srcp = ntohs(inet->inet_sport); 2336db8dac20SDavid S. Miller 2337f86dcc5aSEric Dumazet seq_printf(f, "%5d: %08X:%04X %08X:%04X" 2338652586dfSTetsuo Handa " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", 2339db8dac20SDavid S. Miller bucket, src, srcp, dest, destp, sp->sk_state, 234031e6d363SEric Dumazet sk_wmem_alloc_get(sp), 234131e6d363SEric Dumazet sk_rmem_alloc_get(sp), 2342a7cb5a49SEric W. Biederman 0, 0L, 0, 2343a7cb5a49SEric W. Biederman from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 2344a7cb5a49SEric W. Biederman 0, sock_i_ino(sp), 2345cb61cb9bSEric Dumazet atomic_read(&sp->sk_refcnt), sp, 2346652586dfSTetsuo Handa atomic_read(&sp->sk_drops)); 2347db8dac20SDavid S. Miller } 2348db8dac20SDavid S. Miller 2349db8dac20SDavid S. Miller int udp4_seq_show(struct seq_file *seq, void *v) 2350db8dac20SDavid S. Miller { 2351652586dfSTetsuo Handa seq_setwidth(seq, 127); 2352db8dac20SDavid S. Miller if (v == SEQ_START_TOKEN) 2353652586dfSTetsuo Handa seq_puts(seq, " sl local_address rem_address st tx_queue " 2354db8dac20SDavid S. Miller "rx_queue tr tm->when retrnsmt uid timeout " 2355cb61cb9bSEric Dumazet "inode ref pointer drops"); 2356db8dac20SDavid S. Miller else { 2357db8dac20SDavid S. Miller struct udp_iter_state *state = seq->private; 2358db8dac20SDavid S. Miller 2359652586dfSTetsuo Handa udp4_format_sock(v, seq, state->bucket); 2360db8dac20SDavid S. Miller } 2361652586dfSTetsuo Handa seq_pad(seq, '\n'); 2362db8dac20SDavid S. Miller return 0; 2363db8dac20SDavid S. Miller } 2364db8dac20SDavid S. Miller 236573cb88ecSArjan van de Ven static const struct file_operations udp_afinfo_seq_fops = { 236673cb88ecSArjan van de Ven .owner = THIS_MODULE, 236773cb88ecSArjan van de Ven .open = udp_seq_open, 236873cb88ecSArjan van de Ven .read = seq_read, 236973cb88ecSArjan van de Ven .llseek = seq_lseek, 237073cb88ecSArjan van de Ven .release = seq_release_net 237173cb88ecSArjan van de Ven }; 237273cb88ecSArjan van de Ven 2373db8dac20SDavid S. Miller /* ------------------------------------------------------------------------ */ 2374db8dac20SDavid S. Miller static struct udp_seq_afinfo udp4_seq_afinfo = { 2375db8dac20SDavid S. Miller .name = "udp", 2376db8dac20SDavid S. Miller .family = AF_INET, 2377645ca708SEric Dumazet .udp_table = &udp_table, 237873cb88ecSArjan van de Ven .seq_fops = &udp_afinfo_seq_fops, 2379dda61925SDenis V. Lunev .seq_ops = { 2380dda61925SDenis V. Lunev .show = udp4_seq_show, 2381dda61925SDenis V. Lunev }, 2382db8dac20SDavid S. Miller }; 2383db8dac20SDavid S. Miller 23842c8c1e72SAlexey Dobriyan static int __net_init udp4_proc_init_net(struct net *net) 238515439febSPavel Emelyanov { 238615439febSPavel Emelyanov return udp_proc_register(net, &udp4_seq_afinfo); 238715439febSPavel Emelyanov } 238815439febSPavel Emelyanov 23892c8c1e72SAlexey Dobriyan static void __net_exit udp4_proc_exit_net(struct net *net) 239015439febSPavel Emelyanov { 239115439febSPavel Emelyanov udp_proc_unregister(net, &udp4_seq_afinfo); 239215439febSPavel Emelyanov } 239315439febSPavel Emelyanov 239415439febSPavel Emelyanov static struct pernet_operations udp4_net_ops = { 239515439febSPavel Emelyanov .init = udp4_proc_init_net, 239615439febSPavel Emelyanov .exit = udp4_proc_exit_net, 239715439febSPavel Emelyanov }; 239815439febSPavel Emelyanov 2399db8dac20SDavid S. Miller int __init udp4_proc_init(void) 2400db8dac20SDavid S. Miller { 240115439febSPavel Emelyanov return register_pernet_subsys(&udp4_net_ops); 2402db8dac20SDavid S. Miller } 2403db8dac20SDavid S. Miller 2404db8dac20SDavid S. Miller void udp4_proc_exit(void) 2405db8dac20SDavid S. Miller { 240615439febSPavel Emelyanov unregister_pernet_subsys(&udp4_net_ops); 2407db8dac20SDavid S. Miller } 24081da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 24091da177e4SLinus Torvalds 2410f86dcc5aSEric Dumazet static __initdata unsigned long uhash_entries; 2411f86dcc5aSEric Dumazet static int __init set_uhash_entries(char *str) 2412645ca708SEric Dumazet { 2413413c27d8SEldad Zack ssize_t ret; 2414413c27d8SEldad Zack 2415f86dcc5aSEric Dumazet if (!str) 2416f86dcc5aSEric Dumazet return 0; 2417413c27d8SEldad Zack 2418413c27d8SEldad Zack ret = kstrtoul(str, 0, &uhash_entries); 2419413c27d8SEldad Zack if (ret) 2420413c27d8SEldad Zack return 0; 2421413c27d8SEldad Zack 2422f86dcc5aSEric Dumazet if (uhash_entries && uhash_entries < UDP_HTABLE_SIZE_MIN) 2423f86dcc5aSEric Dumazet uhash_entries = UDP_HTABLE_SIZE_MIN; 2424f86dcc5aSEric Dumazet return 1; 2425f86dcc5aSEric Dumazet } 2426f86dcc5aSEric Dumazet __setup("uhash_entries=", set_uhash_entries); 2427645ca708SEric Dumazet 2428f86dcc5aSEric Dumazet void __init udp_table_init(struct udp_table *table, const char *name) 2429f86dcc5aSEric Dumazet { 2430f86dcc5aSEric Dumazet unsigned int i; 2431f86dcc5aSEric Dumazet 2432f86dcc5aSEric Dumazet table->hash = alloc_large_system_hash(name, 2433512615b6SEric Dumazet 2 * sizeof(struct udp_hslot), 2434f86dcc5aSEric Dumazet uhash_entries, 2435f86dcc5aSEric Dumazet 21, /* one slot per 2 MB */ 2436f86dcc5aSEric Dumazet 0, 2437f86dcc5aSEric Dumazet &table->log, 2438f86dcc5aSEric Dumazet &table->mask, 243931fe62b9STim Bird UDP_HTABLE_SIZE_MIN, 2440f86dcc5aSEric Dumazet 64 * 1024); 244131fe62b9STim Bird 2442512615b6SEric Dumazet table->hash2 = table->hash + (table->mask + 1); 2443f86dcc5aSEric Dumazet for (i = 0; i <= table->mask; i++) { 244488ab1932SEric Dumazet INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); 2445fdcc8aa9SEric Dumazet table->hash[i].count = 0; 2446645ca708SEric Dumazet spin_lock_init(&table->hash[i].lock); 2447645ca708SEric Dumazet } 2448512615b6SEric Dumazet for (i = 0; i <= table->mask; i++) { 2449512615b6SEric Dumazet INIT_HLIST_NULLS_HEAD(&table->hash2[i].head, i); 2450512615b6SEric Dumazet table->hash2[i].count = 0; 2451512615b6SEric Dumazet spin_lock_init(&table->hash2[i].lock); 2452512615b6SEric Dumazet } 2453645ca708SEric Dumazet } 2454645ca708SEric Dumazet 245595766fffSHideo Aoki void __init udp_init(void) 245695766fffSHideo Aoki { 2457f03d78dbSEric Dumazet unsigned long limit; 245895766fffSHideo Aoki 2459f86dcc5aSEric Dumazet udp_table_init(&udp_table, "UDP"); 2460f03d78dbSEric Dumazet limit = nr_free_buffer_pages() / 8; 246195766fffSHideo Aoki limit = max(limit, 128UL); 246295766fffSHideo Aoki sysctl_udp_mem[0] = limit / 4 * 3; 246395766fffSHideo Aoki sysctl_udp_mem[1] = limit; 246495766fffSHideo Aoki sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; 246595766fffSHideo Aoki 246695766fffSHideo Aoki sysctl_udp_rmem_min = SK_MEM_QUANTUM; 246795766fffSHideo Aoki sysctl_udp_wmem_min = SK_MEM_QUANTUM; 246895766fffSHideo Aoki } 246995766fffSHideo Aoki 2470da5bab07SDaniel Borkmann struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, 247173136267SPravin B Shelar netdev_features_t features) 247273136267SPravin B Shelar { 247373136267SPravin B Shelar struct sk_buff *segs = ERR_PTR(-EINVAL); 247473136267SPravin B Shelar int mac_len = skb->mac_len; 247573136267SPravin B Shelar int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 24760d05535dSPravin B Shelar __be16 protocol = skb->protocol; 247773136267SPravin B Shelar netdev_features_t enc_features; 24780d05535dSPravin B Shelar int outer_hlen; 247973136267SPravin B Shelar 248073136267SPravin B Shelar if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 248173136267SPravin B Shelar goto out; 248273136267SPravin B Shelar 248373136267SPravin B Shelar skb->encapsulation = 0; 248473136267SPravin B Shelar __skb_pull(skb, tnl_hlen); 248573136267SPravin B Shelar skb_reset_mac_header(skb); 248673136267SPravin B Shelar skb_set_network_header(skb, skb_inner_network_offset(skb)); 248773136267SPravin B Shelar skb->mac_len = skb_inner_network_offset(skb); 248819acc327SPravin B Shelar skb->protocol = htons(ETH_P_TEB); 248973136267SPravin B Shelar 249073136267SPravin B Shelar /* segment inner packet. */ 249173136267SPravin B Shelar enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 249273136267SPravin B Shelar segs = skb_mac_gso_segment(skb, enc_features); 249373136267SPravin B Shelar if (!segs || IS_ERR(segs)) 249473136267SPravin B Shelar goto out; 249573136267SPravin B Shelar 249673136267SPravin B Shelar outer_hlen = skb_tnl_header_len(skb); 249773136267SPravin B Shelar skb = segs; 249873136267SPravin B Shelar do { 249973136267SPravin B Shelar struct udphdr *uh; 250073136267SPravin B Shelar int udp_offset = outer_hlen - tnl_hlen; 250173136267SPravin B Shelar 2502cdbaa0bbSAlexander Duyck skb_reset_inner_headers(skb); 2503cdbaa0bbSAlexander Duyck skb->encapsulation = 1; 2504cdbaa0bbSAlexander Duyck 250573136267SPravin B Shelar skb->mac_len = mac_len; 250673136267SPravin B Shelar 250773136267SPravin B Shelar skb_push(skb, outer_hlen); 250873136267SPravin B Shelar skb_reset_mac_header(skb); 250973136267SPravin B Shelar skb_set_network_header(skb, mac_len); 251073136267SPravin B Shelar skb_set_transport_header(skb, udp_offset); 251173136267SPravin B Shelar uh = udp_hdr(skb); 251273136267SPravin B Shelar uh->len = htons(skb->len - udp_offset); 251373136267SPravin B Shelar 251473136267SPravin B Shelar /* csum segment if tunnel sets skb with csum. */ 2515eb3c0d83SCong Wang if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) { 251673136267SPravin B Shelar struct iphdr *iph = ip_hdr(skb); 251773136267SPravin B Shelar 251873136267SPravin B Shelar uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 251973136267SPravin B Shelar skb->len - udp_offset, 252073136267SPravin B Shelar IPPROTO_UDP, 0); 252173136267SPravin B Shelar uh->check = csum_fold(skb_checksum(skb, udp_offset, 252273136267SPravin B Shelar skb->len - udp_offset, 0)); 252373136267SPravin B Shelar if (uh->check == 0) 252473136267SPravin B Shelar uh->check = CSUM_MANGLED_0; 252573136267SPravin B Shelar 2526eb3c0d83SCong Wang } else if (protocol == htons(ETH_P_IPV6)) { 2527eb3c0d83SCong Wang struct ipv6hdr *ipv6h = ipv6_hdr(skb); 2528eb3c0d83SCong Wang u32 len = skb->len - udp_offset; 2529eb3c0d83SCong Wang 2530eb3c0d83SCong Wang uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, 2531eb3c0d83SCong Wang len, IPPROTO_UDP, 0); 2532eb3c0d83SCong Wang uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0)); 2533eb3c0d83SCong Wang if (uh->check == 0) 2534eb3c0d83SCong Wang uh->check = CSUM_MANGLED_0; 2535eb3c0d83SCong Wang skb->ip_summed = CHECKSUM_NONE; 253673136267SPravin B Shelar } 2537eb3c0d83SCong Wang 25380d05535dSPravin B Shelar skb->protocol = protocol; 253973136267SPravin B Shelar } while ((skb = skb->next)); 254073136267SPravin B Shelar out: 254173136267SPravin B Shelar return segs; 254273136267SPravin B Shelar } 2543