#define pr_fmt(fmt) "UDP: " fmt

#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/igmp.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/slab.h>

#define MAX_UDP_PORTS		65536
#define PORTS_PER_CHAIN		(MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
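/*
 * PORTS_PER_CHAIN is the largest number of distinct local ports that can
 * hash onto one chain of the minimum-sized table; udp_lib_get_port() below
 * uses it to bound the bitmap it fills while scanning a chain, so a single
 * walk can record every port already bound there.
 */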
static int udp_lib_lport_inuse(struct net *net, __u16 num,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2),

		if (net_eq(sock_net(sk2), net) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (*saddr_comp)(sk, sk2)) {
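/*
 * udp_lib_lport_inuse2() below applies the same conflict test while walking
 * the secondary hash chain, which is keyed on local address and port rather
 * than on port alone.
 */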
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
				int (*saddr_comp)(const struct sock *sk1,
						  const struct sock *sk2))

	spin_lock(&hslot2->lock);
		if (net_eq(sock_net(sk2), net) &&
		    (*saddr_comp)(sk, sk2)) {
	spin_unlock(&hslot2->lock);
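/*
 * udp_lib_get_port() binds a socket to a local port.  With snum == 0 it
 * searches for a free port starting at a random offset, using the bitmap
 * filled by udp_lib_lport_inuse() to test a whole hash chain per lock hold.
 * With an explicit port it checks for conflicts, preferring the shorter of
 * the primary (port-only) and secondary (address, port) chains once the
 * primary chain has grown past 10 entries.
 */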
		     int (*saddr_comp)(const struct sock *sk1,
				       const struct sock *sk2),
		     unsigned int hash2_nulladdr)

	struct udp_table *udptable = sk->sk_prot->h.udp_table;
	struct net *net = sock_net(sk);

		remaining = (high - low) + 1;
		first = (((u64)rand * remaining) >> 32) + low;
		rand = (rand | 1) * (udptable->mask + 1);
		last = first + udptable->mask + 1;
			hslot = udp_hashslot(udptable, net, first);
			spin_lock_bh(&hslot->lock);
			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
					    saddr_comp, udptable->log);
				if (low <= snum && snum <= high &&
				    !inet_is_reserved_local_port(snum))
			} while (snum != first);
			spin_unlock_bh(&hslot->lock);
		} while (++first != last);

		hslot = udp_hashslot(udptable, net, snum);
		spin_lock_bh(&hslot->lock);
		if (hslot->count > 10) {
			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;

			slot2          &= udptable->mask;
			hash2_nulladdr &= udptable->mask;

			hslot2 = udp_hashslot2(udptable, slot2);
				goto scan_primary_hash;
			exist = udp_lib_lport_inuse2(net, snum, hslot2,
			if (!exist && (hash2_nulladdr != slot2)) {
				hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				exist = udp_lib_lport_inuse2(net, snum, hslot2,
		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,

	inet_sk(sk)->inet_num = snum;
	udp_sk(sk)->udp_port_hash = snum;
	udp_sk(sk)->udp_portaddr_hash ^= snum;
	if (sk_unhashed(sk)) {
		sk_nulls_add_node_rcu(sk, &hslot->head);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

		spin_lock(&hslot2->lock);
		spin_unlock(&hslot2->lock);

	spin_unlock_bh(&hslot->lock);
static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);

		(!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
		 inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));

static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,

	unsigned int hash2_nulladdr =
	unsigned int hash2_partial =

	udp_sk(sk)->udp_portaddr_hash = hash2_partial;

static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,

	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
		score = (sk->sk_family == PF_INET ? 1 : 0);
		if (inet->inet_rcv_saddr) {
			if (inet->inet_rcv_saddr != daddr)
		if (inet->inet_daddr) {
			if (inet->inet_daddr != saddr)
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)

#define SCORE2_MAX (1 + 2 + 2 + 2)
static inline int compute_score2(struct sock *sk, struct net *net,
				 __be32 daddr, unsigned int hnum, int dif)

	if (inet->inet_rcv_saddr != daddr)
	score = (sk->sk_family == PF_INET ? 1 : 0);
	if (inet->inet_daddr) {
		if (inet->inet_daddr != saddr)
	if (sk->sk_bound_dev_if) {
		if (sk->sk_bound_dev_if != dif)
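/*
 * udp4_lib_lookup2() below scans one secondary-hash chain (keyed on
 * destination address and port) under RCU and keeps the socket with the
 * highest compute_score2() value, so a connected socket wins over a
 * wildcard bind.
 */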
static struct sock *udp4_lib_lookup2(struct net *net,
		__be32 daddr, unsigned int hnum, int dif,

		score = compute_score2(sk, net, saddr, sport,
		if (score > badness) {
	if (get_nulls_value(node) != slot2)
	if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
	else if (unlikely(compute_score2(result, net, saddr, sport,
					 daddr, hnum, dif) < badness)) {

	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);

	if (hslot->count > 10) {
		hash2 = udp4_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		result = udp4_lib_lookup2(net, saddr, sport,
		slot2 = hash2 & udptable->mask;
		result = udp4_lib_lookup2(net, saddr, sport,

		score = compute_score(sk, net, saddr, hnum, sport,
		if (score > badness) {
	if (get_nulls_value(node) != slot)
	if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
	else if (unlikely(compute_score(result, net, saddr, hnum, sport,
					daddr, dport, dif) < badness)) {
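/*
 * Both lookup loops above use the nulls-list convention: if the nulls value
 * found at the end of a chain is not the slot that was being scanned, the
 * walk raced with a socket being rehashed onto another chain and must be
 * restarted.  The score is also rechecked after taking a reference, since
 * the candidate socket may have changed while it was only protected by RCU.
 */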
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,

	const struct iphdr *iph = ip_hdr(skb);

	if (unlikely(sk = skb_steal_sock(skb)))
			 iph->daddr, dport, inet_iif(skb),

static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,

	unsigned short hnum = ntohs(loc_port);

		if (!net_eq(sock_net(s), net) ||
		    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
		    (inet->inet_rcv_saddr &&
		     inet->inet_rcv_saddr != loc_addr) ||
		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))

	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct net *net = dev_net(skb->dev);

			iph->saddr, uh->source, skb->dev->ifindex, udptable);
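/*
 * The hardware-checksum helper below prepares the transmit checksum: with a
 * single skb it only fills in the pseudo-header value and leaves the rest to
 * the device (CHECKSUM_PARTIAL), while a corked send carrying a frag_list
 * has to be checksummed in software, folding each fragment's csum into the
 * total.
 */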
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
	int offset = skb_transport_offset(skb);

			csum = csum_add(csum, frags->csum);
		} while ((frags = frags->next));

static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
	struct sock *sk = skb->sk;
	int offset = skb_transport_offset(skb);

	uh->dest = fl4->fl4_dport;

		csum = udplite_csum(skb);
		csum = udp_csum(skb);
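/*
 * udp_send_skb() picks the checksum method per socket: udplite_csum() for
 * UDP-Lite, nothing at all when checksums are disabled on the socket, the
 * hardware-offload path when the skb is CHECKSUM_PARTIAL, and a full
 * software udp_csum() otherwise.
 */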
static int udp_push_pending_frames(struct sock *sk)

	skb = ip_finish_skb(sk, fl4);
	err = udp_send_skb(skb, fl4);

	fl4 = &inet->cork.fl.u.ip4;

	ulen += sizeof(struct udphdr);

		daddr = inet->inet_daddr;

	ipc.oif = sk->sk_bound_dev_if;

		memcpy(&opt_copy, inet_opt,
		       sizeof(*inet_opt) + inet_opt->opt.optlen);

	if (ipc.opt && ipc.opt->opt.srr) {
		faddr = ipc.opt->opt.faddr;

	    (ipc.opt && ipc.opt->opt.is_strictroute)) {

	if (ipv4_is_multicast(daddr)) {

		struct net *net = sock_net(sk);

		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));

			sk_dst_set(sk, dst_clone(&rt->dst));

				  sizeof(struct udphdr), &ipc, &rt,
		if (skb && !IS_ERR(skb))
			err = udp_send_skb(skb, fl4);

	fl4 = &inet->cork.fl.u.ip4;
	fl4->fl4_dport = dport;

			     sizeof(struct udphdr), &ipc, &rt,
		err = udp_push_pending_frames(sk);

	dst_confirm(&rt->dst);
	goto back_from_confirm;
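/*
 * udp_sendmsg() has a lockless fast path: when the socket is not corked it
 * builds the whole datagram with ip_make_skb() and hands it straight to
 * udp_send_skb(), only falling back to the cork/ip_append_data machinery
 * (and udp_push_pending_frames()) when MSG_MORE or UDP_CORK is in effect.
 */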
			     page, offset, size, flags);
		ret = udp_push_pending_frames(sk);

static unsigned int first_packet_length(struct sock *sk)

	__skb_queue_head_init(&list_kill);

	spin_lock_bh(&rcvq->lock);
	while ((skb = skb_peek(rcvq)) != NULL &&
	       udp_lib_checksum_complete(skb)) {
		__skb_unlink(skb, rcvq);
		__skb_queue_tail(&list_kill, skb);
	res = skb ? skb->len : 0;
	spin_unlock_bh(&rcvq->lock);

	if (!skb_queue_empty(&list_kill)) {
		__skb_queue_purge(&list_kill);
		sk_mem_reclaim_partial(sk);
		unlock_sock_fast(sk, slow);
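/*
 * first_packet_length() returns the length of the first valid datagram on
 * the receive queue, dropping any leading packets whose checksum fails.
 * It backs both the SIOCINQ ioctl and udp_poll(), so userspace never sees a
 * "readable" indication for a datagram that will be discarded anyway.
 */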
		int amount = sk_wmem_alloc_get(sk);

		unsigned int amount = first_packet_length(sk);
			amount -= sizeof(struct udphdr);

		size_t len, int noblock, int flags, int *addr_len)

	unsigned int ulen, copied;
	int peeked, off = 0;

		*addr_len = sizeof(*sin);

				  &peeked, &off, &err);

	ulen = skb->len - sizeof(struct udphdr);
	else if (copied < ulen)

	if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
		if (udp_lib_checksum_complete(skb))

	if (skb_csum_unnecessary(skb))

	sock_recv_ts_and_drops(msg, sk, skb);

		sin->sin_port = udp_hdr(skb)->source;
		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));

	unlock_sock_fast(sk, slow);
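/*
 * udp_recvmsg() defers checksum verification: when the caller reads the
 * whole datagram the checksum is folded into the copy itself; only a
 * truncated read, or UDP-Lite partial coverage, forces a separate
 * udp_lib_checksum_complete() before copying.
 */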
	inet->inet_daddr = 0;
	sock_rps_reset_rxhash(sk);
	sk->sk_bound_dev_if = 0;
		inet_reset_saddr(sk);
		sk->sk_prot->unhash(sk);

	if (sk_hashed(sk)) {
		struct udp_table *udptable = sk->sk_prot->h.udp_table;

		hslot = udp_hashslot(udptable, sock_net(sk),
		spin_lock_bh(&hslot->lock);
		if (sk_nulls_del_node_init_rcu(sk)) {
			inet_sk(sk)->inet_num = 0;
			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);

			spin_lock(&hslot2->lock);
			spin_unlock(&hslot2->lock);
		spin_unlock_bh(&hslot->lock);

	if (sk_hashed(sk)) {
		struct udp_table *udptable = sk->sk_prot->h.udp_table;
		struct udp_hslot *hslot, *hslot2, *nhslot2;

		nhslot2 = udp_hashslot2(udptable, newhash);
		udp_sk(sk)->udp_portaddr_hash = newhash;
		if (hslot2 != nhslot2) {
			hslot = udp_hashslot(udptable, sock_net(sk),
			spin_lock_bh(&hslot->lock);

			spin_lock(&hslot2->lock);
			spin_unlock(&hslot2->lock);

			spin_lock(&nhslot2->lock);
			spin_unlock(&nhslot2->lock);

			spin_unlock_bh(&hslot->lock);
static void udp_v4_rehash(struct sock *sk)
	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
					  inet_sk(sk)->inet_num);

static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)

		sock_rps_save_rxhash(sk, skb);
		trace_udp_fail_queue_rcv_skb(rc, sk);

	if (!static_key_enabled(&udp_encap_needed))
		static_key_slow_inc(&udp_encap_needed);

	if (static_key_false(&udp_encap_needed) && up->encap_type) {

		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {
			ret = encap_rcv(sk, skb);

	    udp_lib_checksum_complete(skb))

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))

		rc = __udp_queue_rcv_skb(sk, skb);
	else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
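/*
 * Receive-side queuing: if nobody holds the socket lock the skb goes
 * straight to the receive queue via __udp_queue_rcv_skb(); if the owner is
 * in a system call, it is parked on the backlog (bounded by sk_rcvbuf) and
 * processed when the lock is released.  Encapsulated sockets first get a
 * chance to consume the packet through their encap_rcv() handler.
 */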
			struct sk_buff *skb, unsigned int final)

	for (i = 0; i < count; i++) {

static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,

	struct sock *sk, *stack[256 / sizeof(struct sock *)];
	unsigned int i, count = 0;

	spin_lock(&hslot->lock);
	sk = sk_nulls_head(&hslot->head);
	dif = skb->dev->ifindex;
	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);

		stack[count++] = sk;
		sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
				       daddr, uh->source, saddr, dif);

			flush_stack(stack, count, skb, ~0);

		for (i = 0; i < count; i++)
			sock_hold(stack[i]);

	spin_unlock(&hslot->lock);

		flush_stack(stack, count, skb, count - 1);

	for (i = 0; i < count; i++)
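/*
 * Multicast/broadcast delivery collects matching sockets on a small on-stack
 * array while the slot lock is held.  If the array fills up mid-walk the
 * batch is flushed immediately; otherwise each socket gets an extra
 * reference so the final flush_stack() (which clones the skb once per
 * socket) can run after the lock is dropped.
 */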
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,

	const struct iphdr *iph;

		err = udplite_checksum_init(skb, uh);

	if (uh->check == 0) {
	if (!skb_csum_unnecessary(skb))
					       skb->len, proto, 0);

	unsigned short ulen;
	struct rtable *rt = skb_rtable(skb);
	struct net *net = dev_net(skb->dev);

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))

	saddr = ip_hdr(skb)->saddr;
	daddr = ip_hdr(skb)->daddr;

	if (ulen > skb->len)
	if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))

	if (udp4_csum_init(skb, uh, proto))

		return __udp4_lib_mcast_deliver(net, skb, uh,
						saddr, daddr, udptable);

	sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);

	if (udp_lib_checksum_complete(skb))

	unlock_sock_fast(sk, slow);
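/*
 * udp_lib_setsockopt() below is shared with IPv6 and UDP-Lite.  It handles
 * UDP_CORK, UDP_ENCAP and the UDP-Lite send/receive coverage options; the
 * "val != 0 && val < 8" checks clamp an illegal UDP-Lite checksum coverage
 * up to the 8-byte minimum.  Illustrative userspace use of corking:
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
 *	send(fd, part1, len1, 0);
 *	send(fd, part2, len2, 0);
 *	on = 0;
 *	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
 *
 * The two send() calls above leave the host as a single datagram.
 */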
			  char __user *optval, unsigned int optlen,
			  int (*push_pending_frames)(struct sock *))

	if (optlen < sizeof(int))

			(*push_pending_frames)(sk);

		if (val != 0 && val < 8)
		if (val != 0 && val < 8)

		   char __user *optval, unsigned int optlen)
				  udp_push_pending_frames);

#ifdef CONFIG_COMPAT
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen)
				  udp_push_pending_frames);

		   char __user *optval, int __user *optlen)

	len = min_t(unsigned int, len, sizeof(int));

		   char __user *optval, int __user *optlen)

#ifdef CONFIG_COMPAT
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)

	struct sock *sk = sock->sk;
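/*
 * udp_poll() builds on datagram_poll() but also consults
 * first_packet_length(), so a receive queue holding only checksum-corrupted
 * datagrams does not wake a blocking poll() with POLLIN.
 */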
	.close		   = udp_lib_close,
	.backlog_rcv	   = __udp_queue_rcv_skb,
	.hash		   = udp_lib_hash,
	.rehash		   = udp_v4_rehash,
	.obj_size	   = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_udp_setsockopt,
	.compat_getsockopt = compat_udp_getsockopt,

#ifdef CONFIG_PROC_FS
	struct net *net = seq_file_net(seq);

		if (hlist_nulls_empty(&hslot->head))

		spin_lock_bh(&hslot->lock);
			if (!net_eq(sock_net(sk), net))
			if (sk->sk_family == state->family)
		spin_unlock_bh(&hslot->lock);

	struct net *net = seq_file_net(seq);

		sk = sk_nulls_next(sk);
	} while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));

		return udp_get_first(seq, state->bucket + 1);

	struct sock *sk = udp_get_first(seq, 0);

	while (pos && (sk = udp_get_next(seq, sk)) != NULL)

static void *udp_seq_start(struct seq_file *seq, loff_t *pos)

static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)

		sk = udp_get_idx(seq, 0);
		sk = udp_get_next(seq, v);

static void udp_seq_stop(struct seq_file *seq, void *v)

int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)

	afinfo->seq_ops.start = udp_seq_start;
	afinfo->seq_ops.next  = udp_seq_next;
	afinfo->seq_ops.stop  = udp_seq_stop;

void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)

		int bucket, int *len)

	__be32 src = inet->inet_rcv_saddr;

		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
		bucket, src, srcp, dest, destp, sp->sk_state,
		sk_wmem_alloc_get(sp),
		sk_rmem_alloc_get(sp),

int udp4_seq_show(struct seq_file *seq, void *v)

			   " sl local_address rem_address st tx_queue "
			   "rx_queue tr tm->when retrnsmt uid timeout "
			   "inode ref pointer drops");

		udp4_format_sock(v, seq, state->bucket, &len);

	.seq_fops	= &udp_afinfo_seq_fops,
	.show		= udp4_seq_show,
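/*
 * Each line produced by udp4_format_sock() becomes one row of /proc/net/udp,
 * matching the column header emitted by udp4_seq_show() above.
 */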
static int __net_init udp4_proc_init_net(struct net *net)
	return udp_proc_register(net, &udp4_seq_afinfo);

static void __net_exit udp4_proc_exit_net(struct net *net)
	udp_proc_unregister(net, &udp4_seq_afinfo);

	.init = udp4_proc_init_net,
	.exit = udp4_proc_exit_net,

int __init udp4_proc_init(void)

void udp4_proc_exit(void)
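/*
 * The boot parameter handled below, "uhash_entries=N", overrides the
 * boot-time sizing of the UDP hash table, e.g.:
 *
 *	linux ... uhash_entries=65536
 *
 * The value is still rounded and clamped by the table-allocation code that
 * follows.
 */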
static __initdata unsigned long uhash_entries;
static int __init set_uhash_entries(char *str)

	ret = kstrtoul(str, 0, &uhash_entries);

__setup("uhash_entries=", set_uhash_entries);
	for (i = 0; i <= table->mask; i++) {
		table->hash[i].count = 0;
	for (i = 0; i <= table->mask; i++) {
		table->hash2[i].count = 0;

	unsigned long limit;

	limit = max(limit, 128UL);

	const struct iphdr *iph;

	if (!pskb_may_pull(skb, sizeof(*uh)))

	mss = skb_shinfo(skb)->gso_size;

	int type = skb_shinfo(skb)->gso_type;

	offset = skb_checksum_start_offset(skb);
	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
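/*
 * The UFO (UDP fragmentation offload) path ends here: since hardware cannot
 * checksum a UDP datagram that is sent as multiple IP fragments, the full
 * checksum is computed in software and the folded result written back at the
 * checksum offset before the packet is segmented.
 */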