/*
 * Prepend the literal "IPv4: " to the format string passed as fmt; the
 * two adjacent string literals are concatenated at compile time.
 */
23 #define pr_fmt(fmt) "IPv4: " fmt
25 #include <linux/compiler.h>
26 #include <linux/module.h>
27 #include <linux/types.h>
31 #include <linux/list.h>
33 #include <linux/icmp.h>
34 #include <linux/netdevice.h>
36 #include <linux/random.h>
37 #include <linux/slab.h>
46 #include <linux/tcp.h>
47 #include <linux/udp.h>
49 #include <linux/netfilter_ipv4.h>
/*
 * View the cb member of an skb as a struct ipfrag_skb_cb.  Callers in this
 * file read the fragment's offset through it, e.g. FRAG_CB(prev)->offset.
 * NOTE(review): struct ipfrag_skb_cb is declared outside the visible lines;
 * confirm it fits within skb->cb.
 */
65 #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
/*
 * One distinct bit per ECN marking (0x01, 0x02, 0x04, 0x08).  Any
 * combination of these bits is a value in 0..15, which matches the
 * 16-entry ip4_frag_ecn_table that ip_frag_reasm() indexes with qp->ecn.
 * NOTE(review): presumably the bits of all fragments in a queue are
 * accumulated into qp->ecn; the accumulating statement is not visible
 * here -- confirm.  The mapping from the TOS field to these bits is done
 * by ip4_frag_ecn(), whose body is also not visible here.
 */
86 #define IPFRAG_ECN_NOT_ECT 0x01
87 #define IPFRAG_ECN_ECT_1 0x02
88 #define IPFRAG_ECN_ECT_0 0x04
89 #define IPFRAG_ECN_CE 0x08
91 static inline u8 ip4_frag_ecn(
u8 tos)
100 static const u8 ip4_frag_ecn_table[16] = {
120 return net->
ipv4.frags.nqueues;
138 return jhash_3words((
__force u32)
id << 16 | prot,
157 return qp->
id == arg->
iph->id &&
181 qp->
id = arg->
iph->id;
182 qp->
ecn = ip4_frag_ecn(arg->
iph->tos);
186 qp->
peer = sysctl_ipfrag_max_dist ?
187 inet_getpeer_v4(net->
ipv4.peers, arg->
iph->saddr, 1) :
NULL;
204 inet_frag_put(&ipq->
q, &ip4_frags);
210 static void ipq_kill(
struct ipq *
ipq)
218 static void ip_evictor(
struct net *net)
230 static void ip_expire(
unsigned long arg)
238 spin_lock(&qp->
q.lock);
250 const struct iphdr *iph;
282 spin_unlock(&qp->
q.lock);
289 static inline struct ipq *ip_find(
struct net *net,
struct iphdr *iph,
u32 user)
313 static inline int ip_frag_too_far(
struct ipq *qp)
316 unsigned int max = sysctl_ipfrag_max_dist;
328 rc = qp->
q.fragments && (end -
start) > max;
340 static int ip_frag_reinit(
struct ipq *qp)
344 if (!
mod_timer(&qp->
q.timer, jiffies + qp->
q.net->timeout)) {
349 fp = qp->
q.fragments;
352 frag_kfree_skb(qp->
q.net, fp);
359 qp->
q.fragments =
NULL;
360 qp->
q.fragments_tail =
NULL;
368 static int ip_frag_queue(
struct ipq *qp,
struct sk_buff *
skb)
382 unlikely(err = ip_frag_reinit(qp))) {
387 ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
388 offset =
ntohs(ip_hdr(skb)->frag_off);
392 ihl = ip_hdrlen(skb);
395 end = offset + skb->
len - ihl;
399 if ((flags &
IP_MF) == 0) {
403 if (end < qp->q.
len ||
414 if (end > qp->
q.len) {
425 if (pskb_pull(skb, ihl) ==
NULL)
428 err = pskb_trim_rcsum(skb, end - offset);
436 prev = qp->
q.fragments_tail;
437 if (!prev ||
FRAG_CB(prev)->offset < offset) {
442 for (next = qp->
q.fragments; next !=
NULL; next = next->
next) {
443 if (
FRAG_CB(next)->offset >= offset)
454 int i = (
FRAG_CB(prev)->offset + prev->
len) - offset;
462 if (!pskb_pull(skb, i))
472 int i = end -
FRAG_CB(next)->offset;
478 if (!pskb_pull(next, i))
496 qp->
q.fragments =
next;
498 qp->
q.meat -= free_it->
len;
499 frag_kfree_skb(qp->
q.net, free_it);
508 qp->
q.fragments_tail =
skb;
512 qp->
q.fragments =
skb;
520 qp->
q.meat += skb->
len;
527 skb->
len + ihl > qp->
q.max_size)
528 qp->
q.max_size = skb->
len + ihl;
531 qp->
q.meat == qp->
q.len)
532 return ip_frag_reasm(qp, prev, dev);
535 list_move_tail(&qp->
q.lru_list, &qp->
q.net->lru_list);
547 static int ip_frag_reasm(
struct ipq *qp,
struct sk_buff *prev,
561 ecn = ip4_frag_ecn_table[qp->
ecn];
575 qp->
q.fragments_tail =
fp;
579 head->
next = qp->
q.fragments->next;
582 qp->
q.fragments =
head;
589 ihlen = ip_hdrlen(head);
590 len = ihlen + qp->
q.len;
603 if (skb_has_frag_list(head)) {
611 skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
612 skb_frag_list_init(head);
613 for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
614 plen += skb_frag_size(&skb_shinfo(head)->frags[
i]);
626 for (fp = head->
next; fp;) {
640 if (!skb_shinfo(head)->frag_list)
641 skb_shinfo(head)->frag_list =
fp;
653 IPCB(head)->frag_max_size = qp->
q.max_size;
661 qp->
q.fragments =
NULL;
662 qp->
q.fragments_tail =
NULL;
683 net = skb->
dev ? dev_net(skb->
dev) : dev_net(skb_dst(skb)->dev);
690 if ((qp = ip_find(net, ip_hdr(skb), user)) !=
NULL) {
693 spin_lock(&qp->
q.lock);
695 ret = ip_frag_queue(qp, skb);
697 spin_unlock(&qp->
q.lock);
719 if (iph.ihl < 5 || iph.version != 4)
723 if (skb->
len < len || len < (iph.ihl * 4))
726 if (ip_is_fragment(&iph)) {
729 if (!pskb_may_pull(skb, iph.ihl*4))
731 if (pskb_trim_rcsum(skb, len))
746 static struct ctl_table ip4_frags_ns_ctl_table[] = {
749 .data = &
init_net.ipv4.frags.high_thresh,
750 .maxlen =
sizeof(
int),
755 .procname =
"ipfrag_low_thresh",
756 .data = &
init_net.ipv4.frags.low_thresh,
757 .maxlen =
sizeof(
int),
762 .procname =
"ipfrag_time",
763 .data = &
init_net.ipv4.frags.timeout,
764 .maxlen =
sizeof(
int),
771 static struct ctl_table ip4_frags_ctl_table[] = {
773 .
procname =
"ipfrag_secret_interval",
774 .data = &ip4_frags.secret_interval,
775 .maxlen =
sizeof(
int),
780 .procname =
"ipfrag_max_dist",
781 .data = &sysctl_ipfrag_max_dist,
782 .maxlen =
sizeof(
int),
790 static int __net_init ip4_frags_ns_ctl_register(
struct net *net)
795 table = ip4_frags_ns_ctl_table;
801 table[0].
data = &net->
ipv4.frags.high_thresh;
802 table[1].
data = &net->
ipv4.frags.low_thresh;
803 table[2].
data = &net->
ipv4.frags.timeout;
820 static void __net_exit ip4_frags_ns_ctl_unregister(
struct net *net)
824 table = net->
ipv4.frags_hdr->ctl_table_arg;
829 static void ip4_frags_ctl_register(
void)
834 static inline int ip4_frags_ns_ctl_register(
struct net *net)
839 static inline void ip4_frags_ns_ctl_unregister(
struct net *net)
843 static inline void ip4_frags_ctl_register(
void)
848 static int __net_init ipv4_frags_init_net(
struct net *net)
856 net->
ipv4.frags.high_thresh = 256 * 1024;
857 net->
ipv4.frags.low_thresh = 192 * 1024;
867 return ip4_frags_ns_ctl_register(net);
870 static void __net_exit ipv4_frags_exit_net(
struct net *net)
872 ip4_frags_ns_ctl_unregister(net);
877 .init = ipv4_frags_init_net,
878 .exit = ipv4_frags_exit_net,
883 ip4_frags_ctl_register();
885 ip4_frags.hashfn = ip4_hashfn;
886 ip4_frags.constructor = ip4_frag_init;
887 ip4_frags.destructor = ip4_frag_free;
888 ip4_frags.skb_free =
NULL;
889 ip4_frags.qsize =
sizeof(
struct ipq);
890 ip4_frags.match = ip4_frag_match;
891 ip4_frags.frag_expire = ip_expire;
892 ip4_frags.secret_interval = 10 * 60 *
HZ;