#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/socket.h>

#define NF_CONNTRACK_VERSION "0.5.0"
                               unsigned int protoff);
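/* The fragments that follow are from the tuple hashing helpers
 * (hash_conntrack_raw() and the bucket/size wrappers): a tuple is hashed
 * with jhash2() over everything up to the destination port, mixed with the
 * conntrack zone and the boot-time random nf_conntrack_hash_rnd so that
 * bucket placement cannot be predicted from outside. */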
        n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
        return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
                      (((__force __u16)tuple->dst.protonum << 16) |
                       tuple->dst.u.all));
        return ((u64)hash * size) >> 32;

        return __hash_bucket(hash, net->ct.htable_size);
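/* __hash_bucket() maps the 32-bit hash onto [0, size) with a 64-bit multiply
 * and shift instead of a modulo, avoiding a division and working for table
 * sizes that are not powers of two. A minimal user-space sketch of the same
 * idea (names here are illustrative, not from this file):
 *
 *      static inline unsigned int bucket_of(unsigned int hash32,
 *                                           unsigned int nbuckets)
 *      {
 *              return ((unsigned long long)hash32 * nbuckets) >> 32;
 *      }
 */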
                                  u16 zone, unsigned int size)
        return __hash_bucket(hash_conntrack_raw(tuple, zone), size);

        return __hash_conntrack(tuple, zone, net->ct.htable_size);
                    unsigned int dataoff,

        memset(tuple, 0, sizeof(*tuple));
        tuple->src.l3num = l3num;
        tuple->dst.protonum = protonum;
        unsigned int protoff;

        l3proto = __nf_ct_l3proto_find(l3num);
        ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum);
        memset(inverse, 0, sizeof(*inverse));
        inverse->src.l3num = orig->src.l3num;
        inverse->dst.dir = !orig->dst.dir;
        inverse->dst.protonum = orig->dst.protonum;
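        /* These assignments are from nf_ct_invert_tuple(), which builds the
         * reply-direction tuple for a connection: the L3 protocol number is
         * copied, the direction bit is flipped and the L4 protocol number is
         * preserved; the actual address/port swapping is done by the
         * l3proto/l4proto invert_tuple() callbacks in the elided lines. */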
clean_from_lists(struct nf_conn *ct)
        pr_debug("clean_from_lists(%p)\n", ct);
destroy_conntrack(struct nf_conntrack *nfct)
        struct net *net = nf_ct_net(ct);

        pr_debug("destroy_conntrack(%p)\n", ct);
        if (l4proto && l4proto->destroy)

        spin_lock_bh(&nf_conntrack_lock);
        if (!nf_ct_is_confirmed(ct)) {
        spin_unlock_bh(&nf_conntrack_lock);

        pr_debug("destroy_conntrack: returning ct=%p to slab\n", ct);
        struct net *net = nf_ct_net(ct);

        clean_from_lists(ct);
static void death_by_event(unsigned long ul_conntrack)
        struct nf_conn *ct = (void *)ul_conntrack;
        struct net *net = nf_ct_net(ct);

                (random32() % net->ct.sysctl_events_retry_timeout);
        spin_lock(&nf_conntrack_lock);
        spin_unlock(&nf_conntrack_lock);
        struct net *net = nf_ct_net(ct);

                (random32() % net->ct.sysctl_events_retry_timeout);
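        /* In death_by_event() and in the fragment above, the retry timer is
         * armed with a random offset below net->ct.sysctl_events_retry_timeout:
         * when the IPCT_DESTROY event cannot be delivered to userspace, the
         * destruction is retried later, and the randomization keeps a burst of
         * failed deliveries from all re-firing at the same instant. */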
static void death_by_timeout(unsigned long ul_conntrack)
        struct nf_conn *ct = (void *)ul_conntrack;

        tstamp = nf_conn_tstamp_find(ct);
        if (tstamp && tstamp->stop == 0)
____nf_conntrack_find(struct net *net, u16 zone,

        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
                if (nf_ct_tuple_equal(tuple, &h->tuple) &&
                    nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {

        if (get_nulls_value(n) != bucket) {

        return ____nf_conntrack_find(net, zone, tuple,
                                     hash_conntrack_raw(tuple, zone));
__nf_conntrack_find_get(struct net *net, u16 zone,

        h = ____nf_conntrack_find(net, zone, tuple, hash);
                ct = nf_ct_tuplehash_to_ctrack(h);
                             nf_ct_zone(ct) != zone)) {

        return __nf_conntrack_find_get(net, zone, tuple,
                                       hash_conntrack_raw(tuple, zone));
static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash,
                                       unsigned int repl_hash)
        struct net *net = nf_ct_net(ct);

                                 &net->ct.hash[hash]);
                                 &net->ct.hash[repl_hash]);
        struct net *net = nf_ct_net(ct);
        unsigned int hash, repl_hash;

        zone = nf_ct_zone(ct);
        hash = hash_conntrack(net, zone,
        repl_hash = hash_conntrack(net, zone,

            zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
            zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))

        __nf_conntrack_hash_insert(ct, hash, repl_hash);
        unsigned int hash, repl_hash;

        ct = nf_ct_get(skb, &ctinfo);

        zone = nf_ct_zone(ct);
        repl_hash = hash_conntrack(net, zone,

        pr_debug("Confirming conntrack %p\n", ct);

            zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
            zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))

        tstamp = nf_conn_tstamp_find(ct);
                __net_timestamp(skb);

        __nf_conntrack_hash_insert(ct, hash, repl_hash);
        help = nfct_help(ct);
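        /* The lines above are from __nf_conntrack_confirm(): once the first
         * packet of a flow has passed the last netfilter hook, the entry is
         * taken off the unconfirmed list and inserted into the main hash
         * table. Both the original- and reply-direction buckets are re-checked
         * for a clashing entry in the same zone before insertion, and the
         * start timestamp is recorded when connection timestamping is on. */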
                          const struct nf_conn *ignored_conntrack)
        struct net *net = nf_ct_net(ignored_conntrack);
        u16 zone = nf_ct_zone(ignored_conntrack);
        unsigned int hash = hash_conntrack(net, zone, tuple);

        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
                if (ct != ignored_conntrack &&
                    nf_ct_tuple_equal(tuple, &h->tuple) &&
                    nf_ct_zone(ct) == zone) {
                        rcu_read_unlock_bh();
        rcu_read_unlock_bh();
#define NF_CT_EVICTION_RANGE 8

static noinline int early_drop(struct net *net, unsigned int hash)
        unsigned int i, cnt = 0;

        for (i = 0; i < net->ct.htable_size; i++) {
                hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
                        tmp = nf_ct_tuplehash_to_ctrack(h);

                if (likely(!nf_ct_is_dying(ct) &&

                hash = (hash + 1) % net->ct.htable_size;

        death_by_timeout((unsigned long)ct);
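        /* early_drop() is invoked when nf_conntrack_max has been reached: it
         * walks buckets starting at the one the new tuple hashed to, picks a
         * connection that has not reached ASSURED state, and evicts it early
         * by running death_by_timeout() on it so the new connection can be
         * tracked instead. */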
__nf_conntrack_alloc(struct net *net, u16 zone,

        if (unlikely(!nf_conntrack_hash_rnd)) {
                hash = hash_conntrack_raw(orig, zone);

        if (nf_conntrack_max &&

#ifdef CONFIG_NF_CONNTRACK_ZONES
        struct nf_conntrack_zone *nf_ct_zone;
        nf_ct_zone->id = zone;
#ifdef CONFIG_NF_CONNTRACK_ZONES

        return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
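        /* __nf_conntrack_alloc() seeds nf_conntrack_hash_rnd lazily on the
         * first allocation (recomputing the hash afterwards), enforces
         * nf_conntrack_max by attempting early_drop() when the count is
         * exceeded, and, with CONFIG_NF_CONNTRACK_ZONES, attaches a zone
         * extension carrying the zone id to the new entry. */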
        struct net *net = nf_ct_net(ct);

        nf_ct_ext_destroy(ct);
init_conntrack(struct net *net, struct nf_conn *tmpl,
               unsigned int dataoff, u32 hash)
        unsigned int *timeouts;

        ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,

        timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;

        if (!l4proto->new(ct, skb, dataoff, timeouts)) {
                pr_debug("init conntrack: can't track with proto module\n");

        ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;
        nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,

        spin_lock_bh(&nf_conntrack_lock);
                pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
#ifdef CONFIG_NF_CONNTRACK_MARK
                ct->mark = exp->master->mark;
#ifdef CONFIG_NF_CONNTRACK_SECMARK
                ct->secmark = exp->master->secmark;
                nf_conntrack_get(&ct->master->ct_general);

                             &net->ct.unconfirmed);
        spin_unlock_bh(&nf_conntrack_lock);
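        /* init_conntrack() is the slow path for the first packet of a flow:
         * it allocates the nf_conn, lets the l4proto ->new() handler decide
         * whether the packet may start a connection, attaches the timeout and
         * event-cache extensions, and checks for a matching expectation under
         * nf_conntrack_lock; when one matches, mark/secmark are inherited from
         * the master connection and a reference to it is taken. Finally the
         * entry is placed on the per-net unconfirmed list. */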
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
                  unsigned int dataoff,

                             dataoff, l3num, protonum, &tuple, l3proto,
                pr_debug("resolve_normal_ct: Can't get tuple\n");

        hash = hash_conntrack_raw(&tuple, zone);
        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
                h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
        ct = nf_ct_tuplehash_to_ctrack(h);

                pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
                        pr_debug("nf_conntrack_in: related packet for %p\n",
                        pr_debug("nf_conntrack_in: new packet for %p\n", ct);
        unsigned int *timeouts;
        unsigned int dataoff;

        tmpl = (struct nf_conn *)skb->nfct;
                if (!nf_ct_is_template(tmpl)) {

        l3proto = __nf_ct_l3proto_find(pf);
        ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
                                   &dataoff, &protonum);
                pr_debug("not prepared to track yet or error occurred\n");

                ret = l4proto->error(net, tmpl, skb, dataoff, &ctinfo,

        ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
                               l3proto, l4proto, &set_reply, &ctinfo);

        timeouts = nf_ct_timeout_lookup(net, ct, l4proto);

        ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
                pr_debug("nf_conntrack_in: Can't track with proto module\n");
                nf_conntrack_put(skb->nfct);

                skb->nfct = (struct nf_conntrack *)tmpl;
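        /* nf_conntrack_in() is the hook entry point for every tracked packet:
         * it looks up the L3 and L4 protocol trackers, lets ->get_l4proto()
         * and ->error() validate the header, resolves (or creates) the
         * conntrack via resolve_normal_ct(), and finally runs ->packet() to
         * advance the per-protocol state machine; if that rejects the packet,
         * the skb's conntrack reference is dropped again. */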
                                  __nf_ct_l3proto_find(orig->src.l3num),
                                                       orig->dst.protonum));
        pr_debug("Altering reply tuple of %p to ", ct);
        nf_ct_dump_tuple(newreply);
                          unsigned long extra_jiffies,

        if (!nf_ct_is_confirmed(ct)) {
                ct->timeout.expires = extra_jiffies;
                unsigned long newtime = jiffies + extra_jiffies;

                if (newtime - ct->timeout.expires >= HZ)

        acct = nf_conn_acct_find(ct);
        acct = nf_conn_acct_find(ct);
                ct->timeout.function((unsigned long)ct);
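        /* This call sits in __nf_ct_kill_acct(): when del_timer() on the
         * entry's timeout succeeds, the timer callback (normally
         * death_by_timeout) is invoked directly, so the connection is torn
         * down immediately instead of waiting for the timer to expire. */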
#ifdef CONFIG_NF_CONNTRACK_ZONES
        .len    = sizeof(struct nf_conntrack_zone),
        .align  = __alignof__(struct nf_conntrack_zone),
        .id     = NF_CT_EXT_ZONE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

#include <linux/netfilter/nfnetlink.h>

                goto nla_put_failure;

        t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
        ct = nf_ct_get(skb, &ctinfo);

        nf_conntrack_get(nskb->nfct);
get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
                void *data, unsigned int *bucket)

        spin_lock_bh(&nf_conntrack_lock);
        for (; *bucket < net->ct.htable_size; (*bucket)++) {
                        ct = nf_ct_tuplehash_to_ctrack(h);
                ct = nf_ct_tuplehash_to_ctrack(h);
        spin_unlock_bh(&nf_conntrack_lock);

        spin_unlock_bh(&nf_conntrack_lock);
                          int (*iter)(struct nf_conn *i, void *data),

        unsigned int bucket = 0;

        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
                death_by_timeout((unsigned long)ct);
static int kill_report(struct nf_conn *i, void *data)
        tstamp = nf_conn_tstamp_find(i);
        if (tstamp && tstamp->stop == 0)
static int kill_all(struct nf_conn *i, void *data)

        if (is_vmalloc_addr(hash))
static void nf_ct_release_dying_list(struct net *net)
        spin_lock_bh(&nf_conntrack_lock);
                ct = nf_ct_tuplehash_to_ctrack(h);
        spin_unlock_bh(&nf_conntrack_lock);
static int untrack_refs(void)

static void nf_conntrack_cleanup_init_net(void)
        while (untrack_refs() > 0)

#ifdef CONFIG_NF_CONNTRACK_ZONES
static void nf_conntrack_cleanup_net(struct net *net)
        nf_ct_release_dying_list(net);
                goto i_see_dead_people;
        kfree(net->ct.slabname);

        nf_conntrack_cleanup_net(net);
        nf_conntrack_cleanup_init_net();
        unsigned int nr_slots, i;

        for (i = 0; i < nr_slots; i++)
        for (i = 0; i < init_net.ct.htable_size; i++) {
                while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
                        ct = nf_ct_tuplehash_to_ctrack(h);
                        hlist_nulls_del_rcu(&h->hnnode);
                        bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
        old_size = init_net.ct.htable_size;

                  &nf_conntrack_htable_size, 0600);
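/* The loop above is the core of nf_conntrack_set_hashsize(): a new nulls
 * hash table is allocated, every entry of init_net's old table is unhooked
 * and re-bucketed with __hash_conntrack() at the new size, and the old
 * table is freed afterwards. The trailing "&nf_conntrack_htable_size, 0600"
 * fragment is the tail of the module_param_call() that wires the "hashsize"
 * module parameter to this resize routine. */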
                per_cpu(nf_conntrack_untracked, cpu).status |= bits;
static int nf_conntrack_init_init_net(void)

        if (!nf_conntrack_htable_size) {
                if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
                        nf_conntrack_htable_size = 16384;
                if (nf_conntrack_htable_size < 32)
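        /* Table sizing: when no hashsize= parameter was given, the bucket
         * count is derived from total RAM, capped at 16384 buckets on
         * machines with more than 1 GB, and never allowed below 32;
         * nf_conntrack_max then defaults to a fixed multiple of the bucket
         * count. */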
#ifdef CONFIG_NF_CONNTRACK_ZONES
#ifdef CONFIG_NF_CONNTRACK_ZONES

#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
#define DYING_NULLS_VAL       ((1<<30)+1)
static int nf_conntrack_init_net(struct net *net)

        if (!net->ct.stat) {
        if (!net->ct.slabname) {
        if (!net->ct.nf_conntrack_cachep) {

        net->ct.htable_size = nf_conntrack_htable_size;
        if (!net->ct.hash) {

        kfree(net->ct.slabname);
        ret = nf_conntrack_init_init_net();
        ret = nf_conntrack_init_net(net);

        nf_conntrack_cleanup_init_net();