Linux Kernel 3.7.1
nf_conntrack_expect.c
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

unsigned int nf_ct_expect_max __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
				u32 pid, int report)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct net *net = nf_ct_exp_net(exp);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	hlist_del_rcu(&exp->hnode);
	net->ct.expect_count--;

	hlist_del(&exp->lnode);
	master_help->expecting[exp->class]--;

	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
	nf_ct_expect_put(exp);

	NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	spin_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_put(exp);
}

static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
	unsigned int hash;

	if (unlikely(!nf_conntrack_hash_rnd)) {
		init_nf_conntrack_hash_rnd();
	}

	hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
		      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
		       (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
	return ((u64)hash * nf_ct_expect_hsize) >> 32;
}
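
/*
 * [Editor's illustration, not part of the original file.]  The final
 * multiply-shift above maps a full 32-bit hash uniformly onto
 * [0, nf_ct_expect_hsize) without a modulo: for any 32-bit value h and
 * table size s, ((u64)h * s) >> 32 is always < s.  E.g. with s = 256:
 * h = 0x00000000 -> bucket 0, h = 0x80000000 -> bucket 128,
 * h = 0xffffffff -> bucket 255.
 */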

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, u16 zone,
		    const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone)
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, u16 zone,
		      const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	rcu_read_lock();
	i = __nf_ct_expect_find(net, zone, tuple);
	if (i && !atomic_inc_not_zero(&i->use))
		i = NULL;
	rcu_read_unlock();

	return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
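
/*
 * [Editor's sketch, not part of the original file.]  A hypothetical
 * caller of nf_ct_expect_find_get() owns the reference taken via
 * atomic_inc_not_zero() and must drop it when done:
 *
 *	struct nf_conntrack_expect *exp;
 *
 *	exp = nf_ct_expect_find_get(net, NF_CT_DEFAULT_ZONE, &tuple);
 *	if (exp != NULL) {
 *		// ... inspect exp under its own reference ...
 *		nf_ct_expect_put(exp);
 *	}
 */
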
/* If an expectation for this connection is found, it gets deleted from
 * the global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, u16 zone,
		       const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i, *exp = NULL;
	struct hlist_node *n;
	unsigned int h;

	if (!net->ct.expect_count)
		return NULL;

	h = nf_ct_expect_dst_hash(tuple);
	hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
		if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
		    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
		    nf_ct_zone(i->master) == zone) {
			exp = i;
			break;
		}
	}
	if (!exp)
		return NULL;

	/* If master is not in hash table yet (ie. packet hasn't left
	   this machine yet), how can other end know about expected?
	   Hence these are not the droids you are looking for (if
	   master ct never got confirmed, we'd hold a reference to it
	   and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conn_help *help = nfct_help(ct);
	struct nf_conntrack_expect *exp;
	struct hlist_node *n, *next;

	/* Optimization: most connections never expect any others. */
	if (!help)
		return;

	hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
		if (del_timer(&exp->timeout)) {
			nf_ct_unlink_expect(exp);
			nf_ct_expect_put(exp);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct nf_conntrack_tuple_mask intersect_mask;
	int count;

	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
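
/*
 * [Editor's example, not part of the original file.]  Two expectations
 * clash when a single packet could match both.  E.g. with a specific
 * source port on one side and a wildcard on the other:
 *
 *	a->tuple.src.u.all = htons(2021), a->mask.src.u.all = htons(0xffff)
 *	b->tuple.src.u.all = 0,           b->mask.src.u.all = 0
 *
 * the intersected port mask is 0, so the ports compare equal under it
 * and only the remaining tuple fields decide whether they clash.
 */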

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master && a->class == b->class &&
	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
	       nf_ct_zone(a->master) == nf_ct_zone(b->master);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
	spin_lock_bh(&nf_conntrack_lock);
	if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		nf_ct_expect_put(exp);
	}
	spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
		       u_int8_t family,
		       const union nf_inet_addr *saddr,
		       const union nf_inet_addr *daddr,
		       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->class = class;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (src) {
		exp->tuple.src.u.all = *src;
		exp->mask.src.u.all = htons(0xFFFF);
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	memcpy(&exp->tuple.dst.u3, daddr, len);
	if (sizeof(exp->tuple.dst.u3) > len)
		/* address needs to be cleared for nf_ct_tuple_equal */
		memset((void *)&exp->tuple.dst.u3 + len, 0x00,
		       sizeof(exp->tuple.dst.u3) - len);

	exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
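
/*
 * [Editor's sketch, not part of the original file.]  The typical
 * helper-side sequence, loosely modelled on in-tree helpers such as
 * nf_conntrack_ftp; the REPLY-direction addresses and the port value
 * are illustrative only:
 *
 *	struct nf_conntrack_expect *exp;
 *	__be16 port = htons(20);
 *	int ret = NF_ACCEPT;
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 *			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3,
 *			  IPPROTO_TCP, NULL, &port);
 *	if (nf_ct_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	nf_ct_expect_put(exp);	// drop the allocation reference either way
 */
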
static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
	struct nf_conntrack_expect *exp;

	exp = container_of(head, struct nf_conntrack_expect, rcu);
	kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(exp);
	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

	/* two references: one for hash insert, one for the timer */
	atomic_add(2, &exp->use);

	hlist_add_head(&exp->lnode, &master_help->expectations);
	master_help->expecting[exp->class]++;

	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
	net->ct.expect_count++;

	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
		    (unsigned long)exp);
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		exp->timeout.expires = jiffies +
			helper->expect_policy[exp->class].timeout * HZ;
	}
	add_timer(&exp->timeout);

	NF_CT_STAT_INC(net, expect_create);
	return 0;
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
				struct nf_conntrack_expect *new)
{
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_expect *exp, *last = NULL;
	struct hlist_node *n;

	hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
		if (exp->class == new->class)
			last = exp;
	}

	if (last && del_timer(&last->timeout)) {
		nf_ct_unlink_expect(last);
		nf_ct_expect_put(last);
	}
}

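/*
 * [Editor's note, not part of the original file.]  __nf_ct_expect_check()
 * returns 1 when the expectation may be inserted, otherwise a negative
 * errno: -ESHUTDOWN if the master connection lost its helper, -EBUSY on
 * a clash with an existing expectation, -EMFILE when the per-helper
 * policy limit or the global expectation table size is exceeded.
 */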
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
	const struct nf_conntrack_expect_policy *p;
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	struct nf_conntrack_helper *helper;
	struct net *net = nf_ct_exp_net(expect);
	struct hlist_node *n, *next;
	unsigned int h;
	int ret = 1;

	if (!master_help) {
		ret = -ESHUTDOWN;
		goto out;
	}
	h = nf_ct_expect_dst_hash(&expect->tuple);
	hlist_for_each_entry_safe(i, n, next, &net->ct.expect_hash[h], hnode) {
		if (expect_matches(i, expect)) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_ct_expect_put(i);
				break;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Will be over limit? */
	helper = rcu_dereference_protected(master_help->helper,
					   lockdep_is_held(&nf_conntrack_lock));
	if (helper) {
		p = &helper->expect_policy[expect->class];
		if (p->max_expected &&
		    master_help->expecting[expect->class] >= p->max_expected) {
			evict_oldest_expect(master, expect);
			if (master_help->expecting[expect->class]
			    >= p->max_expected) {
				ret = -EMFILE;
				goto out;
			}
		}
	}

	if (net->ct.expect_count >= nf_ct_expect_max) {
		net_warn_ratelimited("nf_conntrack: expectation table full\n");
		ret = -EMFILE;
	}
out:
	return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
				u32 pid, int report)
{
	int ret;

	spin_lock_bh(&nf_conntrack_lock);
	ret = __nf_ct_expect_check(expect);
	if (ret <= 0)
		goto out;

	ret = nf_ct_expect_insert(expect);
	if (ret < 0)
		goto out;
	spin_unlock_bh(&nf_conntrack_lock);
	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
	return ret;
out:
	spin_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

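/*
 * [Editor's note, not part of the original file.]  Most in-kernel users
 * go through the nf_ct_expect_related(exp) wrapper, which (assuming the
 * 3.7 nf_conntrack_expect.h header) simply calls
 * nf_ct_expect_related_report(exp, 0, 0).
 */
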
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct ct_expect_iter_state {
	struct seq_net_private p;
	unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;
	struct hlist_node *n;

	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
		n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
		if (n)
			return n;
	}
	return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
					     struct hlist_node *head)
{
	struct net *net = seq_file_net(seq);
	struct ct_expect_iter_state *st = seq->private;

	head = rcu_dereference(hlist_next_rcu(head));
	while (head == NULL) {
		if (++st->bucket >= nf_ct_expect_hsize)
			return NULL;
		head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
	}
	return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
	struct hlist_node *head = ct_expect_get_first(seq);

	if (head)
		while (pos && (head = ct_expect_get_next(seq, head)))
			pos--;
	return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect;
	struct nf_conntrack_helper *helper;
	struct hlist_node *n = v;
	char *delim = "";

	expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));

	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
		seq_printf(s, "PERMANENT");
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_INACTIVE) {
		seq_printf(s, "%sINACTIVE", delim);
		delim = ",";
	}
	if (expect->flags & NF_CT_EXPECT_USERSPACE)
		seq_printf(s, "%sUSERSPACE", delim);

	helper = rcu_dereference(nfct_help(expect->master)->helper);
	if (helper) {
		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
		if (helper->expect_policy[expect->class].name)
			seq_printf(s, "/%s",
				   helper->expect_policy[expect->class].name);
	}

	return seq_putc(s, '\n');
}
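
/*
 * [Editor's illustration, not part of the original file.]  A line of
 * /proc/net/nf_conntrack_expect rendered by exp_seq_show() might look
 * roughly like (all values hypothetical):
 *
 *	297 l3proto = 2 proto=6 src=192.168.1.10 dst=192.168.1.20 sport=0 dport=35104 ftp
 *
 * i.e. remaining timeout in seconds, protocol numbers, the expected
 * tuple, optional PERMANENT/INACTIVE/USERSPACE flags and helper name.
 */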

static const struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &exp_seq_ops,
			    sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
	.owner = THIS_MODULE,
	.open = exp_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release_net,
};
#endif /* CONFIG_NF_CONNTRACK_PROCFS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	struct proc_dir_entry *proc;

	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
	if (!proc)
		return -ENOMEM;
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
	return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
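
/*
 * [Editor's note, not part of the original file.]  This exposes the
 * expectation hash size as a read-only (0400) nf_conntrack module
 * parameter, e.g. set at load time via something like:
 *
 *	modprobe nf_conntrack expect_hashsize=2048
 *
 * When left at 0, nf_conntrack_expect_init() below derives a default
 * from the main conntrack table size.
 */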

int nf_conntrack_expect_init(struct net *net)
{
	int err = -ENOMEM;

	if (net_eq(net, &init_net)) {
		if (!nf_ct_expect_hsize) {
			nf_ct_expect_hsize = net->ct.htable_size / 256;
			if (!nf_ct_expect_hsize)
				nf_ct_expect_hsize = 1;
		}
		nf_ct_expect_max = nf_ct_expect_hsize * 4;
	}

	net->ct.expect_count = 0;
	net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
	if (net->ct.expect_hash == NULL)
		goto err1;

	if (net_eq(net, &init_net)) {
		nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
					sizeof(struct nf_conntrack_expect),
					0, 0, NULL);
		if (!nf_ct_expect_cachep)
			goto err2;
	}

	err = exp_proc_init(net);
	if (err < 0)
		goto err3;

	return 0;

err3:
	if (net_eq(net, &init_net))
		kmem_cache_destroy(nf_ct_expect_cachep);
err2:
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
	return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
	exp_proc_remove(net);
	if (net_eq(net, &init_net)) {
		rcu_barrier(); /* Wait for call_rcu() before destroy */
		kmem_cache_destroy(nf_ct_expect_cachep);
	}
	nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}