Linux Kernel  3.7.1
filter.c
1 /*
2  * Linux Socket Filter - Kernel level socket filtering
3  *
4  * Author:
5  * Jay Schulist <[email protected]>
6  *
7  * Based on the design of:
8  * - The Berkeley Packet Filter
9  *
10  * This program is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU General Public License
12  * as published by the Free Software Foundation; either version
13  * 2 of the License, or (at your option) any later version.
14  *
15  * Andi Kleen - Fix a few bad bugs and races.
16  * Kris Katterjohn - Added many additional checks in sk_chk_filter()
17  */
18 
19 #include <linux/module.h>
20 #include <linux/types.h>
21 #include <linux/mm.h>
22 #include <linux/fcntl.h>
23 #include <linux/socket.h>
24 #include <linux/in.h>
25 #include <linux/inet.h>
26 #include <linux/netdevice.h>
27 #include <linux/if_packet.h>
28 #include <linux/gfp.h>
29 #include <net/ip.h>
30 #include <net/protocol.h>
31 #include <net/netlink.h>
32 #include <linux/skbuff.h>
33 #include <net/sock.h>
34 #include <linux/errno.h>
35 #include <linux/timer.h>
36 #include <asm/uaccess.h>
37 #include <asm/unaligned.h>
38 #include <linux/filter.h>
39 #include <linux/reciprocal_div.h>
40 #include <linux/ratelimit.h>
41 #include <linux/seccomp.h>
42 
43 /* No hurry in this branch
44  *
45  * Exported for the bpf jit load helper.
46  */
47 void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
48 {
49  u8 *ptr = NULL;
50 
51  if (k >= SKF_NET_OFF)
52  ptr = skb_network_header(skb) + k - SKF_NET_OFF;
53  else if (k >= SKF_LL_OFF)
54  ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
55 
56  if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
57  return ptr;
58  return NULL;
59 }
60 
61 static inline void *load_pointer(const struct sk_buff *skb, int k,
62  unsigned int size, void *buffer)
63 {
64  if (k >= 0)
65  return skb_header_pointer(skb, k, size, buffer);
66  return bpf_internal_load_pointer_neg_helper(skb, k, size);
67 }
68 
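The negative-offset convention handled above is easiest to see from a filter's point of view: a classic BPF load can address the link-layer or network header directly by adding SKF_LL_OFF or SKF_NET_OFF to its offset, and load_pointer() then routes such loads through bpf_internal_load_pointer_neg_helper(). A small illustrative sketch (these instructions are an example, not code from this file):

/* Illustrative only: two classic BPF loads using "negative" offsets.
 * The first reads the first byte of the MAC header, the second reads
 * the IPv4 protocol field (offset 9 into the network header) no matter
 * how long the link-layer header is.
 */
static const struct sock_filter neg_offset_loads[] = {
	BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_LL_OFF + 0),
	BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 9),
};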
81 int sk_filter(struct sock *sk, struct sk_buff *skb)
82 {
83  int err;
84  struct sk_filter *filter;
85 
86  /*
87  * If the skb was allocated from pfmemalloc reserves, only
88  * allow SOCK_MEMALLOC sockets to use it as this socket is
89  * helping free memory
90  */
91  if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
92  return -ENOMEM;
93 
94  err = security_sock_rcv_skb(sk, skb);
95  if (err)
96  return err;
97 
98  rcu_read_lock();
99  filter = rcu_dereference(sk->sk_filter);
100  if (filter) {
101  unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
102 
103  err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
104  }
105  rcu_read_unlock();
106 
107  return err;
108 }
109 EXPORT_SYMBOL(sk_filter);
110 
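For context, a hedged sketch of how a receive path typically uses sk_filter(): a non-zero return means the packet must not be delivered to this socket. The helper name below is an assumption for illustration, not code from this file.

static int example_sock_deliver(struct sock *sk, struct sk_buff *skb)
{
	int err = sk_filter(sk, skb);

	if (err) {
		/* filter said drop (-EPERM), or pfmemalloc/security/trim failed */
		kfree_skb(skb);
		return err;
	}
	/* ... otherwise queue skb on sk's receive queue ... */
	return 0;
}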
123 unsigned int sk_run_filter(const struct sk_buff *skb,
124  const struct sock_filter *fentry)
125 {
126  void *ptr;
127  u32 A = 0; /* Accumulator */
128  u32 X = 0; /* Index Register */
129  u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
130  u32 tmp;
131  int k;
132 
133  /*
134  * Process array of filter instructions.
135  */
136  for (;; fentry++) {
137 #if defined(CONFIG_X86_32)
138 #define K (fentry->k)
139 #else
140  const u32 K = fentry->k;
141 #endif
142 
143  switch (fentry->code) {
144  case BPF_S_ALU_ADD_X:
145  A += X;
146  continue;
147  case BPF_S_ALU_ADD_K:
148  A += K;
149  continue;
150  case BPF_S_ALU_SUB_X:
151  A -= X;
152  continue;
153  case BPF_S_ALU_SUB_K:
154  A -= K;
155  continue;
156  case BPF_S_ALU_MUL_X:
157  A *= X;
158  continue;
159  case BPF_S_ALU_MUL_K:
160  A *= K;
161  continue;
162  case BPF_S_ALU_DIV_X:
163  if (X == 0)
164  return 0;
165  A /= X;
166  continue;
167  case BPF_S_ALU_DIV_K:
168  A = reciprocal_divide(A, K);
169  continue;
170  case BPF_S_ALU_MOD_X:
171  if (X == 0)
172  return 0;
173  A %= X;
174  continue;
175  case BPF_S_ALU_MOD_K:
176  A %= K;
177  continue;
178  case BPF_S_ALU_AND_X:
179  A &= X;
180  continue;
181  case BPF_S_ALU_AND_K:
182  A &= K;
183  continue;
184  case BPF_S_ALU_OR_X:
185  A |= X;
186  continue;
187  case BPF_S_ALU_OR_K:
188  A |= K;
189  continue;
190  case BPF_S_ANC_ALU_XOR_X:
191  case BPF_S_ALU_XOR_X:
192  A ^= X;
193  continue;
194  case BPF_S_ALU_XOR_K:
195  A ^= K;
196  continue;
197  case BPF_S_ALU_LSH_X:
198  A <<= X;
199  continue;
200  case BPF_S_ALU_LSH_K:
201  A <<= K;
202  continue;
203  case BPF_S_ALU_RSH_X:
204  A >>= X;
205  continue;
206  case BPF_S_ALU_RSH_K:
207  A >>= K;
208  continue;
209  case BPF_S_ALU_NEG:
210  A = -A;
211  continue;
212  case BPF_S_JMP_JA:
213  fentry += K;
214  continue;
215  case BPF_S_JMP_JGT_K:
216  fentry += (A > K) ? fentry->jt : fentry->jf;
217  continue;
218  case BPF_S_JMP_JGE_K:
219  fentry += (A >= K) ? fentry->jt : fentry->jf;
220  continue;
221  case BPF_S_JMP_JEQ_K:
222  fentry += (A == K) ? fentry->jt : fentry->jf;
223  continue;
224  case BPF_S_JMP_JSET_K:
225  fentry += (A & K) ? fentry->jt : fentry->jf;
226  continue;
227  case BPF_S_JMP_JGT_X:
228  fentry += (A > X) ? fentry->jt : fentry->jf;
229  continue;
230  case BPF_S_JMP_JGE_X:
231  fentry += (A >= X) ? fentry->jt : fentry->jf;
232  continue;
233  case BPF_S_JMP_JEQ_X:
234  fentry += (A == X) ? fentry->jt : fentry->jf;
235  continue;
236  case BPF_S_JMP_JSET_X:
237  fentry += (A & X) ? fentry->jt : fentry->jf;
238  continue;
239  case BPF_S_LD_W_ABS:
240  k = K;
241 load_w:
242  ptr = load_pointer(skb, k, 4, &tmp);
243  if (ptr != NULL) {
244  A = get_unaligned_be32(ptr);
245  continue;
246  }
247  return 0;
248  case BPF_S_LD_H_ABS:
249  k = K;
250 load_h:
251  ptr = load_pointer(skb, k, 2, &tmp);
252  if (ptr != NULL) {
253  A = get_unaligned_be16(ptr);
254  continue;
255  }
256  return 0;
257  case BPF_S_LD_B_ABS:
258  k = K;
259 load_b:
260  ptr = load_pointer(skb, k, 1, &tmp);
261  if (ptr != NULL) {
262  A = *(u8 *)ptr;
263  continue;
264  }
265  return 0;
266  case BPF_S_LD_W_LEN:
267  A = skb->len;
268  continue;
269  case BPF_S_LDX_W_LEN:
270  X = skb->len;
271  continue;
272  case BPF_S_LD_W_IND:
273  k = X + K;
274  goto load_w;
275  case BPF_S_LD_H_IND:
276  k = X + K;
277  goto load_h;
278  case BPF_S_LD_B_IND:
279  k = X + K;
280  goto load_b;
281  case BPF_S_LDX_B_MSH:
282  ptr = load_pointer(skb, K, 1, &tmp);
283  if (ptr != NULL) {
284  X = (*(u8 *)ptr & 0xf) << 2;
285  continue;
286  }
287  return 0;
288  case BPF_S_LD_IMM:
289  A = K;
290  continue;
291  case BPF_S_LDX_IMM:
292  X = K;
293  continue;
294  case BPF_S_LD_MEM:
295  A = mem[K];
296  continue;
297  case BPF_S_LDX_MEM:
298  X = mem[K];
299  continue;
300  case BPF_S_MISC_TAX:
301  X = A;
302  continue;
303  case BPF_S_MISC_TXA:
304  A = X;
305  continue;
306  case BPF_S_RET_K:
307  return K;
308  case BPF_S_RET_A:
309  return A;
310  case BPF_S_ST:
311  mem[K] = A;
312  continue;
313  case BPF_S_STX:
314  mem[K] = X;
315  continue;
316  case BPF_S_ANC_PROTOCOL:
317  A = ntohs(skb->protocol);
318  continue;
319  case BPF_S_ANC_PKTTYPE:
320  A = skb->pkt_type;
321  continue;
322  case BPF_S_ANC_IFINDEX:
323  if (!skb->dev)
324  return 0;
325  A = skb->dev->ifindex;
326  continue;
327  case BPF_S_ANC_MARK:
328  A = skb->mark;
329  continue;
330  case BPF_S_ANC_QUEUE:
331  A = skb->queue_mapping;
332  continue;
333  case BPF_S_ANC_HATYPE:
334  if (!skb->dev)
335  return 0;
336  A = skb->dev->type;
337  continue;
338  case BPF_S_ANC_RXHASH:
339  A = skb->rxhash;
340  continue;
341  case BPF_S_ANC_CPU:
342  A = raw_smp_processor_id();
343  continue;
344  case BPF_S_ANC_NLATTR: {
345  struct nlattr *nla;
346 
347  if (skb_is_nonlinear(skb))
348  return 0;
349  if (A > skb->len - sizeof(struct nlattr))
350  return 0;
351 
352  nla = nla_find((struct nlattr *)&skb->data[A],
353  skb->len - A, X);
354  if (nla)
355  A = (void *)nla - (void *)skb->data;
356  else
357  A = 0;
358  continue;
359  }
360  case BPF_S_ANC_NLATTR_NEST: {
361  struct nlattr *nla;
362 
363  if (skb_is_nonlinear(skb))
364  return 0;
365  if (A > skb->len - sizeof(struct nlattr))
366  return 0;
367 
368  nla = (struct nlattr *)&skb->data[A];
369  if (nla->nla_len > skb->len - A)
370  return 0;
371 
372  nla = nla_find_nested(nla, X);
373  if (nla)
374  A = (void *)nla - (void *)skb->data;
375  else
376  A = 0;
377  continue;
378  }
379 #ifdef CONFIG_SECCOMP_FILTER
380  case BPF_S_ANC_SECCOMP_LD_W:
381  A = seccomp_bpf_load(fentry->k);
382  continue;
383 #endif
384  default:
385  WARN_RATELIMIT(1, "Unknown code:%u jt:%u tf:%u k:%u\n",
386  fentry->code, fentry->jt,
387  fentry->jf, fentry->k);
388  return 0;
389  }
390  }
391 
392  return 0;
393 }
394 EXPORT_SYMBOL(sk_run_filter);
395 
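To make the opcode cases above concrete, here is a small classic BPF program of the kind sk_run_filter() interprets once sk_chk_filter() has translated it to the BPF_S_* opcodes. It accepts ARP frames and drops everything else; the program is an illustration, not taken from this file.

static const struct sock_filter example_arp_filter[] = {
	/* A = EtherType (2 bytes at offset 12 of the frame) */
	BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),
	/* if (A == ETH_P_ARP) fall through to accept, else skip to drop;
	 * ETH_P_ARP == 0x0806 (linux/if_ether.h, pulled in via netdevice.h)
	 */
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_ARP, 0, 1),
	/* accept: keep up to 0xffff bytes of the packet */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),
	/* drop: returning 0 makes sk_filter() discard the packet */
	BPF_STMT(BPF_RET | BPF_K, 0),
};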
396 /*
397  * Security :
398  * A BPF program is able to use 16 cells of memory to store intermediate
399  * values (check u32 mem[BPF_MEMWORDS] in sk_run_filter())
400  * As we don't want to clear the mem[] array for each packet going through
401  * sk_run_filter(), we check that a filter loaded by the user never tries to
402  * read a cell unless it was previously written, and we check all branches to be sure
403  * a malicious user doesn't try to abuse us.
404  */
405 static int check_load_and_stores(struct sock_filter *filter, int flen)
406 {
407  u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
408  int pc, ret = 0;
409 
410  BUILD_BUG_ON(BPF_MEMWORDS > 16);
411  masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
412  if (!masks)
413  return -ENOMEM;
414  memset(masks, 0xff, flen * sizeof(*masks));
415 
416  for (pc = 0; pc < flen; pc++) {
417  memvalid &= masks[pc];
418 
419  switch (filter[pc].code) {
420  case BPF_S_ST:
421  case BPF_S_STX:
422  memvalid |= (1 << filter[pc].k);
423  break;
424  case BPF_S_LD_MEM:
425  case BPF_S_LDX_MEM:
426  if (!(memvalid & (1 << filter[pc].k))) {
427  ret = -EINVAL;
428  goto error;
429  }
430  break;
431  case BPF_S_JMP_JA:
432  /* a jump must set masks on target */
433  masks[pc + 1 + filter[pc].k] &= memvalid;
434  memvalid = ~0;
435  break;
436  case BPF_S_JMP_JEQ_K:
437  case BPF_S_JMP_JEQ_X:
438  case BPF_S_JMP_JGE_K:
439  case BPF_S_JMP_JGE_X:
440  case BPF_S_JMP_JGT_K:
441  case BPF_S_JMP_JGT_X:
442  case BPF_S_JMP_JSET_X:
443  case BPF_S_JMP_JSET_K:
444  /* a jump must set masks on targets */
445  masks[pc + 1 + filter[pc].jt] &= memvalid;
446  masks[pc + 1 + filter[pc].jf] &= memvalid;
447  memvalid = ~0;
448  break;
449  }
450  }
451 error:
452  kfree(masks);
453  return ret;
454 }
455 
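As a concrete illustration of the check, a program like the following is rejected with -EINVAL because it loads scratch cell M[3] before any ST/STX instruction has written it (example only, not from this file):

static const struct sock_filter reads_uninitialized_mem[] = {
	BPF_STMT(BPF_LD | BPF_MEM, 3),	/* A = M[3], but M[3] was never stored */
	BPF_STMT(BPF_RET | BPF_A, 0),	/* return A */
};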
470 int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
471 {
472  /*
473  * Valid instructions are initialized to non-0.
474  * Invalid instructions are initialized to 0.
475  */
476  static const u8 codes[] = {
515  [BPF_ST] = BPF_S_ST,
516  [BPF_STX] = BPF_S_STX,
526  };
527  int pc;
528 
529  if (flen == 0 || flen > BPF_MAXINSNS)
530  return -EINVAL;
531 
532  /* check the filter code now */
533  for (pc = 0; pc < flen; pc++) {
534  struct sock_filter *ftest = &filter[pc];
535  u16 code = ftest->code;
536 
537  if (code >= ARRAY_SIZE(codes))
538  return -EINVAL;
539  code = codes[code];
540  if (!code)
541  return -EINVAL;
542  /* Some instructions need special checks */
543  switch (code) {
544  case BPF_S_ALU_DIV_K:
545  /* check for division by zero */
546  if (ftest->k == 0)
547  return -EINVAL;
548  ftest->k = reciprocal_value(ftest->k);
549  break;
550  case BPF_S_ALU_MOD_K:
551  /* check for division by zero */
552  if (ftest->k == 0)
553  return -EINVAL;
554  break;
555  case BPF_S_LD_MEM:
556  case BPF_S_LDX_MEM:
557  case BPF_S_ST:
558  case BPF_S_STX:
559  /* check for invalid memory addresses */
560  if (ftest->k >= BPF_MEMWORDS)
561  return -EINVAL;
562  break;
563  case BPF_S_JMP_JA:
564  /*
565  * Note, the large ftest->k might cause loops.
566  * Compare this with conditional jumps below,
567  * where offsets are limited. --ANK (981016)
568  */
569  if (ftest->k >= (unsigned int)(flen-pc-1))
570  return -EINVAL;
571  break;
572  case BPF_S_JMP_JEQ_K:
573  case BPF_S_JMP_JEQ_X:
574  case BPF_S_JMP_JGE_K:
575  case BPF_S_JMP_JGE_X:
576  case BPF_S_JMP_JGT_K:
577  case BPF_S_JMP_JGT_X:
578  case BPF_S_JMP_JSET_X:
579  case BPF_S_JMP_JSET_K:
580  /* for conditionals both must be safe */
581  if (pc + ftest->jt + 1 >= flen ||
582  pc + ftest->jf + 1 >= flen)
583  return -EINVAL;
584  break;
585  case BPF_S_LD_W_ABS:
586  case BPF_S_LD_H_ABS:
587  case BPF_S_LD_B_ABS:
588 #define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \
589  code = BPF_S_ANC_##CODE; \
590  break
591  switch (ftest->k) {
592  ANCILLARY(PROTOCOL);
593  ANCILLARY(PKTTYPE);
594  ANCILLARY(IFINDEX);
595  ANCILLARY(NLATTR);
596  ANCILLARY(NLATTR_NEST);
597  ANCILLARY(MARK);
598  ANCILLARY(QUEUE);
599  ANCILLARY(HATYPE);
600  ANCILLARY(RXHASH);
601  ANCILLARY(CPU);
602  ANCILLARY(ALU_XOR_X);
603  }
604  }
605  ftest->code = code;
606  }
607 
608  /* last instruction must be a RET code */
609  switch (filter[flen - 1].code) {
610  case BPF_S_RET_K:
611  case BPF_S_RET_A:
612  return check_load_and_stores(filter, flen);
613  }
614  return -EINVAL;
615 }
616 EXPORT_SYMBOL(sk_chk_filter);
617 
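One effect of the translation is worth illustrating: a load from the ancillary offsets (SKF_AD_OFF + SKF_AD_*) is encoded as an ordinary absolute load, and sk_chk_filter() rewrites it to the matching BPF_S_ANC_* opcode through the ANCILLARY() macro above. A hedged sketch; the program and the ifindex value 2 are assumptions for illustration:

static const struct sock_filter match_ifindex[] = {
	/* rewritten to BPF_S_ANC_IFINDEX by sk_chk_filter() */
	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_IFINDEX),
	BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 2, 0, 1),	/* received on ifindex 2? */
	BPF_STMT(BPF_RET | BPF_K, 0xffff),		/* yes: accept */
	BPF_STMT(BPF_RET | BPF_K, 0),			/* no: drop */
};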
622 void sk_filter_release_rcu(struct rcu_head *rcu)
623 {
624  struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
625 
626  bpf_jit_free(fp);
627  kfree(fp);
628 }
629 EXPORT_SYMBOL(sk_filter_release_rcu);
630 
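The RCU callback above is not called directly; dropping the last reference to a filter schedules it. The inline below reproduces, from memory, the shape of sk_filter_release() in include/net/sock.h for this kernel series, so treat the exact body as an assumption:

/* Drop a reference; the last one defers the free to sk_filter_release_rcu(). */
static inline void sk_filter_release(struct sk_filter *fp)
{
	if (atomic_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}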
631 static int __sk_prepare_filter(struct sk_filter *fp)
632 {
633  int err;
634 
635  fp->bpf_func = sk_run_filter;
636 
637  err = sk_chk_filter(fp->insns, fp->len);
638  if (err)
639  return err;
640 
641  bpf_jit_compile(fp);
642  return 0;
643 }
644 
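After __sk_prepare_filter() the filter is always invoked through fp->bpf_func, so JIT-compiled and interpreted programs are run the same way. A minimal dispatch sketch, assuming the SK_RUN_FILTER() macro from linux/filter.h:

static unsigned int example_run_filter(const struct sk_filter *fp,
				       const struct sk_buff *skb)
{
	/* Expands (roughly) to (*fp->bpf_func)(skb, fp->insns) when the JIT
	 * is configured, i.e. either sk_run_filter() or generated native code.
	 */
	return SK_RUN_FILTER(fp, skb);
}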
655 int sk_unattached_filter_create(struct sk_filter **pfp,
656  struct sock_fprog *fprog)
657 {
658  struct sk_filter *fp;
659  unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
660  int err;
661 
662  /* Make sure new filter is there and in the right amounts. */
663  if (fprog->filter == NULL)
664  return -EINVAL;
665 
666  fp = kmalloc(fsize + sizeof(*fp), GFP_KERNEL);
667  if (!fp)
668  return -ENOMEM;
669  memcpy(fp->insns, fprog->filter, fsize);
670 
671  atomic_set(&fp->refcnt, 1);
672  fp->len = fprog->len;
673 
674  err = __sk_prepare_filter(fp);
675  if (err)
676  goto free_mem;
677 
678  *pfp = fp;
679  return 0;
680 free_mem:
681  kfree(fp);
682  return err;
683 }
684 EXPORT_SYMBOL_GPL(sk_unattached_filter_create);
685 
686 void sk_unattached_filter_destroy(struct sk_filter *fp)
687 {
688  sk_filter_release(fp);
689 }
690 EXPORT_SYMBOL_GPL(sk_unattached_filter_destroy);
691 
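The "unattached" API is meant for in-kernel users (the team driver's load-balance mode, for instance) that want to run a classic BPF program without binding it to a socket. A minimal hedged sketch of such a caller; the function and variable names are assumptions:

static struct sk_filter *example_fp;	/* assumed module-level state */

/* fprog->filter must point at kernel memory here: unlike sk_attach_filter(),
 * sk_unattached_filter_create() copies the instructions with memcpy().
 */
static int example_install_filter(struct sock_fprog *fprog)
{
	struct sk_filter *fp;
	int err;

	err = sk_unattached_filter_create(&fp, fprog);
	if (err)
		return err;

	example_fp = fp;
	/* run it later with SK_RUN_FILTER(example_fp, skb);
	 * release it with sk_unattached_filter_destroy(example_fp);
	 */
	return 0;
}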
702 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
703 {
704  struct sk_filter *fp, *old_fp;
705  unsigned int fsize = sizeof(struct sock_filter) * fprog->len;
706  int err;
707 
708  /* Make sure new filter is there and in the right amounts. */
709  if (fprog->filter == NULL)
710  return -EINVAL;
711 
712  fp = sock_kmalloc(sk, fsize+sizeof(*fp), GFP_KERNEL);
713  if (!fp)
714  return -ENOMEM;
715  if (copy_from_user(fp->insns, fprog->filter, fsize)) {
716  sock_kfree_s(sk, fp, fsize+sizeof(*fp));
717  return -EFAULT;
718  }
719 
720  atomic_set(&fp->refcnt, 1);
721  fp->len = fprog->len;
722 
723  err = __sk_prepare_filter(fp);
724  if (err) {
725  sk_filter_uncharge(sk, fp);
726  return err;
727  }
728 
729  old_fp = rcu_dereference_protected(sk->sk_filter,
730  sock_owned_by_user(sk));
731  rcu_assign_pointer(sk->sk_filter, fp);
732 
733  if (old_fp)
734  sk_filter_uncharge(sk, old_fp);
735  return 0;
736 }
737 EXPORT_SYMBOL_GPL(sk_attach_filter);
738 
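sk_attach_filter() is reached from sock_setsockopt() when an application sets SO_ATTACH_FILTER. From userspace the call looks roughly like this (a sketch; the wrapper function is an assumption):

/* Userspace sketch: attach a classic BPF program to a socket. */
#include <sys/socket.h>
#include <linux/filter.h>
#include <stdio.h>

static int attach_example(int fd, struct sock_filter *insns, unsigned short count)
{
	struct sock_fprog prog = {
		.len	= count,	/* number of instructions */
		.filter	= insns,	/* user pointer; the kernel copies it */
	};

	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0) {
		perror("SO_ATTACH_FILTER");
		return -1;
	}
	return 0;
}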
739 int sk_detach_filter(struct sock *sk)
740 {
741  int ret = -ENOENT;
742  struct sk_filter *filter;
743 
744  filter = rcu_dereference_protected(sk->sk_filter,
745  sock_owned_by_user(sk));
746  if (filter) {
747  RCU_INIT_POINTER(sk->sk_filter, NULL);
748  sk_filter_uncharge(sk, filter);
749  ret = 0;
750  }
751  return ret;
752 }
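The detach path is reached via SO_DETACH_FILTER and returns -ENOENT when no filter is attached. A userspace sketch, under the assumption that fd is an open socket with a filter installed:

/* Userspace sketch: remove a previously attached socket filter. */
#include <sys/socket.h>
#include <stdio.h>

static int detach_example(int fd)
{
	int dummy = 0;	/* value is unused; a non-zero optlen is still required */

	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy)) < 0) {
		perror("SO_DETACH_FILTER");	/* ENOENT if nothing was attached */
		return -1;
	}
	return 0;
}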