Linux Kernel  3.7.1
fault.c
/*
 * linux/arch/arm/mm/fault.c
 *
 * Copyright (C) 1995 Linus Torvalds
 * Modifications for ARM processor (c) 1995-2004 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/page-flags.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/perf_event.h>

#include <asm/exception.h>
#include <asm/pgtable.h>
#include <asm/system_misc.h>
#include <asm/system_info.h>
#include <asm/tlbflush.h>

#include "fault.h"

#ifdef CONFIG_MMU

#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
        int ret = 0;

        if (!user_mode(regs)) {
                /* kprobe_running() needs smp_processor_id() */
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, fsr))
                        ret = 1;
                preempt_enable();
        }

        return ret;
}
#else
static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
{
        return 0;
}
#endif

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd;

        if (!mm)
                mm = &init_mm;

        printk(KERN_ALERT "pgd = %p\n", mm->pgd);
        pgd = pgd_offset(mm, addr);
        printk(KERN_ALERT "[%08lx] *pgd=%08llx",
                        addr, (long long)pgd_val(*pgd));

        do {
                pud_t *pud;
                pmd_t *pmd;
                pte_t *pte;

                if (pgd_none(*pgd))
                        break;

                if (pgd_bad(*pgd)) {
                        printk("(bad)");
                        break;
                }

                pud = pud_offset(pgd, addr);
                if (PTRS_PER_PUD != 1)
                        printk(", *pud=%08llx", (long long)pud_val(*pud));

                if (pud_none(*pud))
                        break;

                if (pud_bad(*pud)) {
                        printk("(bad)");
                        break;
                }

                pmd = pmd_offset(pud, addr);
                if (PTRS_PER_PMD != 1)
                        printk(", *pmd=%08llx", (long long)pmd_val(*pmd));

                if (pmd_none(*pmd))
                        break;

                if (pmd_bad(*pmd)) {
                        printk("(bad)");
                        break;
                }

                /* We must not map this if we have highmem enabled */
                if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
                        break;

                pte = pte_offset_map(pmd, addr);
                printk(", *pte=%08llx", (long long)pte_val(*pte));
#ifndef CONFIG_ARM_LPAE
                printk(", *ppte=%08llx",
                       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
#endif
                pte_unmap(pte);
        } while(0);

        printk("\n");
}
#else                                   /* CONFIG_MMU */
void show_pte(struct mm_struct *mm, unsigned long addr)
{ }
#endif                                  /* CONFIG_MMU */

/*
 * Oops. The kernel tried to access some page that wasn't present.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
                  struct pt_regs *regs)
{
        /*
         * Are we prepared to handle this kernel fault?
         */
        if (fixup_exception(regs))
                return;

        /*
         * No handler, we'll have to terminate things with extreme prejudice.
         */
        bust_spinlocks(1);
        printk(KERN_ALERT
                "Unable to handle kernel %s at virtual address %08lx\n",
                (addr < PAGE_SIZE) ? "NULL pointer dereference" :
                "paging request", addr);

        show_pte(mm, addr);
        die("Oops", regs, fsr);
        bust_spinlocks(0);
        do_exit(SIGKILL);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
                unsigned int fsr, unsigned int sig, int code,
                struct pt_regs *regs)
{
        struct siginfo si;

#ifdef CONFIG_DEBUG_USER
        if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
            ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
                printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
                       tsk->comm, sig, addr, fsr);
                show_pte(tsk->mm, addr);
                show_regs(regs);
        }
#endif

        tsk->thread.address = addr;
        tsk->thread.error_code = fsr;
        tsk->thread.trap_no = 14;
        si.si_signo = sig;
        si.si_errno = 0;
        si.si_code = code;
        si.si_addr = (void __user *)addr;
        force_sig_info(sig, &si, tsk);
}

void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->active_mm;

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (user_mode(regs))
                __do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
        else
                __do_kernel_fault(mm, addr, fsr, regs);
}

#ifdef CONFIG_MMU
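/*
 * ARM-local fault codes returned by __do_page_fault() for an unmapped
 * address or a permission violation. They are placed above the generic
 * VM_FAULT_* bits so they cannot collide with handle_mm_fault() results.
 */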
#define VM_FAULT_BADMAP         0x010000
#define VM_FAULT_BADACCESS      0x020000

/*
 * Check that the permissions on the VMA allow for the fault which occurred.
 * If we encountered a write fault, we must have write permission, otherwise
 * we allow any permission.
 */
static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
{
        unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;

        if (fsr & FSR_WRITE)
                mask = VM_WRITE;
        if (fsr & FSR_LNX_PF)
                mask = VM_EXEC;

        return vma->vm_flags & mask ? false : true;
}

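/*
 * Find the VMA covering 'addr' and, if the access is permitted, let the
 * generic MM code fault the page in. Returns the handle_mm_fault() result,
 * or VM_FAULT_BADMAP/VM_FAULT_BADACCESS for a bad mapping or access.
 */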
static int __kprobes
__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
                unsigned int flags, struct task_struct *tsk)
{
        struct vm_area_struct *vma;
        int fault;

        vma = find_vma(mm, addr);
        fault = VM_FAULT_BADMAP;
        if (unlikely(!vma))
                goto out;
        if (unlikely(vma->vm_start > addr))
                goto check_stack;

        /*
         * Ok, we have a good vm_area for this
         * memory access, so we can handle it.
         */
good_area:
        if (access_error(fsr, vma)) {
                fault = VM_FAULT_BADACCESS;
                goto out;
        }

        return handle_mm_fault(mm, vma, addr & PAGE_MASK, flags);

check_stack:
        /* Don't allow expansion below FIRST_USER_ADDRESS */
        if (vma->vm_flags & VM_GROWSDOWN &&
            addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
                goto good_area;
out:
        return fault;
}

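/*
 * Top-level data abort page fault handler: takes mmap_sem, runs the fault
 * through __do_page_fault() (retrying if asked to), does the perf fault
 * accounting, and turns any remaining error into a signal or kernel fault.
 */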
static int __kprobes
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        struct task_struct *tsk;
        struct mm_struct *mm;
        int fault, sig, code;
        int write = fsr & FSR_WRITE;
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
                                (write ? FAULT_FLAG_WRITE : 0);

        if (notify_page_fault(regs, fsr))
                return 0;

        tsk = current;
        mm  = tsk->mm;

        /* Enable interrupts if they were enabled in the parent context. */
        if (interrupts_enabled(regs))
                local_irq_enable();

        /*
         * If we're in an interrupt or have no user
         * context, we must not take the fault..
         */
        if (in_atomic() || !mm)
                goto no_context;

        /*
         * As per x86, we may deadlock here. However, since the kernel only
         * validly references user space from well defined areas of the code,
         * we can bug out early if this is from code which shouldn't.
         */
        if (!down_read_trylock(&mm->mmap_sem)) {
                if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
                        goto no_context;
retry:
                down_read(&mm->mmap_sem);
        } else {
                /*
                 * The above down_read_trylock() might have succeeded in
                 * which case, we'll have missed the might_sleep() from
                 * down_read()
                 */
                might_sleep();
#ifdef CONFIG_DEBUG_VM
                if (!user_mode(regs) &&
                    !search_exception_tables(regs->ARM_pc))
                        goto no_context;
#endif
        }

        fault = __do_page_fault(mm, addr, fsr, flags, tsk);

        /* If we need to retry but a fatal signal is pending, handle the
         * signal first. We do not need to release the mmap_sem because
         * it would already be released in __lock_page_or_retry in
         * mm/filemap.c. */
        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
                return 0;

        /*
         * Major/minor page fault accounting is only done on the
         * initial attempt. If we go through a retry, it is extremely
         * likely that the page will be found in page cache at that point.
         */

        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
        if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
                if (fault & VM_FAULT_MAJOR) {
                        tsk->maj_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
                                        regs, addr);
                } else {
                        tsk->min_flt++;
                        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
                                        regs, addr);
                }
                if (fault & VM_FAULT_RETRY) {
                        /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
                         * of starvation. */
                        flags &= ~FAULT_FLAG_ALLOW_RETRY;
                        flags |= FAULT_FLAG_TRIED;
                        goto retry;
                }
        }

        up_read(&mm->mmap_sem);

        /*
         * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
         */
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;

        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
                 * userspace (which will retry the fault, or kill us if we
                 * got oom-killed)
                 */
                pagefault_out_of_memory();
                return 0;
        }

        /*
         * If we are in kernel mode at this point, we
         * have no context to handle this fault with.
         */
        if (!user_mode(regs))
                goto no_context;

        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
                 * successfully fix up this page fault.
                 */
                sig = SIGBUS;
                code = BUS_ADRERR;
        } else {
                /*
                 * Something tried to access memory that
                 * isn't in our memory map..
                 */
                sig = SIGSEGV;
                code = fault == VM_FAULT_BADACCESS ?
                        SEGV_ACCERR : SEGV_MAPERR;
        }

        __do_user_fault(tsk, addr, fsr, sig, code, regs);
        return 0;

no_context:
        __do_kernel_fault(mm, addr, fsr, regs);
        return 0;
}
#else                                   /* CONFIG_MMU */
static int
do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 0;
}
#endif                                  /* CONFIG_MMU */

/*
 * First Level Translation Fault Handler
 *
 * We enter here because the first level page table doesn't contain
 * a valid entry for the address.
 *
 * If the address is in kernel space (>= TASK_SIZE), then we are
 * probably faulting in the vmalloc() area.
 *
 * If the init_task's first level page tables contain the relevant
 * entry, we copy it to this task. If not, we send the process
 * a signal, fixup the exception, or oops the kernel.
 *
 * NOTE! We MUST NOT take any locks for this case. We may be in an
 * interrupt or a critical region, and should only copy the information
 * from the master page table, nothing more.
 */
#ifdef CONFIG_MMU
static int __kprobes
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        unsigned int index;
        pgd_t *pgd, *pgd_k;
        pud_t *pud, *pud_k;
        pmd_t *pmd, *pmd_k;

        if (addr < TASK_SIZE)
                return do_page_fault(addr, fsr, regs);

        if (user_mode(regs))
                goto bad_area;

        index = pgd_index(addr);

        pgd = cpu_get_pgd() + index;
        pgd_k = init_mm.pgd + index;

        if (pgd_none(*pgd_k))
                goto bad_area;
        if (!pgd_present(*pgd))
                set_pgd(pgd, *pgd_k);

        pud = pud_offset(pgd, addr);
        pud_k = pud_offset(pgd_k, addr);

        if (pud_none(*pud_k))
                goto bad_area;
        if (!pud_present(*pud))
                set_pud(pud, *pud_k);

        pmd = pmd_offset(pud, addr);
        pmd_k = pmd_offset(pud_k, addr);

#ifdef CONFIG_ARM_LPAE
        /*
         * Only one hardware entry per PMD with LPAE.
         */
        index = 0;
#else
        /*
         * On ARM one Linux PGD entry contains two hardware entries (see page
         * tables layout in pgtable.h). We normally guarantee that we always
         * fill both L1 entries. But create_mapping() doesn't follow the rule.
         * It can create individual L1 entries, so here we have to check
         * pmd_none() on the entry that actually corresponds to the address,
         * not on the first entry of the pair.
         */
        index = (addr >> SECTION_SHIFT) & 1;
#endif
        if (pmd_none(pmd_k[index]))
                goto bad_area;

        copy_pmd(pmd, pmd_k);
        return 0;

bad_area:
        do_bad_area(addr, fsr, regs);
        return 0;
}
#else                                   /* CONFIG_MMU */
static int
do_translation_fault(unsigned long addr, unsigned int fsr,
                     struct pt_regs *regs)
{
        return 0;
}
#endif                                  /* CONFIG_MMU */

/*
 * Some section permission faults need to be handled gracefully.
 * They can happen due to a __{get,put}_user during an oops.
 */
static int
do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        do_bad_area(addr, fsr, regs);
        return 0;
}

/*
 * This abort handler always returns "fault".
 */
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        return 1;
}

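/*
 * One entry per fault status value: the handler to call and, if it cannot
 * resolve the fault, the signal/si_code to deliver and a name to print.
 */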
struct fsr_info {
        int     (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
        int     sig;
        int     code;
        const char *name;
};

/* FSR definition */
#ifdef CONFIG_ARM_LPAE
#include "fsr-3level.c"
#else
#include "fsr-2level.c"
#endif

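/*
 * Allow CPU/platform setup code to override the handler, signal and name
 * for a given data abort fault status value.
 */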
void __init
hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
                int sig, int code, const char *name)
{
        if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
                BUG();

        fsr_info[nr].fn   = fn;
        fsr_info[nr].sig  = sig;
        fsr_info[nr].code = code;
        fsr_info[nr].name = name;
}

/*
 * Dispatch a data abort to the relevant handler.
 */
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
        const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
        struct siginfo info;

        if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
                return;

        printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n",
                inf->name, fsr, addr);

        info.si_signo = inf->sig;
        info.si_errno = 0;
        info.si_code  = inf->code;
        info.si_addr  = (void __user *)addr;
        arm_notify_die("", regs, &info, fsr, 0);
}

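/*
 * As hook_fault_code(), but for the prefetch abort (IFSR) table.
 */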
void __init
hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
                 int sig, int code, const char *name)
{
        if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
                BUG();

        ifsr_info[nr].fn   = fn;
        ifsr_info[nr].sig  = sig;
        ifsr_info[nr].code = code;
        ifsr_info[nr].name = name;
}

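/*
 * Dispatch a prefetch abort to the relevant handler.
 */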
asmlinkage void __exception
do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
{
        const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
        struct siginfo info;

        if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
                return;

        printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
                inf->name, ifsr, addr);

        info.si_signo = inf->sig;
        info.si_errno = 0;
        info.si_code  = inf->code;
        info.si_addr  = (void __user *)addr;
        arm_notify_die("", regs, &info, ifsr, 0);
}

#ifndef CONFIG_ARM_LPAE
static int __init exceptions_init(void)
{
        if (cpu_architecture() >= CPU_ARCH_ARMv6) {
                hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
                                "I-cache maintenance fault");
        }

        if (cpu_architecture() >= CPU_ARCH_ARMv7) {
                /*
                 * TODO: Access flag faults introduced in ARMv6K.
                 * Runtime check for 'K' extension is needed
                 */
                hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
                                "section access flag fault");
                hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
                                "section access flag fault");
        }

        return 0;
}

arch_initcall(exceptions_init);
#endif