Linux Kernel 3.7.1
fault.c
1 /*
2  * Page fault handler for SH with an MMU.
3  *
4  * Copyright (C) 1999 Niibe Yutaka
5  * Copyright (C) 2003 - 2012 Paul Mundt
6  *
7  * Based on linux/arch/i386/mm/fault.c:
8  * Copyright (C) 1995 Linus Torvalds
9  *
10  * This file is subject to the terms and conditions of the GNU General Public
11  * License. See the file "COPYING" in the main directory of this archive
12  * for more details.
13  */
14 #include <linux/kernel.h>
15 #include <linux/mm.h>
16 #include <linux/hardirq.h>
17 #include <linux/kprobes.h>
18 #include <linux/perf_event.h>
19 #include <linux/kdebug.h>
20 #include <asm/io_trapped.h>
21 #include <asm/mmu_context.h>
22 #include <asm/tlbflush.h>
23 #include <asm/traps.h>
24 
25 static inline int notify_page_fault(struct pt_regs *regs, int trap)
26 {
27  int ret = 0;
28 
29  if (kprobes_built_in() && !user_mode(regs)) {
30  preempt_disable();
31  if (kprobe_running() && kprobe_fault_handler(regs, trap))
32  ret = 1;
33  preempt_enable();
34  }
35 
36  return ret;
37 }
38 
39 static void
40 force_sig_info_fault(int si_signo, int si_code, unsigned long address,
41  struct task_struct *tsk)
42 {
43  struct siginfo info;
44 
45  info.si_signo = si_signo;
46  info.si_errno = 0;
47  info.si_code = si_code;
48  info.si_addr = (void __user *)address;
49 
50  force_sig_info(si_signo, &info, tsk);
51 }
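
The siginfo filled in above is exactly what a user-space handler registered with SA_SIGINFO receives. As an illustration (a minimal sketch, not part of this kernel file; the faulting address 16 is an arbitrary low address that is normally unmapped):

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Report what the kernel put in siginfo for the fault, then exit.
 * (printf is not async-signal-safe; acceptable for a throwaway demo.) */
static void segv_handler(int sig, siginfo_t *info, void *ctx)
{
	printf("SIGSEGV at %p, si_code=%d (%s)\n",
	       info->si_addr, info->si_code,
	       info->si_code == SEGV_MAPERR ? "SEGV_MAPERR" :
	       info->si_code == SEGV_ACCERR ? "SEGV_ACCERR" : "other");
	_exit(0);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = segv_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);

	/* Low addresses are normally unmapped, so this is a MAPERR fault. */
	*(volatile int *)16 = 1;
	return 0;
}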
52 
53 /*
54  * This is useful to dump out the page tables associated with
55  * 'addr' in mm 'mm'.
56  */
57 static void show_pte(struct mm_struct *mm, unsigned long addr)
58 {
59  pgd_t *pgd;
60 
61  if (mm) {
62  pgd = mm->pgd;
63  } else {
64  pgd = get_TTB();
65 
66  if (unlikely(!pgd))
67  pgd = swapper_pg_dir;
68  }
69 
70  printk(KERN_ALERT "pgd = %p\n", pgd);
71  pgd += pgd_index(addr);
72  printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
73  (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));
74 
75  do {
76  pud_t *pud;
77  pmd_t *pmd;
78  pte_t *pte;
79 
80  if (pgd_none(*pgd))
81  break;
82 
83  if (pgd_bad(*pgd)) {
84  printk("(bad)");
85  break;
86  }
87 
88  pud = pud_offset(pgd, addr);
89  if (PTRS_PER_PUD != 1)
90  printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
91  (u64)pud_val(*pud));
92 
93  if (pud_none(*pud))
94  break;
95 
96  if (pud_bad(*pud)) {
97  printk("(bad)");
98  break;
99  }
100 
101  pmd = pmd_offset(pud, addr);
102  if (PTRS_PER_PMD != 1)
103  printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
104  (u64)pmd_val(*pmd));
105 
106  if (pmd_none(*pmd))
107  break;
108 
109  if (pmd_bad(*pmd)) {
110  printk("(bad)");
111  break;
112  }
113 
114  /* We must not map this if we have highmem enabled */
115  if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
116  break;
117 
118  pte = pte_offset_kernel(pmd, addr);
119  printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
120  (u64)pte_val(*pte));
121  } while (0);
122 
123  printk("\n");
124 }
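
show_pte() walks the pgd/pud/pmd/pte levels for a kernel-side dump. A roughly analogous user-space view of a single translation is available through /proc/self/pagemap (a sketch under that assumption, not part of this file; the PFN bits read back as zero without CAP_SYS_ADMIN on recent kernels):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Look up the pagemap entry for one virtual address: bit 63 is the
 * "page present" flag, bits 0-54 hold the physical frame number. */
static int show_pagemap(unsigned long vaddr)
{
	long page_size = sysconf(_SC_PAGESIZE);
	uint64_t entry;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return -1;
	if (pread(fd, &entry, sizeof(entry),
		  (vaddr / page_size) * sizeof(entry)) != sizeof(entry)) {
		close(fd);
		return -1;
	}
	close(fd);

	printf("vaddr %#lx: present=%d pfn=%#llx\n", vaddr,
	       (int)(entry >> 63),
	       (unsigned long long)(entry & ((1ULL << 55) - 1)));
	return 0;
}

int main(void)
{
	static int page;	/* touched below so the page is mapped */

	page = 1;
	return show_pagemap((unsigned long)&page);
}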
125 
126 static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
127 {
128  unsigned index = pgd_index(address);
129  pgd_t *pgd_k;
130  pud_t *pud, *pud_k;
131  pmd_t *pmd, *pmd_k;
132 
133  pgd += index;
134  pgd_k = init_mm.pgd + index;
135 
136  if (!pgd_present(*pgd_k))
137  return NULL;
138 
139  pud = pud_offset(pgd, address);
140  pud_k = pud_offset(pgd_k, address);
141  if (!pud_present(*pud_k))
142  return NULL;
143 
144  if (!pud_present(*pud))
145  set_pud(pud, *pud_k);
146 
147  pmd = pmd_offset(pud, address);
148  pmd_k = pmd_offset(pud_k, address);
149  if (!pmd_present(*pmd_k))
150  return NULL;
151 
152  if (!pmd_present(*pmd))
153  set_pmd(pmd, *pmd_k);
154  else {
155  /*
156  * The page tables are fully synchronised so there must
157  * be another reason for the fault. Return NULL here to
158  * signal that we have not taken care of the fault.
159  */
160  BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
161  return NULL;
162  }
163 
164  return pmd_k;
165 }
166 
167 #ifdef CONFIG_SH_STORE_QUEUES
168 #define __FAULT_ADDR_LIMIT P3_ADDR_MAX
169 #else
170 #define __FAULT_ADDR_LIMIT VMALLOC_END
171 #endif
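
With CONFIG_SH_STORE_QUEUES the fixable window extends past VMALLOC_END up to P3_ADDR_MAX, presumably so that faults on store-queue mappings, which sit above the regular vmalloc area, can also be repaired by the vmalloc_fault() path below.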
172 
173 /*
174  * Handle a fault on the vmalloc or module mapping area
175  */
176 static noinline int vmalloc_fault(unsigned long address)
177 {
178  pgd_t *pgd_k;
179  pmd_t *pmd_k;
180  pte_t *pte_k;
181 
182  /* Make sure we are in vmalloc/module/P3 area: */
183  if (!(address >= VMALLOC_START && address < __FAULT_ADDR_LIMIT))
184  return -1;
185 
186  /*
187  * Synchronize this task's top level page-table
188  * with the 'reference' page table.
189  *
190  * Do _not_ use "current" here. We might be inside
191  * an interrupt in the middle of a task switch..
192  */
193  pgd_k = get_TTB();
194  pmd_k = vmalloc_sync_one(pgd_k, address);
195  if (!pmd_k)
196  return -1;
197 
198  pte_k = pte_offset_kernel(pmd_k, address);
199  if (!pte_present(*pte_k))
200  return -1;
201 
202  return 0;
203 }
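
For context: vmalloc() and ioremap() install new kernel mappings only in the reference page table, init_mm.pgd. Per-process page tables pick those entries up lazily, on first use, via vmalloc_sync_one() above, which is why a missing entry here is repaired silently instead of being treated as an error.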
204 
205 static void
206 show_fault_oops(struct pt_regs *regs, unsigned long address)
207 {
208  if (!oops_may_print())
209  return;
210 
211  printk(KERN_ALERT "BUG: unable to handle kernel ");
212  if (address < PAGE_SIZE)
213  printk(KERN_CONT "NULL pointer dereference");
214  else
215  printk(KERN_CONT "paging request");
216 
217  printk(KERN_CONT " at %08lx\n", address);
218  printk(KERN_ALERT "PC:");
219  printk_address(regs->pc, 1);
220 
221  show_pte(NULL, address);
222 }
223 
224 static noinline void
225 no_context(struct pt_regs *regs, unsigned long error_code,
226  unsigned long address)
227 {
228  /* Are we prepared to handle this kernel fault? */
229  if (fixup_exception(regs))
230  return;
231 
232  if (handle_trapped_io(regs, address))
233  return;
234 
235  /*
236  * Oops. The kernel tried to access some bad page. We'll have to
237  * terminate things with extreme prejudice.
238  */
239  bust_spinlocks(1);
240 
241  show_fault_oops(regs, address);
242 
243  die("Oops", regs, error_code);
244  bust_spinlocks(0);
245  do_exit(SIGKILL);
246 }
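
For reference, fixup_exception() consults the kernel exception table: code such as the user-access helpers records a fixup address for each instruction that may legitimately fault on a user pointer, and if the faulting PC has such an entry the handler redirects execution there instead of oopsing.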
247 
248 static void
249 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
250  unsigned long address, int si_code)
251 {
252  struct task_struct *tsk = current;
253 
254  /* User mode accesses just cause a SIGSEGV */
255  if (user_mode(regs)) {
256  /*
257  * It's possible to have interrupts off here:
258  */
259  local_irq_enable();
260 
261  force_sig_info_fault(SIGSEGV, si_code, address, tsk);
262 
263  return;
264  }
265 
266  no_context(regs, error_code, address);
267 }
268 
269 static noinline void
270 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
271  unsigned long address)
272 {
273  __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
274 }
275 
276 static void
277 __bad_area(struct pt_regs *regs, unsigned long error_code,
278  unsigned long address, int si_code)
279 {
280  struct mm_struct *mm = current->mm;
281 
282  /*
283  * Something tried to access memory that isn't in our memory map..
284  * Fix it, but check if it's kernel or user first..
285  */
286  up_read(&mm->mmap_sem);
287 
288  __bad_area_nosemaphore(regs, error_code, address, si_code);
289 }
290 
291 static noinline void
292 bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
293 {
294  __bad_area(regs, error_code, address, SEGV_MAPERR);
295 }
296 
297 static noinline void
298 bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
299  unsigned long address)
300 {
301  __bad_area(regs, error_code, address, SEGV_ACCERR);
302 }
303 
304 static void out_of_memory(void)
305 {
306  /*
307  * We ran out of memory, call the OOM killer, and return to userspace
308  * (which will retry the fault, or kill us if we got oom-killed):
309  */
310  up_read(&current->mm->mmap_sem);
311 
312  pagefault_out_of_memory();
313 }
314 
315 static void
316 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
317 {
318  struct task_struct *tsk = current;
319  struct mm_struct *mm = tsk->mm;
320 
321  up_read(&mm->mmap_sem);
322 
323  /* Kernel mode? Handle exceptions or die: */
324  if (!user_mode(regs))
325  no_context(regs, error_code, address);
326 
327  force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
328 }
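
One common way to reach this SIGBUS path from user space is touching a file mapping beyond the file's end. A minimal sketch (not part of this file; the temporary filename is arbitrary):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* Create an empty file but map a full page of it. */
	int fd = open("/tmp/sigbus-demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	/* The file has no backing page here, so this access raises
	 * SIGBUS (BUS_ADRERR) rather than SIGSEGV. */
	p[0] = 'x';

	return 0;
}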
329 
330 static noinline int
331 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
332  unsigned long address, unsigned int fault)
333 {
334  /*
335  * Page fault was interrupted by SIGKILL. We have no reason to
336  * continue the page fault.
337  */
338  if (fatal_signal_pending(current)) {
339  if (!(fault & VM_FAULT_RETRY))
340  up_read(&current->mm->mmap_sem);
341  if (!user_mode(regs))
342  no_context(regs, error_code, address);
343  return 1;
344  }
345 
346  if (!(fault & VM_FAULT_ERROR))
347  return 0;
348 
349  if (fault & VM_FAULT_OOM) {
350  /* Kernel mode? Handle exceptions or die: */
351  if (!user_mode(regs)) {
352  up_read(&current->mm->mmap_sem);
353  no_context(regs, error_code, address);
354  return 1;
355  }
356 
357  out_of_memory();
358  } else {
359  if (fault & VM_FAULT_SIGBUS)
360  do_sigbus(regs, error_code, address);
361  else
362  BUG();
363  }
364 
365  return 1;
366 }
367 
368 static inline int access_error(int error_code, struct vm_area_struct *vma)
369 {
370  if (error_code & FAULT_CODE_WRITE) {
371  /* write, present and write, not present: */
372  if (unlikely(!(vma->vm_flags & VM_WRITE)))
373  return 1;
374  return 0;
375  }
376 
377  /* ITLB miss on NX page */
378  if (unlikely((error_code & FAULT_CODE_ITLB) &&
379  !(vma->vm_flags & VM_EXEC)))
380  return 1;
381 
382  /* read, not present: */
383  if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
384  return 1;
385 
386  return 0;
387 }
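
access_error() distinguishes a mapping that exists but forbids the access (reported as SEGV_ACCERR via bad_area_access_error()) from one that does not exist at all (SEGV_MAPERR). A user-space sketch of the first case, assuming nothing beyond standard mmap (not part of this file):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* A present, readable, but non-writable anonymous page. */
	char *p = mmap(NULL, 4096, PROT_READ,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	printf("read ok: %d\n", p[0]);	/* reading the zero page is fine */

	/* Write to a VMA without VM_WRITE: the fault handler sees
	 * FAULT_CODE_WRITE with !(vma->vm_flags & VM_WRITE) and raises
	 * SIGSEGV with si_code SEGV_ACCERR. */
	p[0] = 1;

	return 0;
}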
388 
389 static int fault_in_kernel_space(unsigned long address)
390 {
391  return address >= TASK_SIZE;
392 }
393 
394 /*
395  * This routine handles page faults. It determines the address,
396  * and the problem, and then passes it off to one of the appropriate
397  * routines.
398  */
399 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
400  unsigned long error_code,
401  unsigned long address)
402 {
403  unsigned long vec;
404  struct task_struct *tsk;
405  struct mm_struct *mm;
406  struct vm_area_struct * vma;
407  int fault;
408  int write = error_code & FAULT_CODE_WRITE;
409  unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
410  (write ? FAULT_FLAG_WRITE : 0));
411 
412  tsk = current;
413  mm = tsk->mm;
414  vec = lookup_exception_vector();
415 
416  /*
417  * We fault-in kernel-space virtual memory on-demand. The
418  * 'reference' page table is init_mm.pgd.
419  *
420  * NOTE! We MUST NOT take any locks for this case. We may
421  * be in an interrupt or a critical region, and should
422  * only copy the information from the master page table,
423  * nothing more.
424  */
425  if (unlikely(fault_in_kernel_space(address))) {
426  if (vmalloc_fault(address) >= 0)
427  return;
428  if (notify_page_fault(regs, vec))
429  return;
430 
431  bad_area_nosemaphore(regs, error_code, address);
432  return;
433  }
434 
435  if (unlikely(notify_page_fault(regs, vec)))
436  return;
437 
438  /* Only enable interrupts if they were on before the fault */
439  if ((regs->sr & SR_IMASK) != SR_IMASK)
440  local_irq_enable();
441 
442  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
443 
444  /*
445  * If we're in an interrupt, have no user context or are running
446  * in an atomic region then we must not take the fault:
447  */
448  if (unlikely(in_atomic() || !mm)) {
449  bad_area_nosemaphore(regs, error_code, address);
450  return;
451  }
452 
453 retry:
454  down_read(&mm->mmap_sem);
455 
456  vma = find_vma(mm, address);
457  if (unlikely(!vma)) {
458  bad_area(regs, error_code, address);
459  return;
460  }
461  if (likely(vma->vm_start <= address))
462  goto good_area;
463  if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
464  bad_area(regs, error_code, address);
465  return;
466  }
467  if (unlikely(expand_stack(vma, address))) {
468  bad_area(regs, error_code, address);
469  return;
470  }
471 
472  /*
473  * Ok, we have a good vm_area for this memory access, so
474  * we can handle it..
475  */
476 good_area:
477  if (unlikely(access_error(error_code, vma))) {
478  bad_area_access_error(regs, error_code, address);
479  return;
480  }
481 
482  set_thread_fault_code(error_code);
483 
484  /*
485  * If for any reason at all we couldn't handle the fault,
486  * make sure we exit gracefully rather than endlessly redo
487  * the fault.
488  */
489  fault = handle_mm_fault(mm, vma, address, flags);
490 
491  if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
492  if (mm_fault_error(regs, error_code, address, fault))
493  return;
494 
495  if (flags & FAULT_FLAG_ALLOW_RETRY) {
496  if (fault & VM_FAULT_MAJOR) {
497  tsk->maj_flt++;
498  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
499  regs, address);
500  } else {
501  tsk->min_flt++;
502  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
503  regs, address);
504  }
505  if (fault & VM_FAULT_RETRY) {
506  flags &= ~FAULT_FLAG_ALLOW_RETRY;
507  flags |= FAULT_FLAG_TRIED;
508 
509  /*
510  * No need to up_read(&mm->mmap_sem) as we would
511  * have already released it in __lock_page_or_retry
512  * in mm/filemap.c.
513  */
514  goto retry;
515  }
516  }
517 
518  up_read(&mm->mmap_sem);
519 }
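
The min_flt/maj_flt counters bumped above are visible to user space through getrusage() (and to profilers via the perf software events emitted here, e.g. perf stat -e minor-faults,major-faults). A small sketch, not part of this file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage ru;
	char *buf = malloc(1 << 20);

	if (!buf)
		return 1;

	/* Touching fresh anonymous memory generates minor faults that
	 * do_page_fault() accounts in tsk->min_flt. */
	memset(buf, 0, 1 << 20);

	getrusage(RUSAGE_SELF, &ru);
	printf("minor faults: %ld, major faults: %ld\n",
	       ru.ru_minflt, ru.ru_majflt);

	free(buf);
	return 0;
}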