Linux Kernel 3.7.1
fault.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License. See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (C) 1995 - 2000 by Ralf Baechle
7  */
8 #include <linux/signal.h>
9 #include <linux/sched.h>
10 #include <linux/interrupt.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/string.h>
14 #include <linux/types.h>
15 #include <linux/ptrace.h>
16 #include <linux/mman.h>
17 #include <linux/mm.h>
18 #include <linux/smp.h>
19 #include <linux/module.h>
20 #include <linux/kprobes.h>
21 #include <linux/perf_event.h>
22 
23 #include <asm/branch.h>
24 #include <asm/mmu_context.h>
25 #include <asm/uaccess.h>
26 #include <asm/ptrace.h>
27 #include <asm/highmem.h> /* For VMALLOC_END */
28 #include <linux/kdebug.h>
29 
30 /*
31  * This routine handles page faults. It determines the address,
32  * and the problem, and then passes it off to one of the appropriate
33  * routines.
34  */
35 asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long write,
36  unsigned long address)
37 {
38  struct vm_area_struct * vma = NULL;
39  struct task_struct *tsk = current;
40  struct mm_struct *mm = tsk->mm;
41  const int field = sizeof(unsigned long) * 2;
42  siginfo_t info;
43  int fault;
44  unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
45  (write ? FAULT_FLAG_WRITE : 0);
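 /*
  * "field" is the printf width used to show addresses in hex (8 on
  * 32-bit, 16 on 64-bit kernels). FAULT_FLAG_ALLOW_RETRY lets
  * handle_mm_fault() drop mmap_sem while it waits for I/O and ask us
  * to retry once, and FAULT_FLAG_KILLABLE makes that wait
  * interruptible by a fatal signal.
  */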
46 
47 #if 0
48  printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
49  current->comm, current->pid, field, address, write,
50  field, regs->cp0_epc);
51 #endif
52 
53 #ifdef CONFIG_KPROBES
54  /*
55  * This is to notify the fault handler of the kprobes. The
56  * exception code is redundant as it is also carried in REGS,
57  * but we pass it anyhow.
58  */
59  if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1,
60  (regs->cp0_cause >> 2) & 0x1f, SIGSEGV) == NOTIFY_STOP)
61  return;
62 #endif
63 
64  info.si_code = SEGV_MAPERR;
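 /*
  * Assume SEGV_MAPERR (no mapping at all) for now; this is switched
  * to SEGV_ACCERR below when a vma covers the address but its access
  * rights do not permit the attempted operation.
  */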
65 
66  /*
67  * We fault-in kernel-space virtual memory on-demand. The
68  * 'reference' page table is init_mm.pgd.
69  *
70  * NOTE! We MUST NOT take any locks for this case. We may
71  * be in an interrupt or a critical region, and should
72  * only copy the information from the master page table,
73  * nothing more.
74  */
75 #ifdef CONFIG_64BIT
76 # define VMALLOC_FAULT_TARGET no_context
77 #else
78 # define VMALLOC_FAULT_TARGET vmalloc_fault
79 #endif
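 /*
  * A fault in the vmalloc/module range is handled differently per
  * bitness: 64-bit kernels have no lazily-synced per-CPU pgd for
  * this range, so such a fault is treated as a genuine kernel error
  * (no_context); 32-bit kernels may merely be missing the top-level
  * entry in the per-CPU pgd and repair it in vmalloc_fault at the
  * bottom of this function.
  */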
80 
81  if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END))
82  goto VMALLOC_FAULT_TARGET;
83 #ifdef MODULE_START
84  if (unlikely(address >= MODULE_START && address < MODULE_END))
85  goto VMALLOC_FAULT_TARGET;
86 #endif
87 
88  /*
89  * If we're in an interrupt or have no user
90  * context, we must not take the fault..
91  */
92  if (in_atomic() || !mm)
93  goto bad_area_nosemaphore;
94 
95 retry:
96  down_read(&mm->mmap_sem);
97  vma = find_vma(mm, address);
98  if (!vma)
99  goto bad_area;
100  if (vma->vm_start <= address)
101  goto good_area;
102  if (!(vma->vm_flags & VM_GROWSDOWN))
103  goto bad_area;
104  if (expand_stack(vma, address))
105  goto bad_area;
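 /*
  * find_vma() returned the first vma that ends above the faulting
  * address; if the address is below vma->vm_start the only valid
  * case is a stack vma that still has to grow down over it, which is
  * what the VM_GROWSDOWN/expand_stack() checks above allow.
  */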
106 /*
107  * Ok, we have a good vm_area for this memory access, so
108  * we can handle it..
109  */
110 good_area:
111  info.si_code = SEGV_ACCERR;
112 
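 /*
  * Access-rights check. On cores with RIXI (Read-Inhibit/Execute-
  * Inhibit page bits) an instruction fetch shows up as a fault with
  * EPC equal to the faulting address, and loads from pages lacking
  * VM_READ are trapped as well; older cores can only check that the
  * page allows some kind of access.
  */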
113  if (write) {
114  if (!(vma->vm_flags & VM_WRITE))
115  goto bad_area;
116  } else {
117  if (cpu_has_rixi) {
118  if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
119 #if 0
120  pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n",
121  raw_smp_processor_id(),
122  current->comm, current->pid,
123  field, address, write,
124  field, regs->cp0_epc);
125 #endif
126  goto bad_area;
127  }
128  if (!(vma->vm_flags & VM_READ)) {
129 #if 0
130  pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n",
131  raw_smp_processor_id(),
132  current->comm, current->pid,
133  field, address, write,
134  field, regs->cp0_epc);
135 #endif
136  goto bad_area;
137  }
138  } else {
139  if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
140  goto bad_area;
141  }
142  }
143 
144  /*
145  * If for any reason at all we couldn't handle the fault,
146  * make sure we exit gracefully rather than endlessly redo
147  * the fault.
148  */
149  fault = handle_mm_fault(mm, vma, address, flags);
150 
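 /*
  * VM_FAULT_RETRY means handle_mm_fault() has already dropped
  * mmap_sem (we allowed that via FAULT_FLAG_ALLOW_RETRY); if a fatal
  * signal is pending as well the task is about to die, so simply
  * bail out instead of retrying.
  */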
151  if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
152  return;
153 
154  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
155  if (unlikely(fault & VM_FAULT_ERROR)) {
156  if (fault & VM_FAULT_OOM)
157  goto out_of_memory;
158  else if (fault & VM_FAULT_SIGBUS)
159  goto do_sigbus;
160  BUG();
161  }
162  if (flags & FAULT_FLAG_ALLOW_RETRY) {
163  if (fault & VM_FAULT_MAJOR) {
164  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
165  regs, address);
166  tsk->maj_flt++;
167  } else {
168  perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
169  regs, address);
170  tsk->min_flt++;
171  }
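 /*
  * Major faults had to block on I/O to bring the page in, minor
  * faults were satisfied without it; both are accounted per task and
  * reported as perf software events.
  */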
172  if (fault & VM_FAULT_RETRY) {
173  flags &= ~FAULT_FLAG_ALLOW_RETRY;
174  flags |= FAULT_FLAG_TRIED;
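 /*
  * Only one retry is attempted: ALLOW_RETRY is cleared and
  * FAULT_FLAG_TRIED is set, so the second pass through
  * handle_mm_fault() will wait for the page instead of returning
  * VM_FAULT_RETRY again.
  */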
175 
176  /*
177  * No need to up_read(&mm->mmap_sem) as we would
178  * have already released it in __lock_page_or_retry
179  * in mm/filemap.c.
180  */
181 
182  goto retry;
183  }
184  }
185 
186  up_read(&mm->mmap_sem);
187  return;
188 
189 /*
190  * Something tried to access memory that isn't in our memory map..
191  * Fix it, but check if it's kernel or user first..
192  */
193 bad_area:
194  up_read(&mm->mmap_sem);
195 
196 bad_area_nosemaphore:
197  /* User mode accesses just cause a SIGSEGV */
198  if (user_mode(regs)) {
199  tsk->thread.cp0_badvaddr = address;
200  tsk->thread.error_code = write;
201 #if 0
202  printk("do_page_fault() #2: sending SIGSEGV to %s for "
203  "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
204  tsk->comm,
205  write ? "write access to" : "read access from",
206  field, address,
207  field, (unsigned long) regs->cp0_epc,
208  field, (unsigned long) regs->regs[31]);
209 #endif
210  info.si_signo = SIGSEGV;
211  info.si_errno = 0;
212  /* info.si_code has been set above */
213  info.si_addr = (void __user *) address;
214  force_sig_info(SIGSEGV, &info, tsk);
215  return;
216  }
217 
218 no_context:
219  /* Are we prepared to handle this kernel fault? */
220  if (fixup_exception(regs)) {
221  current->thread.cp0_baduaddr = address;
222  return;
223  }
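 /*
  * fixup_exception() searches the kernel exception table for the
  * faulting EPC; the uaccess helpers (copy_{to,from}_user and
  * friends) register fixup entries there, so a fault on a bad user
  * pointer resumes at the fixup code instead of oopsing.
  */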
224 
225  /*
226  * Oops. The kernel tried to access some bad page. We'll have to
227  * terminate things with extreme prejudice.
228  */
229  bust_spinlocks(1);
230 
231  printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at "
232  "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n",
233  raw_smp_processor_id(), field, address, field, regs->cp0_epc,
234  field, regs->regs[31]);
235  die("Oops", regs);
236 
237 out_of_memory:
238  /*
239  * We ran out of memory, call the OOM killer, and return to userspace
240  * (which will retry the fault, or kill us if we got oom-killed).
241  */
242  up_read(&mm->mmap_sem);
243  pagefault_out_of_memory();
244  return;
245 
246 do_sigbus:
247  up_read(&mm->mmap_sem);
248 
249  /* Kernel mode? Handle exceptions or die */
250  if (!user_mode(regs))
251  goto no_context;
252  else
253  /*
254  * Send a sigbus, regardless of whether we were in kernel
255  * or user mode.
256  */
257 #if 0
258  printk("do_page_fault() #3: sending SIGBUS to %s for "
259  "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n",
260  tsk->comm,
261  write ? "write access to" : "read access from",
262  field, address,
263  field, (unsigned long) regs->cp0_epc,
264  field, (unsigned long) regs->regs[31]);
265 #endif
266  tsk->thread.cp0_badvaddr = address;
267  info.si_signo = SIGBUS;
268  info.si_errno = 0;
269  info.si_code = BUS_ADRERR;
270  info.si_addr = (void __user *) address;
271  force_sig_info(SIGBUS, &info, tsk);
272 
273  return;
274 #ifndef CONFIG_64BIT
275 vmalloc_fault:
276  {
277  /*
278  * Synchronize this task's top level page-table
279  * with the 'reference' page table.
280  *
281  * Do _not_ use "tsk" here. We might be inside
282  * an interrupt in the middle of a task switch..
283  */
284  int offset = __pgd_offset(address);
285  pgd_t *pgd, *pgd_k;
286  pud_t *pud, *pud_k;
287  pmd_t *pmd, *pmd_k;
288  pte_t *pte_k;
289 
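 /*
  * Walk this CPU's active page tables and init_mm's reference tables
  * in parallel: missing pgd/pmd entries are copied over from the
  * reference, and if even the reference has no pte for the address
  * the fault is genuine and is sent to no_context.
  */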
290  pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset;
291  pgd_k = init_mm.pgd + offset;
292 
293  if (!pgd_present(*pgd_k))
294  goto no_context;
295  set_pgd(pgd, *pgd_k);
296 
297  pud = pud_offset(pgd, address);
298  pud_k = pud_offset(pgd_k, address);
299  if (!pud_present(*pud_k))
300  goto no_context;
301 
302  pmd = pmd_offset(pud, address);
303  pmd_k = pmd_offset(pud_k, address);
304  if (!pmd_present(*pmd_k))
305  goto no_context;
306  set_pmd(pmd, *pmd_k);
307 
308  pte_k = pte_offset_kernel(pmd_k, address);
309  if (!pte_present(*pte_k))
310  goto no_context;
311  return;
312  }
313 #endif
314 }