Linux Kernel 3.7.1
kcore.c
/*
 * fs/proc/kcore.c	kernel ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <[email protected]>
 * ELF version written by David Howells <[email protected]>
 * Modified and incorporated into 2.3.x by Tigran Aivazian <[email protected]>
 * Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <[email protected]>
 * Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <[email protected]>
 */

#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <linux/list.h>
#include <linux/ioport.h>
#include <linux/memory.h>
#include <asm/sections.h>

#define CORE_STR "CORE"

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

static struct proc_dir_entry *proc_root_kcore;


#ifndef kc_vaddr_to_offset
#define kc_vaddr_to_offset(v) ((v) - PAGE_OFFSET)
#endif
#ifndef kc_offset_to_vaddr
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static LIST_HEAD(kclist_head);
static DEFINE_RWLOCK(kclist_lock);
static int kcore_need_update = 1;

void
kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
{
        new->addr = (unsigned long)addr;
        new->size = size;
        new->type = type;

        write_lock(&kclist_lock);
        list_add_tail(&new->list, &kclist_head);
        write_unlock(&kclist_lock);
}
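
/*
 * Example usage (a sketch, mirroring proc_kcore_text_init() further down in
 * this file): a caller registers a region by passing a kcore_list with
 * permanent storage, since the list links live inside the entry itself and
 * entries added this way are never removed:
 *
 *	static struct kcore_list kcore_text;
 *	kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
 */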

static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
{
        size_t try, size;
        struct kcore_list *m;

        *nphdr = 1; /* PT_NOTE */
        size = 0;

        list_for_each_entry(m, &kclist_head, list) {
                try = kc_vaddr_to_offset((size_t)m->addr + m->size);
                if (try > size)
                        size = try;
                *nphdr = *nphdr + 1;
        }
        *elf_buflen = sizeof(struct elfhdr) +
                        (*nphdr + 2)*sizeof(struct elf_phdr) +
                        3 * ((sizeof(struct elf_note)) +
                             roundup(sizeof(CORE_STR), 4)) +
                        roundup(sizeof(struct elf_prstatus), 4) +
                        roundup(sizeof(struct elf_prpsinfo), 4) +
                        roundup(sizeof(struct task_struct), 4);
        *elf_buflen = PAGE_ALIGN(*elf_buflen);
        return size + *elf_buflen;
}
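
/*
 * Worked example of the sizing above (assuming ELF64, where an elfhdr is
 * 64 bytes, an elf_phdr 56 bytes and an elf_note header 12 bytes; the entry
 * count is illustrative): with 10 kclist entries, *nphdr becomes 11, so the
 * fixed part of the header needs
 *
 *	64 + (11 + 2) * 56 + 3 * (12 + roundup(5, 4)) = 852 bytes
 *
 * plus the three rounded-up note payloads (elf_prstatus, elf_prpsinfo and
 * task_struct, the last of which is several KB and config-dependent).
 * PAGE_ALIGN() then pads the total to a whole number of pages, which is
 * what keeps the memory image that follows the header page-aligned.
 */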

static void free_kclist_ents(struct list_head *head)
{
        struct kcore_list *tmp, *pos;

        list_for_each_entry_safe(pos, tmp, head, list) {
                list_del(&pos->list);
                kfree(pos);
        }
}
/*
 * Replace all KCORE_RAM/KCORE_VMEMMAP information with the passed list.
 */
static void __kcore_update_ram(struct list_head *list)
{
        int nphdr;
        size_t size;
        struct kcore_list *tmp, *pos;
        LIST_HEAD(garbage);

        write_lock(&kclist_lock);
        if (kcore_need_update) {
                list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
                        if (pos->type == KCORE_RAM
                                || pos->type == KCORE_VMEMMAP)
                                list_move(&pos->list, &garbage);
                }
                list_splice_tail(list, &kclist_head);
        } else
                list_splice(list, &garbage);
        kcore_need_update = 0;
        proc_root_kcore->size = get_kcore_size(&nphdr, &size);
        write_unlock(&kclist_lock);

        free_kclist_ents(&garbage);
}


#ifdef CONFIG_HIGHMEM
/*
 * If there is no highmem, we can assume [0...max_low_pfn) is a continuous
 * range of memory, because the memory hole is not as big as in the !HIGHMEM
 * case.
 * (HIGHMEM is special because part of memory is _invisible_ to the kernel.)
 */
static int kcore_update_ram(void)
{
        LIST_HEAD(head);
        struct kcore_list *ent;
        int ret = 0;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va(0);
        ent->size = max_low_pfn << PAGE_SHIFT;
        ent->type = KCORE_RAM;
        list_add(&ent->list, &head);
        __kcore_update_ram(&head);
        return ret;
}

#else /* !CONFIG_HIGHMEM */

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* calculate vmemmap's address from given system ram pfn and register it */
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        unsigned long pfn = __pa(ent->addr) >> PAGE_SHIFT;
        unsigned long nr_pages = ent->size >> PAGE_SHIFT;
        unsigned long start, end;
        struct kcore_list *vmm, *tmp;

        start = ((unsigned long)pfn_to_page(pfn)) & PAGE_MASK;
        end = ((unsigned long)pfn_to_page(pfn + nr_pages)) - 1;
        end = ALIGN(end, PAGE_SIZE);
        /* overlap check (because we have to align pages) */
        list_for_each_entry(tmp, head, list) {
                if (tmp->type != KCORE_VMEMMAP)
                        continue;
                if (start < tmp->addr + tmp->size)
                        if (end > tmp->addr)
                                end = tmp->addr;
        }
        if (start < end) {
                vmm = kmalloc(sizeof(*vmm), GFP_KERNEL);
                if (!vmm)
                        return 0;
                vmm->addr = start;
                vmm->size = end - start;
                vmm->type = KCORE_VMEMMAP;
                list_add_tail(&vmm->list, head);
        }
        return 1;
}
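
/*
 * Illustration (a sketch; the constants are illustrative, not taken from
 * this file): with SPARSEMEM_VMEMMAP, pfn_to_page() is pure arithmetic on
 * the vmemmap base, so the struct page array backing a RAM chunk is itself
 * a contiguous virtual range worth dumping. E.g. assuming a 64-byte
 * struct page and 4 KB pages, the pages for pfns [0x100000, 0x140000)
 * occupy
 *
 *	vmemmap + 0x100000 * 64  ...  vmemmap + 0x140000 * 64
 *
 * i.e. 16 MB of vmemmap for 1 GB of RAM, and that range (page-aligned and
 * clipped against earlier entries above) is what gets registered as a
 * KCORE_VMEMMAP entry.
 */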
#else
static int
get_sparsemem_vmemmap_info(struct kcore_list *ent, struct list_head *head)
{
        return 1;
}

#endif

static int
kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
        struct list_head *head = (struct list_head *)arg;
        struct kcore_list *ent;

        ent = kmalloc(sizeof(*ent), GFP_KERNEL);
        if (!ent)
                return -ENOMEM;
        ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
        ent->size = nr_pages << PAGE_SHIFT;

        /* Sanity check: can happen on a 32-bit arch... maybe */
        if (ent->addr < (unsigned long) __va(0))
                goto free_out;

        /* cut the not-mapped area (taken from the ppc32 code) */
        if (ULONG_MAX - ent->addr < ent->size)
                ent->size = ULONG_MAX - ent->addr;

        /* cut when the vmalloc() area is higher than the direct-map area */
        if (VMALLOC_START > (unsigned long)__va(0)) {
                if (ent->addr > VMALLOC_START)
                        goto free_out;
                if (VMALLOC_START - ent->addr < ent->size)
                        ent->size = VMALLOC_START - ent->addr;
        }

        ent->type = KCORE_RAM;
        list_add_tail(&ent->list, head);

        if (!get_sparsemem_vmemmap_info(ent, head)) {
                list_del(&ent->list);
                goto free_out;
        }

        return 0;
free_out:
        kfree(ent);
        return 1;
}

static int kcore_update_ram(void)
{
        int nid, ret;
        unsigned long end_pfn;
        LIST_HEAD(head);

        /* Not initialized... update now */
        /* find out "max pfn" */
        end_pfn = 0;
        for_each_node_state(nid, N_HIGH_MEMORY) {
                unsigned long node_end;
                node_end = NODE_DATA(nid)->node_start_pfn +
                        NODE_DATA(nid)->node_spanned_pages;
                if (end_pfn < node_end)
                        end_pfn = node_end;
        }
        /* scan 0 to max_pfn */
        ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
        if (ret) {
                free_kclist_ents(&head);
                return -ENOMEM;
        }
        __kcore_update_ram(&head);
        return ret;
}
#endif /* CONFIG_HIGHMEM */

/*****************************************************************************/
/*
 * determine size of ELF note
 */
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup((strlen(en->name) + 1), 4);
        sz += roundup(en->datasz, 4);

        return sz;
} /* end notesize() */
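
/*
 * Worked example: for the NT_PRSTATUS note built below, the name is "CORE"
 * (strlen 4, +1 for the NUL, rounded up to 8) and datasz is
 * sizeof(struct elf_prstatus), so assuming ELF64 (12-byte elf_note header):
 *
 *	notesize() = 12 + 8 + roundup(sizeof(struct elf_prstatus), 4)
 *
 * The name and descriptor paddings here match what storenote() below
 * actually emits, so the PT_NOTE p_filesz accumulated from notesize() is
 * exact.
 */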

/*****************************************************************************/
/*
 * store a note in the header buffer
 */
static char *storenote(struct memelfnote *men, char *bufp)
{
        struct elf_note en;

#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)

        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en));
        DUMP_WRITE(men->name, en.n_namesz);

        /* XXX - cast from long long to long to avoid need for libgcc.a */
        bufp = (char*) roundup((unsigned long)bufp,4);
        DUMP_WRITE(men->data, men->datasz);
        bufp = (char*) roundup((unsigned long)bufp,4);

#undef DUMP_WRITE

        return bufp;
} /* end storenote() */
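
/*
 * The resulting in-file layout of one note record (offsets shown for the
 * "CORE" name under ELF64):
 *
 *	+0	n_namesz (= 5)
 *	+4	n_descsz (= datasz)
 *	+8	n_type
 *	+12	"CORE\0" plus 3 bytes of padding to a 4-byte boundary
 *	+20	descriptor data, padded to a 4-byte boundary
 */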

/*
 * store an ELF coredump header in the supplied buffer
 * nphdr is the number of elf_phdr to insert
 */
static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
{
        struct elf_prstatus prstatus;	/* NT_PRSTATUS */
        struct elf_prpsinfo prpsinfo;	/* NT_PRPSINFO */
        struct elf_phdr *nhdr, *phdr;
        struct elfhdr *elf;
        struct memelfnote notes[3];
        off_t offset = 0;
        struct kcore_list *m;

        /* setup ELF header */
        elf = (struct elfhdr *) bufp;
        bufp += sizeof(struct elfhdr);
        offset += sizeof(struct elfhdr);
        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;
        memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
        elf->e_type = ET_CORE;
        elf->e_machine = ELF_ARCH;
        elf->e_version = EV_CURRENT;
        elf->e_entry = 0;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_shoff = 0;
        elf->e_flags = ELF_CORE_EFLAGS;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = nphdr;
        elf->e_shentsize = 0;
        elf->e_shnum = 0;
        elf->e_shstrndx = 0;

        /* setup ELF PT_NOTE program header */
        nhdr = (struct elf_phdr *) bufp;
        bufp += sizeof(struct elf_phdr);
        offset += sizeof(struct elf_phdr);
        nhdr->p_type = PT_NOTE;
        nhdr->p_offset = 0;
        nhdr->p_vaddr = 0;
        nhdr->p_paddr = 0;
        nhdr->p_filesz = 0;
        nhdr->p_memsz = 0;
        nhdr->p_flags = 0;
        nhdr->p_align = 0;

        /* setup ELF PT_LOAD program header for every area */
        list_for_each_entry(m, &kclist_head, list) {
                phdr = (struct elf_phdr *) bufp;
                bufp += sizeof(struct elf_phdr);
                offset += sizeof(struct elf_phdr);

                phdr->p_type = PT_LOAD;
                phdr->p_flags = PF_R|PF_W|PF_X;
                phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
                phdr->p_vaddr = (size_t)m->addr;
                phdr->p_paddr = 0;
                phdr->p_filesz = phdr->p_memsz = m->size;
                phdr->p_align = PAGE_SIZE;
        }

        /*
         * Set up the notes in similar form to SVR4 core dumps made
         * with info from their /proc.
         */
        nhdr->p_offset = offset;

        /* set up the process status */
        notes[0].name = CORE_STR;
        notes[0].type = NT_PRSTATUS;
        notes[0].datasz = sizeof(struct elf_prstatus);
        notes[0].data = &prstatus;

        memset(&prstatus, 0, sizeof(struct elf_prstatus));

        nhdr->p_filesz = notesize(&notes[0]);
        bufp = storenote(&notes[0], bufp);

        /* set up the process info */
        notes[1].name = CORE_STR;
        notes[1].type = NT_PRPSINFO;
        notes[1].datasz = sizeof(struct elf_prpsinfo);
        notes[1].data = &prpsinfo;

        memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
        prpsinfo.pr_state = 0;
        prpsinfo.pr_sname = 'R';
        prpsinfo.pr_zomb = 0;

        strcpy(prpsinfo.pr_fname, "vmlinux");
        strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

        nhdr->p_filesz += notesize(&notes[1]);
        bufp = storenote(&notes[1], bufp);

        /* set up the task structure */
        notes[2].name = CORE_STR;
        notes[2].type = NT_TASKSTRUCT;
        notes[2].datasz = sizeof(struct task_struct);
        notes[2].data = current;

        nhdr->p_filesz += notesize(&notes[2]);
        bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */
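
/*
 * Overall layout of /proc/kcore as built above (dataoff is the page-aligned
 * elf_buflen computed by get_kcore_size()):
 *
 *	offset 0	ELF header (e_phnum = nphdr)
 *	e_phoff		the PT_NOTE phdr, then one PT_LOAD phdr per
 *			kclist entry
 *	nhdr->p_offset	NT_PRSTATUS, NT_PRPSINFO and NT_TASKSTRUCT notes
 *	dataoff		start of the memory image; each PT_LOAD maps a
 *			kernel vaddr to file offset
 *			kc_vaddr_to_offset(vaddr) + dataoff
 */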

/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
        ssize_t acc = 0;
        size_t size, tsz;
        size_t elf_buflen;
        int nphdr;
        unsigned long start;

        read_lock(&kclist_lock);
        size = get_kcore_size(&nphdr, &elf_buflen);

        if (buflen == 0 || *fpos >= size) {
                read_unlock(&kclist_lock);
                return 0;
        }

        /* trim buflen to not go beyond EOF */
        if (buflen > size - *fpos)
                buflen = size - *fpos;

        /* construct an ELF core header if we'll need some of it */
        if (*fpos < elf_buflen) {
                char *elf_buf;

                tsz = elf_buflen - *fpos;
                if (buflen < tsz)
                        tsz = buflen;
                elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
                if (!elf_buf) {
                        read_unlock(&kclist_lock);
                        return -ENOMEM;
                }
                elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
                read_unlock(&kclist_lock);
                if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
                        kfree(elf_buf);
                        return -EFAULT;
                }
                kfree(elf_buf);
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;

                /* leave now if we already filled the buffer */
                if (buflen == 0)
                        return acc;
        } else
                read_unlock(&kclist_lock);

        /*
         * Check to see if our file offset matches with any of
         * the addresses in the elf_phdr on our list.
         */
        start = kc_offset_to_vaddr(*fpos - elf_buflen);
        if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
                tsz = buflen;

        while (buflen) {
                struct kcore_list *m;

                read_lock(&kclist_lock);
                list_for_each_entry(m, &kclist_head, list) {
                        if (start >= m->addr && start < (m->addr+m->size))
                                break;
                }
                read_unlock(&kclist_lock);

                if (&m->list == &kclist_head) {
                        if (clear_user(buffer, tsz))
                                return -EFAULT;
                } else if (is_vmalloc_or_module_addr((void *)start)) {
                        char *elf_buf;

                        elf_buf = kzalloc(tsz, GFP_KERNEL);
                        if (!elf_buf)
                                return -ENOMEM;
                        vread(elf_buf, (char *)start, tsz);
                        /* we have to zero-fill the user buffer even if no read */
                        if (copy_to_user(buffer, elf_buf, tsz)) {
                                kfree(elf_buf);
                                return -EFAULT;
                        }
                        kfree(elf_buf);
                } else {
                        if (kern_addr_valid(start)) {
                                unsigned long n;

                                n = copy_to_user(buffer, (char *)start, tsz);
                                /*
                                 * We cannot distinguish between fault on source
                                 * and fault on destination. When this happens
                                 * we clear too and hope it will trigger the
                                 * EFAULT again.
                                 */
                                if (n) {
                                        if (clear_user(buffer + tsz - n, n))
                                                return -EFAULT;
                                }
                        } else {
                                if (clear_user(buffer, tsz))
                                        return -EFAULT;
                        }
                }
                buflen -= tsz;
                *fpos += tsz;
                buffer += tsz;
                acc += tsz;
                start += tsz;
                tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
        }

        return acc;
}
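
/*
 * A minimal userspace consumer of the interface above (a sketch, not part
 * of the kernel build; needs root for CAP_SYS_RAWIO and assumes a 64-bit
 * kernel, hence the Elf64 types). The first sizeof(Elf64_Ehdr) bytes come
 * from the fabricated header, and each program header then tells you which
 * file offset a kernel virtual address lives at:
 *
 *	#include <elf.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		Elf64_Ehdr eh;
 *		int fd = open("/proc/kcore", O_RDONLY);
 *
 *		if (fd < 0 || read(fd, &eh, sizeof(eh)) != sizeof(eh))
 *			return 1;
 *		printf("e_type=%u e_phnum=%u e_phoff=%llu\n",
 *		       (unsigned)eh.e_type, (unsigned)eh.e_phnum,
 *		       (unsigned long long)eh.e_phoff);
 *		close(fd);
 *		return 0;
 *	}
 */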


static int open_kcore(struct inode *inode, struct file *filp)
{
        if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
                mutex_lock(&inode->i_mutex);
                i_size_write(inode, proc_root_kcore->size);
                mutex_unlock(&inode->i_mutex);
        }
        return 0;
}


static const struct file_operations proc_kcore_operations = {
        .read = read_kcore,
        .open = open_kcore,
        .llseek = default_llseek,
};

#ifdef CONFIG_MEMORY_HOTPLUG
/* just remember that we have to update kcore */
static int __meminit kcore_callback(struct notifier_block *self,
                                    unsigned long action, void *arg)
{
        switch (action) {
        case MEM_ONLINE:
        case MEM_OFFLINE:
                write_lock(&kclist_lock);
                kcore_need_update = 1;
                write_unlock(&kclist_lock);
        }
        return NOTIFY_OK;
}
#endif


static struct kcore_list kcore_vmalloc;

#ifdef CONFIG_ARCH_PROC_KCORE_TEXT
static struct kcore_list kcore_text;
/*
 * If defined, a special segment is used for mapping the kernel text instead
 * of the direct-map area. We need to create a special TEXT section in that
 * case.
 */
static void __init proc_kcore_text_init(void)
{
        kclist_add(&kcore_text, _text, _end - _text, KCORE_TEXT);
}
#else
static void __init proc_kcore_text_init(void)
{
}
#endif

#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
/*
 * MODULES_VADDR has no intersection with VMALLOC_ADDR.
 */
struct kcore_list kcore_modules;
static void __init add_modules_range(void)
{
        kclist_add(&kcore_modules, (void *)MODULES_VADDR,
                        MODULES_END - MODULES_VADDR, KCORE_VMALLOC);
}
#else
static void __init add_modules_range(void)
{
}
#endif

static int __init proc_kcore_init(void)
{
        proc_root_kcore = proc_create("kcore", S_IRUSR, NULL,
                                      &proc_kcore_operations);
        if (!proc_root_kcore) {
                printk(KERN_ERR "couldn't create /proc/kcore\n");
                return 0; /* Always returns 0. */
        }
        /* Store text area if it's special */
        proc_kcore_text_init();
        /* Store vmalloc area */
        kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
                        VMALLOC_END - VMALLOC_START, KCORE_VMALLOC);
        add_modules_range();
        /* Store direct-map area from physical memory map */
        kcore_update_ram();
        hotplug_memory_notifier(kcore_callback, 0);

        return 0;
}
module_init(proc_kcore_init);