alternative.c — Linux Kernel 3.7.1
#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fixmap.h>

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int __initdata_or_module noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, ...)                               \
do {                                                    \
        if (debug_alternative)                          \
                printk(KERN_DEBUG fmt, ##__VA_ARGS__);  \
} while (0)

/*
 * Each GENERIC_NOPX is X bytes long and is defined as an array of bytes
 * making up that nop. To get from one nop to the next, we add to the
 * array an offset equal to the sum of the sizes of all nops preceding
 * the one we are after.
 *
 * Note: GENERIC_NOP5_ATOMIC is at the end, as it breaks the nice
 * symmetry of the sizes of the previous nops.
 */
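/*
 * In each *_nops[] pointer table below, entry [len] (1 <= len <= ASM_NOP_MAX)
 * points at the ideal len-byte nop for that flavour; entry [0] is unused
 * (NULL) and the extra final slot holds the 5-byte atomic nop.
 */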
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char intelnops[] =
{
        GENERIC_NOP1,
        GENERIC_NOP2,
        GENERIC_NOP3,
        GENERIC_NOP4,
        GENERIC_NOP5,
        GENERIC_NOP6,
        GENERIC_NOP7,
        GENERIC_NOP8,
        GENERIC_NOP5_ATOMIC
};
static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
{
        NULL,
        intelnops,
        intelnops + 1,
        intelnops + 1 + 2,
        intelnops + 1 + 2 + 3,
        intelnops + 1 + 2 + 3 + 4,
        intelnops + 1 + 2 + 3 + 4 + 5,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef K8_NOP1
static const unsigned char k8nops[] =
{
        K8_NOP1,
        K8_NOP2,
        K8_NOP3,
        K8_NOP4,
        K8_NOP5,
        K8_NOP6,
        K8_NOP7,
        K8_NOP8,
        K8_NOP5_ATOMIC
};
static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
{
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
static const unsigned char k7nops[] =
{
        K7_NOP1,
        K7_NOP2,
        K7_NOP3,
        K7_NOP4,
        K7_NOP5,
        K7_NOP6,
        K7_NOP7,
        K7_NOP8,
        K7_NOP5_ATOMIC
};
static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
{
        NULL,
        k7nops,
        k7nops + 1,
        k7nops + 1 + 2,
        k7nops + 1 + 2 + 3,
        k7nops + 1 + 2 + 3 + 4,
        k7nops + 1 + 2 + 3 + 4 + 5,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

#ifdef P6_NOP1
static const unsigned char p6nops[] =
{
        P6_NOP1,
        P6_NOP2,
        P6_NOP3,
        P6_NOP4,
        P6_NOP5,
        P6_NOP6,
        P6_NOP7,
        P6_NOP8,
        P6_NOP5_ATOMIC
};
static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
{
        NULL,
        p6nops,
        p6nops + 1,
        p6nops + 1 + 2,
        p6nops + 1 + 2 + 3,
        p6nops + 1 + 2 + 3 + 4,
        p6nops + 1 + 2 + 3 + 4 + 5,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
};
#endif

/* Initialize these to a safe default */
#ifdef CONFIG_X86_64
const unsigned char * const *ideal_nops = p6_nops;
#else
const unsigned char * const *ideal_nops = intel_nops;
#endif

void __init arch_init_ideal_nops(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
                /*
                 * Due to a decoder implementation quirk, some
                 * specific Intel CPUs actually perform better with
                 * the "k8_nops" than with the SDM-recommended NOPs.
                 */
                if (boot_cpu_data.x86 == 6 &&
                    boot_cpu_data.x86_model >= 0x0f &&
                    boot_cpu_data.x86_model != 0x1c &&
                    boot_cpu_data.x86_model != 0x26 &&
                    boot_cpu_data.x86_model != 0x27 &&
                    boot_cpu_data.x86_model < 0x30) {
                        ideal_nops = k8_nops;
                } else if (boot_cpu_has(X86_FEATURE_NOPL)) {
                        ideal_nops = p6_nops;
                } else {
#ifdef CONFIG_X86_64
                        ideal_nops = k8_nops;
#else
                        ideal_nops = intel_nops;
#endif
                }
                break;
        default:
#ifdef CONFIG_X86_64
                ideal_nops = k8_nops;
#else
                if (boot_cpu_has(X86_FEATURE_K8))
                        ideal_nops = k8_nops;
                else if (boot_cpu_has(X86_FEATURE_K7))
                        ideal_nops = k7_nops;
                else
                        ideal_nops = intel_nops;
#endif
        }
}

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
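/*
 * For example, with ASM_NOP_MAX == 8, add_nops(buf, 11) emits the ideal
 * 8-byte nop followed by the ideal 3-byte nop, covering all 11 bytes.
 */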
static void __init_or_module add_nops(void *insns, unsigned int len)
{
        while (len > 0) {
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
                memcpy(insns, ideal_nops[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
}

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */
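/*
 * The alt_instr entries processed here are emitted into the .altinstructions
 * section by the alternative()/alternative_input() macros in
 * <asm/alternative.h>; each entry records the original and replacement code
 * and the CPU feature bit that selects the replacement.
 */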

void __init_or_module apply_alternatives(struct alt_instr *start,
                                         struct alt_instr *end)
{
        struct alt_instr *a;
        u8 *instr, *replacement;
        u8 insnbuf[MAX_PATCH_LEN];

        DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
        /*
         * The scan order should be from start to end. A later scanned
         * alternative can overwrite an earlier scanned one. Some kernel
         * functions (e.g. memcpy, memset) rely on this order when they
         * are patched.
         *
         * So be careful if you want to change the scan order to any
         * other order.
         */
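        /*
         * instr_offset and repl_offset are 32-bit offsets relative to the
         * address of the field itself, so the original instruction and its
         * replacement are recovered by adding each offset to the address
         * of the field that stores it.
         */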
        for (a = start; a < end; a++) {
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
                BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                BUG_ON(a->cpuid >= NCAPINTS*32);
                if (!boot_cpu_has(a->cpuid))
                        continue;

                memcpy(insnbuf, replacement, a->replacementlen);

                /*
                 * 0xe8 is a relative call; its rel32 operand was assembled
                 * relative to the replacement site, so re-bias it by
                 * (replacement - instr) for the target site.
                 */
                if (*insnbuf == 0xe8 && a->replacementlen == 5)
                        *(s32 *)(insnbuf + 1) += replacement - instr;

                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);

                text_poke_early(instr, insnbuf, a->instrlen);
        }
}

#ifdef CONFIG_SMP

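/*
 * On SMP kernels, LOCK_PREFIX emits a real lock prefix and records its
 * address (as a 32-bit relative offset) in the .smp_locks section. The
 * helpers below flip those bytes between the lock prefix (0xf0) and a DS
 * segment override (0x3e), which is a no-op here, so a kernel built for
 * SMP can drop the atomic lock prefixes while only one CPU is running.
 */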
static void alternatives_smp_lock(const s32 *start, const s32 *end,
                                  u8 *text, u8 *text_end)
{
        const s32 *poff;

        mutex_lock(&text_mutex);
        for (poff = start; poff < end; poff++) {
                u8 *ptr = (u8 *)poff + *poff;

                if (!*poff || ptr < text || ptr >= text_end)
                        continue;
                /* turn DS segment override prefix into lock prefix */
                if (*ptr == 0x3e)
                        text_poke(ptr, ((unsigned char []){0xf0}), 1);
        }
        mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
                                    u8 *text, u8 *text_end)
{
        const s32 *poff;

        mutex_lock(&text_mutex);
        for (poff = start; poff < end; poff++) {
                u8 *ptr = (u8 *)poff + *poff;

                if (!*poff || ptr < text || ptr >= text_end)
                        continue;
                /* turn lock prefix into DS segment override prefix */
                if (*ptr == 0xf0)
                        text_poke(ptr, ((unsigned char []){0x3E}), 1);
        }
        mutex_unlock(&text_mutex);
}

struct smp_alt_module {
        /* the owning module (NULL for the core kernel) and its name */
        struct module *mod;
        char *name;

        /* ptrs to lock prefixes */
        const s32 *locks;
        const s32 *locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8 *text;
        u8 *text_end;

        struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static bool uniproc_patched = false;    /* protected by smp_alt */

void __init_or_module alternatives_smp_module_add(struct module *mod,
                                                  char *name,
                                                  void *locks, void *locks_end,
                                                  void *text, void *text_end)
{
        struct smp_alt_module *smp;

        mutex_lock(&smp_alt);
        if (!uniproc_patched)
                goto unlock;

        if (num_possible_cpus() == 1)
                /* Don't bother remembering, we'll never have to undo it. */
                goto smp_unlock;

        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (NULL == smp)
                /* we'll run the (safe but slow) SMP code then ... */
                goto unlock;

        smp->mod       = mod;
        smp->name      = name;
        smp->locks     = locks;
        smp->locks_end = locks_end;
        smp->text      = text;
        smp->text_end  = text_end;
        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
                __func__, smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);

        list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
        alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
        mutex_unlock(&smp_alt);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
        struct smp_alt_module *item;

        mutex_lock(&smp_alt);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
                kfree(item);
                break;
        }
        mutex_unlock(&smp_alt);
}

void alternatives_enable_smp(void)
{
        struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
        /*
         * Older binutils section handling bug prevented
         * alternatives-replacement from working reliably.
         *
         * If this still occurs then you should see a hang
         * or crash shortly after this line:
         */
        pr_info("lockdep: fixing up alternatives\n");
#endif

        /* Why bother if there are no other CPUs? */
        BUG_ON(num_possible_cpus() == 1);

        mutex_lock(&smp_alt);

        if (uniproc_patched) {
                pr_info("switching to SMP code\n");
                BUG_ON(num_online_cpus() != 1);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
                uniproc_patched = false;
        }
        mutex_unlock(&smp_alt);
}

/* Return 1 if the address range is reserved for smp-alternatives */
int alternatives_text_reserved(void *start, void *end)
{
        struct smp_alt_module *mod;
        const s32 *poff;
        u8 *text_start = start;
        u8 *text_end = end;

        list_for_each_entry(mod, &smp_alt_modules, next) {
                if (mod->text > text_end || mod->text_end < text_start)
                        continue;
                for (poff = mod->locks; poff < mod->locks_end; poff++) {
                        const u8 *ptr = (const u8 *)poff + *poff;

                        if (text_start <= ptr && text_end > ptr)
                                return 1;
                }
        }

        return 0;
}
#endif

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
                                     struct paravirt_patch_site *end)
{
        struct paravirt_patch_site *p;
        char insnbuf[MAX_PATCH_LEN];

        if (noreplace_paravirt)
                return;

        for (p = start; p < end; p++) {
                unsigned int used;

                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
                                         (unsigned long)p->instr, p->len);

                BUG_ON(used > p->len);

                /* Pad the rest with nops */
                add_nops(insnbuf + used, p->len - used);
                text_poke_early(p->instr, insnbuf, p->len);
        }
}
extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
#endif  /* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
        /*
         * The patching is not fully atomic, so try to avoid local
         * interruptions that might execute the code that is being
         * patched. Other CPUs are not running.
         */
        stop_nmi();

        /*
         * Don't stop machine check exceptions while patching.
         * MCEs only happen when something got corrupted and in this
         * case we must do something about the corruption.
         * Ignoring it is worse than an unlikely patching race.
         * Also machine checks tend to be broadcast and if one CPU
         * goes into machine check the others follow quickly, so we don't
         * expect a machine check to cause undue problems during code
         * patching.
         */

        apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
        /* Patch to UP if other CPUs are not imminent. */
        if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
                uniproc_patched = true;
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);
        }

        if (!uniproc_patched || num_possible_cpus() == 1)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);
#endif

        apply_paravirt(__parainstructions, __parainstructions_end);

        restart_nmi();
}

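/*
 * text_poke_early - update instructions on a live kernel at boot or module
 * load time, before (or without) other CPUs executing the patched range.
 * Only local interrupts are disabled; the caller must guarantee that no
 * other CPU, NMI or preempted thread can execute the code being modified.
 */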
void *__init_or_module text_poke_early(void *addr, const void *opcode,
                                       size_t len)
{
        unsigned long flags;
        local_irq_save(flags);
        memcpy(addr, opcode, len);
        sync_core();
        local_irq_restore(flags);
        /*
         * Could also do a CLFLUSH here to speed up CPU recovery; but
         * that causes hangs on some VIA CPUs.
         */
        return addr;
}

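/*
 * text_poke - patch kernel text at runtime. The target page(s) are mapped
 * writable through the FIX_TEXT_POKE0/1 fixmap slots, so the patch works
 * even when kernel text itself is mapped read-only. Callers must hold
 * text_mutex, and the patched instructions must not be executed
 * concurrently (the cross-modifying variants below handle that case).
 */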
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
        unsigned long flags;
        char *vaddr;
        struct page *pages[2];
        int i;

        if (!core_kernel_text((unsigned long)addr)) {
                pages[0] = vmalloc_to_page(addr);
                pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
        } else {
                pages[0] = virt_to_page(addr);
                WARN_ON(!PageReserved(pages[0]));
                pages[1] = virt_to_page(addr + PAGE_SIZE);
        }
        BUG_ON(!pages[0]);
        local_irq_save(flags);
        set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
        if (pages[1])
                set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
        vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
        memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
        clear_fixmap(FIX_TEXT_POKE0);
        if (pages[1])
                clear_fixmap(FIX_TEXT_POKE1);
        local_flush_tlb();
        sync_core();
        /*
         * Could also do a CLFLUSH here to speed up CPU recovery; but
         * that causes hangs on some VIA CPUs.
         */
        for (i = 0; i < len; i++)
                BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
        local_irq_restore(flags);
        return addr;
}

/*
 * Cross-modifying kernel text with stop_machine().
 * This code originally comes from immediate value.
 */
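/*
 * The rendezvous below follows Intel's recommended cross-modifying code
 * sequence: exactly one CPU (the first to clear stop_machine_first) writes
 * the new instructions while the others spin on wrote_text; afterwards
 * every CPU flushes the affected icache range and executes a serializing
 * instruction (sync_core) before resuming.
 */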
static atomic_t stop_machine_first;
static int wrote_text;

struct text_poke_params {
        struct text_poke_param *params;
        int nparams;
};

static int __kprobes stop_machine_text_poke(void *data)
{
        struct text_poke_params *tpp = data;
        struct text_poke_param *p;
        int i;

        if (atomic_xchg(&stop_machine_first, 0)) {
                for (i = 0; i < tpp->nparams; i++) {
                        p = &tpp->params[i];
                        text_poke(p->addr, p->opcode, p->len);
                }
                smp_wmb();      /* Make sure other cpus see that this has run */
                wrote_text = 1;
        } else {
                while (!wrote_text)
                        cpu_relax();
                smp_mb();       /* Load wrote_text before following execution */
        }

        for (i = 0; i < tpp->nparams; i++) {
                p = &tpp->params[i];
                flush_icache_range((unsigned long)p->addr,
                                   (unsigned long)p->addr + p->len);
        }
        /*
         * Intel Architecture Software Developer's Manual section 7.1.3
         * specifies that a core serializing instruction such as "cpuid"
         * should be executed on _each_ core before the new instruction is
         * made visible.
         */
        sync_core();
        return 0;
}

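/*
 * text_poke_smp - patch a single location while every online CPU is
 * corralled in stop_machine(), so no CPU can be executing the bytes that
 * change. The caller is expected to have taken get_online_cpus() already,
 * hence the use of __stop_machine().
 */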
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
        struct text_poke_params tpp;
        struct text_poke_param p;

        p.addr = addr;
        p.opcode = opcode;
        p.len = len;
        tpp.params = &p;
        tpp.nparams = 1;
        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        /* Use __stop_machine() because the caller has already taken get_online_cpus(). */
        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
        return addr;
}

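/*
 * text_poke_smp_batch - patch several locations in one stop_machine() call,
 * amortizing the cost of stopping all CPUs when many sites (e.g. a batch of
 * optimized kprobes) need to change at once.
 */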
void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
{
        struct text_poke_params tpp = {.params = params, .nparams = n};

        atomic_set(&stop_machine_first, 1);
        wrote_text = 0;
        __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
}