Linux Kernel 3.7.1
sn2_smp.c
1 /*
2  * SN2 Platform specific SMP Support
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License. See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 2000-2006 Silicon Graphics, Inc. All rights reserved.
9  */
10 
11 #include <linux/init.h>
12 #include <linux/kernel.h>
13 #include <linux/spinlock.h>
14 #include <linux/threads.h>
15 #include <linux/sched.h>
16 #include <linux/smp.h>
17 #include <linux/interrupt.h>
18 #include <linux/irq.h>
19 #include <linux/mmzone.h>
20 #include <linux/module.h>
21 #include <linux/bitops.h>
22 #include <linux/nodemask.h>
23 #include <linux/proc_fs.h>
24 #include <linux/seq_file.h>
25 
26 #include <asm/processor.h>
27 #include <asm/irq.h>
28 #include <asm/sal.h>
29 #include <asm/delay.h>
30 #include <asm/io.h>
31 #include <asm/smp.h>
32 #include <asm/tlb.h>
33 #include <asm/numa.h>
34 #include <asm/hw_irq.h>
35 #include <asm/current.h>
36 #include <asm/sn/sn_cpuid.h>
37 #include <asm/sn/sn_sal.h>
38 #include <asm/sn/addrs.h>
39 #include <asm/sn/shub_mmr.h>
40 #include <asm/sn/nodepda.h>
41 #include <asm/sn/rw_mmr.h>
42 #include <asm/sn/sn_feature_sets.h>
43 
44 DEFINE_PER_CPU(struct ptc_stats, ptcstats);
45 DECLARE_PER_CPU(struct ptc_stats, ptcstats);
46 
47 static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
48 
49 /* 0 = old algorithm (no IPI flushes), 1 = ipi deadlock flush, 2 = ipi instead of SHUB ptc, >2 = always ipi */
50 static int sn2_flush_opt = 0;
51 
52 extern unsigned long
53 sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
54  volatile unsigned long *, unsigned long,
55  volatile unsigned long *, unsigned long);
56 void
57 sn2_ptc_deadlock_recovery(short *, short, short, int,
58  volatile unsigned long *, unsigned long,
59  volatile unsigned long *, unsigned long);
60 
61 /*
62  * Note: some of the following is captured here to make debugging easier
63  * (the macros make more sense if you see the debug patch - not posted)
64  */
65 #define sn2_ptctest 0
66 #define local_node_uses_ptc_ga(sh1) ((sh1) ? 1 : 0)
67 #define max_active_pio(sh1) ((sh1) ? 32 : 7)
68 #define reset_max_active_on_deadlock() 1
69 #define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock)
70 
71 struct ptc_stats {
72  unsigned long ptc_l;
73  unsigned long change_rid;
74  unsigned long shub_ptc_flushes;
75  unsigned long nodes_flushed;
76  unsigned long deadlocks;
77  unsigned long deadlocks2;
78  unsigned long lock_itc_clocks;
79  unsigned long shub_itc_clocks;
80  unsigned long shub_itc_clocks_max;
81  unsigned long shub_ptc_flushes_not_my_mm;
82  unsigned long shub_ipi_flushes;
83  unsigned long shub_ipi_flushes_itc_clocks;
84 };
85 
86 #define sn2_ptctest 0
87 
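/*
 * wait_piowc: spin until the pending PIO write count for this CPU has
 * drained to its idle value in the local SHub, then return nonzero if the
 * SHub flagged a write deadlock while the writes were draining.
 */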
88 static inline unsigned long wait_piowc(void)
89 {
90  volatile unsigned long *piows;
91  unsigned long zeroval, ws;
92 
93  piows = pda->pio_write_status_addr;
94  zeroval = pda->pio_write_status_val;
95  do {
96  cpu_relax();
97  } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
98  return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
99 }
100 
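/*
 * sn_migrate: run when a task moves to a new CPU.  PIO writes issued by
 * different CPUs are not ordered with respect to each other, so before the
 * task continues we wait for the writes issued on the CPU it last ran on
 * (task_thread_info(task)->last_cpu) to drain from that CPU's SHub.
 */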
110 void sn_migrate(struct task_struct *task)
111 {
112  pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
113  volatile unsigned long *adr = last_pda->pio_write_status_addr;
114  unsigned long val = last_pda->pio_write_status_val;
115 
116  /* Drain PIO writes from old CPU's Shub */
117  while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
118  != val))
119  cpu_relax();
120 }
121 
122 void sn_tlb_migrate_finish(struct mm_struct *mm)
123 {
124  /* flush_tlb_mm is inefficient if more than 1 user of the mm */
125  if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
126  flush_tlb_mm(mm);
127 }
128 
129 static void
130 sn2_ipi_flush_all_tlb(struct mm_struct *mm)
131 {
132  unsigned long itc;
133 
134  itc = ia64_get_itc();
135  smp_flush_tlb_cpumask(*mm_cpumask(mm));
136  itc = ia64_get_itc() - itc;
137  __get_cpu_var(ptcstats).shub_ipi_flushes_itc_clocks += itc;
138  __get_cpu_var(ptcstats).shub_ipi_flushes++;
139 }
140 
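/*
 * sn2_global_tlb_purge: flush the translation cache for the virtual
 * address range [start, end), in steps of (1UL << nbits) bytes, on every
 * node that has a CPU in mm_cpumask(mm).  The purely local, single-user
 * case is handled with ptc.l; otherwise the SHUB PTC MMRs are written
 * under PTC_LOCK(), or IPI-based flushes are used, depending on
 * sn2_flush_opt.
 */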
163 void
164 sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
165  unsigned long end, unsigned long nbits)
166 {
167  int i, ibegin, shub1, cnode, mynasid, cpu, lcpu = 0, nasid;
168  int mymm = (mm == current->active_mm && mm == current->mm);
169  int use_cpu_ptcga;
170  volatile unsigned long *ptc0, *ptc1;
171  unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value, old_rr = 0;
172  short nasids[MAX_NUMNODES], nix;
173  nodemask_t nodes_flushed;
174  int active, max_active, deadlock, flush_opt = sn2_flush_opt;
175 
176  if (flush_opt > 2) {
177  sn2_ipi_flush_all_tlb(mm);
178  return;
179  }
180 
181  nodes_clear(nodes_flushed);
182  i = 0;
183 
184  for_each_cpu(cpu, mm_cpumask(mm)) {
185  cnode = cpu_to_node(cpu);
186  node_set(cnode, nodes_flushed);
187  lcpu = cpu;
188  i++;
189  }
190 
191  if (i == 0)
192  return;
193 
194  preempt_disable();
195 
196  if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
197  do {
198  ia64_ptcl(start, nbits << 2);
199  start += (1UL << nbits);
200  } while (start < end);
201  ia64_srlz_i();
202  __get_cpu_var(ptcstats).ptc_l++;
203  preempt_enable();
204  return;
205  }
206 
207  if (atomic_read(&mm->mm_users) == 1 && mymm) {
208  flush_tlb_mm(mm);
209  __get_cpu_var(ptcstats).change_rid++;
210  preempt_enable();
211  return;
212  }
213 
214  if (flush_opt == 2) {
215  sn2_ipi_flush_all_tlb(mm);
216  preempt_enable();
217  return;
218  }
219 
220  itc = ia64_get_itc();
221  nix = 0;
222  for_each_node_mask(cnode, nodes_flushed)
223  nasids[nix++] = cnodeid_to_nasid(cnode);
224 
225  rr_value = (mm->context << 3) | REGION_NUMBER(start);
226 
227  shub1 = is_shub1();
228  if (shub1) {
229  data0 = (1UL << SH1_PTC_0_A_SHFT) |
230  (nbits << SH1_PTC_0_PS_SHFT) |
231  (rr_value << SH1_PTC_0_RID_SHFT) |
232  (1UL << SH1_PTC_0_START_SHFT);
233  ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
234  ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
235  } else {
236  data0 = (1UL << SH2_PTC_A_SHFT) |
237  (nbits << SH2_PTC_PS_SHFT) |
238  (1UL << SH2_PTC_START_SHFT);
239  ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
240  (rr_value << SH2_PTC_RID_SHFT));
241  ptc1 = NULL;
242  }
243 
244 
245  mynasid = get_nasid();
246  use_cpu_ptcga = local_node_uses_ptc_ga(shub1);
247  max_active = max_active_pio(shub1);
248 
249  itc = ia64_get_itc();
250  spin_lock_irqsave(PTC_LOCK(shub1), flags);
251  itc2 = ia64_get_itc();
252 
253  __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
254  __get_cpu_var(ptcstats).shub_ptc_flushes++;
255  __get_cpu_var(ptcstats).nodes_flushed += nix;
256  if (!mymm)
257  __get_cpu_var(ptcstats).shub_ptc_flushes_not_my_mm++;
258 
259  if (use_cpu_ptcga && !mymm) {
260  old_rr = ia64_get_rr(start);
261  ia64_set_rr(start, (old_rr & 0xff) | (rr_value << 8));
262  ia64_srlz_d();
263  }
264 
265  wait_piowc();
266  do {
267  if (shub1)
268  data1 = start | (1UL << SH1_PTC_1_START_SHFT);
269  else
270  data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
271  deadlock = 0;
272  active = 0;
273  for (ibegin = 0, i = 0; i < nix; i++) {
274  nasid = nasids[i];
275  if (use_cpu_ptcga && unlikely(nasid == mynasid)) {
276  ia64_ptcga(start, nbits << 2);
277  ia64_srlz_i();
278  } else {
279  ptc0 = CHANGE_NASID(nasid, ptc0);
280  if (ptc1)
281  ptc1 = CHANGE_NASID(nasid, ptc1);
282  pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
283  active++;
284  }
285  if (active >= max_active || i == (nix - 1)) {
286  if ((deadlock = wait_piowc())) {
287  if (flush_opt == 1)
288  goto done;
289  sn2_ptc_deadlock_recovery(nasids, ibegin, i, mynasid, ptc0, data0, ptc1, data1);
290  if (reset_max_active_on_deadlock())
291  max_active = 1;
292  }
293  active = 0;
294  ibegin = i + 1;
295  }
296  }
297  start += (1UL << nbits);
298  } while (start < end);
299 
300 done:
301  itc2 = ia64_get_itc() - itc2;
302  __get_cpu_var(ptcstats).shub_itc_clocks += itc2;
303  if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
304  __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
305 
306  if (old_rr) {
307  ia64_set_rr(start, old_rr);
308  ia64_srlz_d();
309  }
310 
311  spin_unlock_irqrestore(PTC_LOCK(shub1), flags);
312 
313  if (flush_opt == 1 && deadlock) {
314  __get_cpu_var(ptcstats).deadlocks++;
315  sn2_ipi_flush_all_tlb(mm);
316  }
317 
318  preempt_enable();
319 }
320 
321 /*
322  * sn2_ptc_deadlock_recovery
323  *
324  * Recover from PTC deadlock conditions. Recovery requires stepping through each
325  * TLB flush transaction. The recovery sequence is somewhat tricky & is
326  * coded in assembly language.
327  */
328 
329 void
330 sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid,
331  volatile unsigned long *ptc0, unsigned long data0,
332  volatile unsigned long *ptc1, unsigned long data1)
333 {
334  short nasid, i;
335  unsigned long *piows, zeroval, n;
336 
337  __get_cpu_var(ptcstats).deadlocks++;
338 
339  piows = (unsigned long *) pda->pio_write_status_addr;
340  zeroval = pda->pio_write_status_val;
341 
342 
343  for (i=ib; i <= ie; i++) {
344  nasid = nasids[i];
345  if (local_node_uses_ptc_ga(is_shub1()) && nasid == mynasid)
346  continue;
347  ptc0 = CHANGE_NASID(nasid, ptc0);
348  if (ptc1)
349  ptc1 = CHANGE_NASID(nasid, ptc1);
350 
351  n = sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
352  __get_cpu_var(ptcstats).deadlocks2 += n;
353  }
354 
355 }
356 
374 void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
375 {
376  long val;
377  unsigned long flags = 0;
378  volatile long *p;
379 
380  p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
381  val = (1UL << SH_IPI_INT_SEND_SHFT) |
382  (physid << SH_IPI_INT_PID_SHFT) |
383  ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
384  ((long)vector << SH_IPI_INT_IDX_SHFT) |
385  (0x000feeUL << SH_IPI_INT_BASE_SHFT);
386 
387  mb();
388  if (enable_shub_wars_1_1()) {
389  spin_lock_irqsave(&sn2_global_ptc_lock, flags);
390  }
391  pio_phys_write_mmr(p, val);
392  if (enable_shub_wars_1_1()) {
393  wait_piowc();
394  spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
395  }
396 
397 }
398 
399 EXPORT_SYMBOL(sn_send_IPI_phys);
400 
417 void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
418 {
419  long physid;
420  int nasid;
421 
422  physid = cpu_physical_id(cpuid);
423  nasid = cpuid_to_nasid(cpuid);
424 
425  /* the following is used only when starting cpus at boot time */
426  if (unlikely(nasid == -1))
427  ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
428 
429  sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
430 }
431 
432 #ifdef CONFIG_HOTPLUG_CPU
433 
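/*
 * sn_cpu_disable_allowed: report whether the given CPU may be taken
 * offline.  This requires a SHub2 system whose PROM advertises CPU
 * disable support, and the boot processor (cpu 0) can never be disabled.
 */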
440 bool sn_cpu_disable_allowed(int cpu)
441 {
442  if (is_shub2() && sn_prom_feature_available(PRF_CPU_DISABLE_SUPPORT)) {
443  if (cpu != 0)
444  return true;
445  else
446  printk(KERN_WARNING
447  "Disabling the boot processor is not allowed.\n");
448 
449  } else
450  printk(KERN_WARNING
451  "CPU disable is not supported on this system.\n");
452 
453  return false;
454 }
455 #endif /* CONFIG_HOTPLUG_CPU */
456 
457 #ifdef CONFIG_PROC_FS
458 
459 #define PTC_BASENAME "sgi_sn/ptc_statistics"
460 
461 static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
462 {
463  if (*offset < nr_cpu_ids)
464  return offset;
465  return NULL;
466 }
467 
468 static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
469 {
470  (*offset)++;
471  if (*offset < nr_cpu_ids)
472  return offset;
473  return NULL;
474 }
475 
476 static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
477 {
478 }
479 
480 static int sn2_ptc_seq_show(struct seq_file *file, void *data)
481 {
482  struct ptc_stats *stat;
483  int cpu;
484 
485  cpu = *(loff_t *) data;
486 
487  if (!cpu) {
488  seq_printf(file,
489  "# cpu ptc_l newrid ptc_flushes nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max not_my_mm deadlock2 ipi_flushes ipi_nsec\n");
490  seq_printf(file, "# ptctest %d, flushopt %d\n", sn2_ptctest, sn2_flush_opt);
491  }
492 
493  if (cpu < nr_cpu_ids && cpu_online(cpu)) {
494  stat = &per_cpu(ptcstats, cpu);
495  seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
496  stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
497  stat->deadlocks,
498  1000 * stat->lock_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
499  1000 * stat->shub_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
500  1000 * stat->shub_itc_clocks_max / per_cpu(ia64_cpu_info, cpu).cyc_per_usec,
501  stat->shub_ptc_flushes_not_my_mm,
502  stat->deadlocks2,
503  stat->shub_ipi_flushes,
504  1000 * stat->shub_ipi_flushes_itc_clocks / per_cpu(ia64_cpu_info, cpu).cyc_per_usec);
505  }
506  return 0;
507 }
508 
509 static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, size_t count, loff_t *data)
510 {
511  int cpu;
512  char optstr[64];
513 
514  if (count == 0 || count > sizeof(optstr))
515  return -EINVAL;
516  if (copy_from_user(optstr, user, count))
517  return -EFAULT;
518  optstr[count - 1] = '\0';
519  sn2_flush_opt = simple_strtoul(optstr, NULL, 0);
520 
521  for_each_online_cpu(cpu)
522  memset(&per_cpu(ptcstats, cpu), 0, sizeof(struct ptc_stats));
523 
524  return count;
525 }
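/*
 * Usage sketch, assuming the proc entry appears as
 * /proc/sgi_sn/ptc_statistics (PTC_BASENAME, registered by sn2_ptc_init()
 * below):
 *
 *   cat /proc/sgi_sn/ptc_statistics      - dump the per-cpu flush counters
 *   echo 2 > /proc/sgi_sn/ptc_statistics - switch to IPI-based flushes and
 *                                          zero the counters
 */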
526 
527 static const struct seq_operations sn2_ptc_seq_ops = {
528  .start = sn2_ptc_seq_start,
529  .next = sn2_ptc_seq_next,
530  .stop = sn2_ptc_seq_stop,
531  .show = sn2_ptc_seq_show
532 };
533 
534 static int sn2_ptc_proc_open(struct inode *inode, struct file *file)
535 {
536  return seq_open(file, &sn2_ptc_seq_ops);
537 }
538 
539 static const struct file_operations proc_sn2_ptc_operations = {
540  .open = sn2_ptc_proc_open,
541  .read = seq_read,
542  .write = sn2_ptc_proc_write,
543  .llseek = seq_lseek,
544  .release = seq_release,
545 };
546 
547 static struct proc_dir_entry *proc_sn2_ptc;
548 
549 static int __init sn2_ptc_init(void)
550 {
551  if (!ia64_platform_is("sn2"))
552  return 0;
553 
554  proc_sn2_ptc = proc_create(PTC_BASENAME, 0444,
555  NULL, &proc_sn2_ptc_operations);
556  if (!proc_sn2_ptc) {
557  printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
558  return -EINVAL;
559  }
560  spin_lock_init(&sn2_global_ptc_lock);
561  return 0;
562 }
563 
564 static void __exit sn2_ptc_exit(void)
565 {
566  remove_proc_entry(PTC_BASENAME, NULL);
567 }
568 
569 module_init(sn2_ptc_init);
570 module_exit(sn2_ptc_exit);
571 #endif /* CONFIG_PROC_FS */
572