Linux Kernel  3.7.1
coupled.c
/*
 * coupled.c - helper functions to enter the same idle state on multiple cpus
 *
 * Copyright (c) 2011 Google, Inc.
 *
 * Author: Colin Cross <[email protected]>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include "cpuidle.h"

/* Data shared by all cpus in a set that share a coupled idle state. */
struct cpuidle_coupled {
	cpumask_t coupled_cpus;
	int requested_state[NR_CPUS];
	atomic_t ready_waiting_counts;
	int online_count;
	int refcnt;
	int prevent;
};

#define WAITING_BITS 16
#define MAX_WAITING_CPUS (1 << WAITING_BITS)
#define WAITING_MASK (MAX_WAITING_CPUS - 1)
#define READY_MASK (~WAITING_MASK)

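A single atomic value, ready_waiting_counts, carries both counters used below: the waiting count occupies the low WAITING_BITS bits and the ready count sits in the bits above them.  The following standalone sketch (plain userspace C, not part of this file; the cpu counts are made up) only demonstrates that packing, which the helper functions later in the file rely on.

#include <stdio.h>

#define WAITING_BITS 16
#define MAX_WAITING_CPUS (1 << WAITING_BITS)
#define WAITING_MASK (MAX_WAITING_CPUS - 1)

int main(void)
{
	/* Hypothetical coupled set of 4 cpus: all 4 are waiting and 2 of
	 * them have also marked themselves ready. */
	unsigned int counts = 0;

	counts += 4;                    /* each waiting cpu adds 1 (low half)   */
	counts += 2 * MAX_WAITING_CPUS; /* each ready cpu adds MAX_WAITING_CPUS */

	printf("waiting = %u\n", counts & WAITING_MASK);  /* prints 4 */
	printf("ready   = %u\n", counts >> WAITING_BITS); /* prints 2 */
	return 0;
}
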
#define CPUIDLE_COUPLED_NOT_IDLE	(-1)

static DEFINE_MUTEX(cpuidle_coupled_lock);
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);

/*
 * The cpuidle_coupled_poked_mask mask is used to avoid calling
 * __smp_call_function_single with the per cpu call_single_data struct already
 * in use.  This prevents a deadlock where two cpus are waiting for each
 * other's call_single_data struct to be available.
 */
static cpumask_t cpuidle_coupled_poked_mask;

/*
 * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus.
 *
 * No cpu returns from this function until all online cpus in the same
 * coupled group have called it.  The atomic variable a must be initialized
 * to 0 before any cpu calls this function; it is reset to 0 again before
 * any cpu returns, so the barrier is immediately reusable.
 */
void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
{
	int n = dev->coupled->online_count;

	smp_mb__before_atomic_inc();
	atomic_inc(a);

	while (atomic_read(a) < n)
		cpu_relax();

	if (atomic_inc_return(a) == n * 2) {
		atomic_set(a, 0);
		return;
	}

	while (atomic_read(a) > n)
		cpu_relax();
}

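For context, a platform driver is expected to call this barrier from inside a coupled idle state handler to make sure every online cpu in the set has arrived before touching shared power hardware.  The sketch below is a hypothetical handler, not part of coupled.c: the mydrv_* names and the power-down helpers are invented for illustration, and only the enter-callback signature and cpuidle_coupled_parallel_barrier itself come from the kernel.

/* Illustrative sketch only -- not part of coupled.c.  All mydrv_* names are
 * hypothetical. */
#include <linux/atomic.h>
#include <linux/cpuidle.h>

static atomic_t mydrv_ready_barrier;	/* must start at 0; reset by the barrier */

static void mydrv_power_down_cluster(void) { /* hypothetical: shared power domain off */ }
static void mydrv_power_down_cpu(int cpu)  { /* hypothetical: per-cpu power-down */ }

static int mydrv_enter_coupled(struct cpuidle_device *dev,
			       struct cpuidle_driver *drv, int index)
{
	/* No cpu proceeds past this point until every online cpu in the
	 * coupled set has reached it. */
	cpuidle_coupled_parallel_barrier(dev, &mydrv_ready_barrier);

	if (dev->cpu == 0)
		mydrv_power_down_cluster();
	else
		mydrv_power_down_cpu(dev->cpu);

	return index;
}
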
/*
 * cpuidle_state_is_coupled - check if a state is part of a coupled set.
 * Returns true if the target state is coupled with cpus besides this one.
 */
bool cpuidle_state_is_coupled(struct cpuidle_device *dev,
	struct cpuidle_driver *drv, int state)
{
	return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
}

/* Mark this cpu as ready by adding one to the ready (upper) half of the counter. */
static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled)
{
	atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}

/*
 * cpuidle_coupled_set_not_ready - mark this cpu as not ready.
 *
 * Decrements the ready counter, unless both the ready and waiting counters
 * are already equal to the number of online cpus.  Returns 0 on success,
 * -EINVAL if every online cpu was ready.
 */
static
inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
{
	int all;
	int ret;

	all = coupled->online_count | (coupled->online_count << WAITING_BITS);
	ret = atomic_add_unless(&coupled->ready_waiting_counts,
		-MAX_WAITING_CPUS, all);

	return ret ? 0 : -EINVAL;
}

/* Returns true when none of the cpus in the coupled set are in the ready loop. */
static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled)
{
	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
	return r == 0;
}

/* Returns true when all online cpus in the coupled set have marked themselves ready. */
static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled)
{
	int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
	return r == coupled->online_count;
}

/* Returns true when all online cpus in the coupled set are in the waiting loop. */
static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled)
{
	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
	return w == coupled->online_count;
}

/* Returns true when none of the cpus in the coupled set are waiting. */
static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled)
{
	int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
	return w == 0;
}

/*
 * cpuidle_coupled_get_state - determine the deepest idle state that all
 * coupled cpus have requested.
 */
static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
		struct cpuidle_coupled *coupled)
{
	int i;
	int state = INT_MAX;

	/*
	 * Read barrier ensures that the read of requested_state is ordered
	 * after reads of ready_count.  Matches the write barrier in
	 * cpuidle_coupled_set_waiting.
	 */
	smp_rmb();

	for_each_cpu_mask(i, coupled->coupled_cpus)
		if (cpu_online(i) && coupled->requested_state[i] < state)
			state = coupled->requested_state[i];

	return state;
}

static void cpuidle_coupled_poked(void *info)
{
	int cpu = (unsigned long)info;
	cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask);
}

/*
 * cpuidle_coupled_poke - wake up a cpu that may be waiting.  Sends an IPI so
 * the target cpu exits its waiting or ready loop, unless a poke is already
 * pending for that cpu.
 */
static void cpuidle_coupled_poke(int cpu)
{
	struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);

	if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask))
		__smp_call_function_single(cpu, csd, 0);
}

static void cpuidle_coupled_poke_others(int this_cpu,
		struct cpuidle_coupled *coupled)
{
	int cpu;

	for_each_cpu_mask(cpu, coupled->coupled_cpus)
		if (cpu != this_cpu && cpu_online(cpu))
			cpuidle_coupled_poke(cpu);
}

/*
 * cpuidle_coupled_set_waiting - mark this cpu as in the waiting loop and
 * record the idle state it wants to enter.
 */
static void cpuidle_coupled_set_waiting(int cpu,
		struct cpuidle_coupled *coupled, int next_state)
{
	int w;

	coupled->requested_state[cpu] = next_state;

	/*
	 * If this is the last cpu to enter the waiting state, poke
	 * all the other cpus out of their waiting state so they can
	 * enter a deeper state.  This can race with one of the cpus
	 * exiting the waiting state due to an interrupt and
	 * decrementing waiting_count, see comment below.
	 *
	 * The atomic_inc_return provides a write barrier to order the write
	 * to requested_state with the later write that increments ready_count.
	 */
	w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
	if (w == coupled->online_count)
		cpuidle_coupled_poke_others(cpu, coupled);
}

/* Mark this cpu as leaving the waiting loop and clear its requested state. */
static void cpuidle_coupled_set_not_waiting(int cpu,
		struct cpuidle_coupled *coupled)
{
	/*
	 * Decrementing waiting count can race with incrementing it in
	 * cpuidle_coupled_set_waiting, but that's OK.  Worst case, some
	 * cpus will increment ready_count and then spin until they
	 * notice that this cpu has cleared its requested_state.
	 */
	atomic_dec(&coupled->ready_waiting_counts);

	coupled->requested_state[cpu] = CPUIDLE_COUPLED_NOT_IDLE;
}

/*
 * cpuidle_coupled_set_done - mark this cpu as no longer in the ready or
 * waiting loops after exiting the coupled state.
 */
static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
{
	cpuidle_coupled_set_not_waiting(cpu, coupled);
	atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
}

/*
 * cpuidle_coupled_clear_pokes - turn on interrupts and spin until any
 * outstanding poke interrupt has been processed and the poke bit cleared.
 * Returns 0 if need_resched() is false afterwards, -EINTR if it is true.
 */
static int cpuidle_coupled_clear_pokes(int cpu)
{
	local_irq_enable();
	while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask))
		cpu_relax();
	local_irq_disable();

	return need_resched() ? -EINTR : 0;
}

/*
 * cpuidle_enter_state_coupled - attempt to enter an idle state with coupled
 * cpus.  Each cpu waits in the safe state until all cpus in dev->coupled_cpus
 * have requested an idle state, then they all enter the deepest state
 * requested by any cpu in the set.
 */
int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
		struct cpuidle_driver *drv, int next_state)
{
	int entered_state = -1;
	struct cpuidle_coupled *coupled = dev->coupled;

	if (!coupled)
		return -EINVAL;

	while (coupled->prevent) {
		if (cpuidle_coupled_clear_pokes(dev->cpu)) {
			local_irq_enable();
			return entered_state;
		}
		entered_state = cpuidle_enter_state(dev, drv,
			dev->safe_state_index);
	}

	/* Read barrier ensures online_count is read after prevent is cleared */
	smp_rmb();

	cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);

retry:
	/*
	 * Wait for all coupled cpus to be idle, using the deepest state
	 * allowed for a single cpu.
	 */
	while (!cpuidle_coupled_cpus_waiting(coupled)) {
		if (cpuidle_coupled_clear_pokes(dev->cpu)) {
			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
			goto out;
		}

		if (coupled->prevent) {
			cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
			goto out;
		}

		entered_state = cpuidle_enter_state(dev, drv,
			dev->safe_state_index);
	}

	if (cpuidle_coupled_clear_pokes(dev->cpu)) {
		cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
		goto out;
	}

	/*
	 * All coupled cpus are probably idle.  There is a small chance that
	 * one of the other cpus just became active.  Increment the ready count,
	 * and spin until all coupled cpus have incremented the counter.  Once a
	 * cpu has incremented the ready counter, it cannot abort idle and must
	 * spin until either all cpus have incremented the ready counter, or
	 * another cpu leaves idle and decrements the waiting counter.
	 */

	cpuidle_coupled_set_ready(coupled);
	while (!cpuidle_coupled_cpus_ready(coupled)) {
		/* Check if any other cpus bailed out of idle. */
		if (!cpuidle_coupled_cpus_waiting(coupled))
			if (!cpuidle_coupled_set_not_ready(coupled))
				goto retry;

		cpu_relax();
	}

	/* all cpus have acked the coupled state */
	next_state = cpuidle_coupled_get_state(dev, coupled);

	entered_state = cpuidle_enter_state(dev, drv, next_state);

	cpuidle_coupled_set_done(dev->cpu, coupled);

out:
	/*
	 * Normal cpuidle states are expected to return with irqs enabled.
	 * That leads to an inefficiency where a cpu receiving an interrupt
	 * that brings it out of idle will process that interrupt before
	 * exiting the idle enter function and decrementing ready_count.  All
	 * other cpus will need to spin waiting for the cpu that is processing
	 * the interrupt.  If the driver returns with interrupts disabled,
	 * all other cpus will loop back into the safe idle state instead of
	 * spinning, saving power.
	 *
	 * Calling local_irq_enable here allows coupled states to return with
	 * interrupts disabled, but won't cause problems for drivers that
	 * exit with interrupts enabled.
	 */
	local_irq_enable();

	/*
	 * Wait until all coupled cpus have exited idle.  There is no risk that
	 * a cpu exits and re-enters the ready state because this cpu has
	 * already decremented its waiting_count.
	 */
	while (!cpuidle_coupled_no_cpus_ready(coupled))
		cpu_relax();

	return entered_state;
}

static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled)
{
	cpumask_t cpus;
	cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
	coupled->online_count = cpumask_weight(&cpus);
}

/*
 * cpuidle_coupled_register_device - register a coupled cpuidle device.
 *
 * Called from cpuidle_register_device to handle coupled idle init.  Finds the
 * cpuidle_coupled struct for this set of coupled cpus, or creates one if none
 * exists yet.
 */
int cpuidle_coupled_register_device(struct cpuidle_device *dev)
{
	int cpu;
	struct cpuidle_device *other_dev;
	struct call_single_data *csd;
	struct cpuidle_coupled *coupled;

	if (cpumask_empty(&dev->coupled_cpus))
		return 0;

	for_each_cpu_mask(cpu, dev->coupled_cpus) {
		other_dev = per_cpu(cpuidle_devices, cpu);
		if (other_dev && other_dev->coupled) {
			coupled = other_dev->coupled;
			goto have_coupled;
		}
	}

	/* No existing coupled info found, create a new one */
	coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
	if (!coupled)
		return -ENOMEM;

	coupled->coupled_cpus = dev->coupled_cpus;

have_coupled:
	dev->coupled = coupled;
	if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus)))
		coupled->prevent++;

	cpuidle_coupled_update_online_cpus(coupled);

	coupled->refcnt++;

	csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
	csd->func = cpuidle_coupled_poked;
	csd->info = (void *)(unsigned long)dev->cpu;

	return 0;
}

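To tie the pieces together, here is a hedged sketch of how a platform might wire things up so the registration path above is used: the driver marks one state with CPUIDLE_FLAG_COUPLED, each per-cpu device names a safe fallback state via safe_state_index, and coupled_cpus tells cpuidle_coupled_register_device which cpus share the state.  Everything named mydrv_* is hypothetical (mydrv_enter_coupled is the handler sketched after the parallel barrier above); the latencies and cpumask are made up, and coupled.c itself is only built when the platform selects ARCH_NEEDS_CPU_IDLE_COUPLED.

/* Illustrative sketch only -- not part of coupled.c. */
#include <linux/cpuidle.h>
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>

/* Hypothetical platform callbacks, defined elsewhere. */
int mydrv_enter_safe(struct cpuidle_device *dev,
		     struct cpuidle_driver *drv, int index);
int mydrv_enter_coupled(struct cpuidle_device *dev,
			struct cpuidle_driver *drv, int index);

static struct cpuidle_driver mydrv_idle_driver = {
	.name  = "mydrv_idle",
	.owner = THIS_MODULE,
	.states[0] = {
		.enter            = mydrv_enter_safe,
		.exit_latency     = 10,
		.target_residency = 20,
		.flags            = CPUIDLE_FLAG_TIME_VALID,
		.name             = "safe",
		.desc             = "per-cpu clock gating",
	},
	.states[1] = {
		.enter            = mydrv_enter_coupled,
		.exit_latency     = 500,
		.target_residency = 1000,
		.flags            = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_COUPLED,
		.name             = "coupled",
		.desc             = "shared power domain off",
	},
	.state_count = 2,
};

static DEFINE_PER_CPU(struct cpuidle_device, mydrv_idle_device);

static int __init mydrv_idle_init(void)
{
	int cpu, ret;

	ret = cpuidle_register_driver(&mydrv_idle_driver);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		struct cpuidle_device *dev = &per_cpu(mydrv_idle_device, cpu);

		dev->cpu = cpu;
		/* Fall back to state 0 while waiting for the other cpus. */
		dev->safe_state_index = 0;
		/* All cpus in this hypothetical cluster share the coupled state. */
		cpumask_copy(&dev->coupled_cpus, cpu_possible_mask);

		/* cpuidle_register_device reaches
		 * cpuidle_coupled_register_device above. */
		ret = cpuidle_register_device(dev);
		if (ret)
			return ret;
	}

	return 0;
}
device_initcall(mydrv_idle_init);
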
/* cpuidle_coupled_unregister_device - undo cpuidle_coupled_register_device. */
void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
{
	struct cpuidle_coupled *coupled = dev->coupled;

	if (cpumask_empty(&dev->coupled_cpus))
		return;

	/* Free the shared struct when the last cpu in the set unregisters. */
	if (!--coupled->refcnt)
		kfree(coupled);
	dev->coupled = NULL;
}

/* Prevent cpus in the coupled set from entering a coupled state. */
static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled)
{
	int cpu = get_cpu();

	/* Force all cpus out of the waiting loop. */
	coupled->prevent++;
	cpuidle_coupled_poke_others(cpu, coupled);
	put_cpu();
	while (!cpuidle_coupled_no_cpus_waiting(coupled))
		cpu_relax();
}

/* Allow cpus in the coupled set to enter a coupled state again. */
static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled)
{
	int cpu = get_cpu();

	/*
	 * Write barrier ensures readers see the new online_count when they
	 * see prevent == 0.
	 */
	smp_wmb();
	coupled->prevent--;
	/* Force cpus out of the prevent loop. */
	cpuidle_coupled_poke_others(cpu, coupled);
	put_cpu();
}

/*
 * cpuidle_coupled_cpu_notify - handle hotplug notifications for coupled cpus.
 * Coupled idle is prevented while a cpu is going up or down, and online_count
 * is refreshed once the transition has finished.
 */
static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
		unsigned long action, void *hcpu)
{
	int cpu = (unsigned long)hcpu;
	struct cpuidle_device *dev;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
	case CPU_DOWN_PREPARE:
	case CPU_ONLINE:
	case CPU_DEAD:
	case CPU_UP_CANCELED:
	case CPU_DOWN_FAILED:
		break;
	default:
		return NOTIFY_OK;
	}

	mutex_lock(&cpuidle_lock);

	dev = per_cpu(cpuidle_devices, cpu);
	if (!dev || !dev->coupled)
		goto out;

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_PREPARE:
	case CPU_DOWN_PREPARE:
		cpuidle_coupled_prevent_idle(dev->coupled);
		break;
	case CPU_ONLINE:
	case CPU_DEAD:
		cpuidle_coupled_update_online_cpus(dev->coupled);
		/* Fall through */
	case CPU_UP_CANCELED:
	case CPU_DOWN_FAILED:
		cpuidle_coupled_allow_idle(dev->coupled);
		break;
	}

out:
	mutex_unlock(&cpuidle_lock);
	return NOTIFY_OK;
}

static struct notifier_block cpuidle_coupled_cpu_notifier = {
	.notifier_call = cpuidle_coupled_cpu_notify,
};

static int __init cpuidle_coupled_init(void)
{
	return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
}
core_initcall(cpuidle_coupled_init);