Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cpupri.c
Go to the documentation of this file.
1 /*
2  * kernel/sched/cpupri.c
3  *
4  * CPU priority management
5  *
6  * Copyright (C) 2007-2008 Novell
7  *
8  * Author: Gregory Haskins <[email protected]>
9  *
10  * This code tracks the priority of each CPU so that global migration
11  * decisions are easy to calculate. Each CPU can be in a state as follows:
12  *
13  * (INVALID), IDLE, NORMAL, RT1, ... RT99
14  *
15  * going from the lowest priority to the highest. CPUs in the INVALID state
16  * are not eligible for routing. The system maintains this state with
17  * a 2 dimensional bitmap (the first for priority class, the second for cpus
18  * in that class). Therefore a typical application without affinity
19  * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
20  * searches). For tasks with affinity restrictions, the algorithm has a
21  * worst case complexity of O(min(102, nr_domcpus)), though the scenario that
22  * yields the worst case search is fairly contrived.
23  *
24  * This program is free software; you can redistribute it and/or
25  * modify it under the terms of the GNU General Public License
26  * as published by the Free Software Foundation; version 2
27  * of the License.
28  */
29 
30 #include <linux/gfp.h>
31 #include "cpupri.h"
32 
33 /* Convert between a 140 based task->prio, and our 102 based cpupri */
34 static int convert_prio(int prio)
35 {
36  int cpupri;
37 
38  if (prio == CPUPRI_INVALID)
39  cpupri = CPUPRI_INVALID;
40  else if (prio == MAX_PRIO)
41  cpupri = CPUPRI_IDLE;
42  else if (prio >= MAX_RT_PRIO)
43  cpupri = CPUPRI_NORMAL;
44  else
45  cpupri = MAX_RT_PRIO - prio + 1;
46 
47  return cpupri;
48 }
49 
65 int cpupri_find(struct cpupri *cp, struct task_struct *p,
66  struct cpumask *lowest_mask)
67 {
68  int idx = 0;
69  int task_pri = convert_prio(p->prio);
70 
71  if (task_pri >= MAX_RT_PRIO)
72  return 0;
73 
74  for (idx = 0; idx < task_pri; idx++) {
75  struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
76  int skip = 0;
77 
78  if (!atomic_read(&(vec)->count))
79  skip = 1;
80  /*
81  * When looking at the vector, we need to read the counter,
82  * do a memory barrier, then read the mask.
83  *
84  * Note: This is still all racey, but we can deal with it.
85  * Ideally, we only want to look at masks that are set.
86  *
87  * If a mask is not set, then the only thing wrong is that we
88  * did a little more work than necessary.
89  *
90  * If we read a zero count but the mask is set, because of the
91  * memory barriers, that can only happen when the highest prio
92  * task for a run queue has left the run queue, in which case,
93  * it will be followed by a pull. If the task we are processing
94  * fails to find a proper place to go, that pull request will
95  * pull this task if the run queue is running at a lower
96  * priority.
97  */
98  smp_rmb();
99 
100  /* Need to do the rmb for every iteration */
101  if (skip)
102  continue;
103 
104  if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
105  continue;
106 
107  if (lowest_mask) {
108  cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
109 
110  /*
111  * We have to ensure that we have at least one bit
112  * still set in the array, since the map could have
113  * been concurrently emptied between the first and
114  * second reads of vec->mask. If we hit this
115  * condition, simply act as though we never hit this
116  * priority level and continue on.
117  */
118  if (cpumask_any(lowest_mask) >= nr_cpu_ids)
119  continue;
120  }
121 
122  return 1;
123  }
124 
125  return 0;
126 }
127 
138 void cpupri_set(struct cpupri *cp, int cpu, int newpri)
139 {
140  int *currpri = &cp->cpu_to_pri[cpu];
141  int oldpri = *currpri;
142  int do_mb = 0;
143 
144  newpri = convert_prio(newpri);
145 
146  BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);
147 
148  if (newpri == oldpri)
149  return;
150 
151  /*
152  * If the cpu was currently mapped to a different value, we
153  * need to map it to the new value then remove the old value.
154  * Note, we must add the new value first, otherwise we risk the
155  * cpu being missed by the priority loop in cpupri_find.
156  */
157  if (likely(newpri != CPUPRI_INVALID)) {
158  struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
159 
160  cpumask_set_cpu(cpu, vec->mask);
161  /*
162  * When adding a new vector, we update the mask first,
163  * do a write memory barrier, and then update the count, to
164  * make sure the vector is visible when count is set.
165  */
167  atomic_inc(&(vec)->count);
168  do_mb = 1;
169  }
170  if (likely(oldpri != CPUPRI_INVALID)) {
171  struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
172 
173  /*
174  * Because the order of modification of the vec->count
175  * is important, we must make sure that the update
176  * of the new prio is seen before we decrement the
177  * old prio. This makes sure that the loop sees
178  * one or the other when we raise the priority of
179  * the run queue. We don't care about when we lower the
180  * priority, as that will trigger an rt pull anyway.
181  *
182  * We only need to do a memory barrier if we updated
183  * the new priority vec.
184  */
185  if (do_mb)
187 
188  /*
189  * When removing from the vector, we decrement the counter first
190  * do a memory barrier and then clear the mask.
191  */
192  atomic_dec(&(vec)->count);
194  cpumask_clear_cpu(cpu, vec->mask);
195  }
196 
197  *currpri = newpri;
198 }
199 
206 int cpupri_init(struct cpupri *cp)
207 {
208  int i;
209 
210  memset(cp, 0, sizeof(*cp));
211 
212  for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
213  struct cpupri_vec *vec = &cp->pri_to_cpu[i];
214 
215  atomic_set(&vec->count, 0);
216  if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
217  goto cleanup;
218  }
219 
221  cp->cpu_to_pri[i] = CPUPRI_INVALID;
222  return 0;
223 
224 cleanup:
225  for (i--; i >= 0; i--)
226  free_cpumask_var(cp->pri_to_cpu[i].mask);
227  return -ENOMEM;
228 }
229 
234 void cpupri_cleanup(struct cpupri *cp)
235 {
236  int i;
237 
238  for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
239  free_cpumask_var(cp->pri_to_cpu[i].mask);
240 }