lib/proportions.c (Linux Kernel 3.7.1)
/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <[email protected]>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 *   [ Furthermore, when we choose period to be 2^n it can be written in terms
 *     of binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 *   [ Its monotonically increasing property can be applied to mitigate the
 *     wraparound issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
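
/*
 * Worked example (illustrative numbers, not part of the original source):
 * take shift = 3, so period = 8 and period/2 = 4. With the global event
 * count at t = 13 and a local count of x_{j} = 3:
 *
 *   c           = t / (period/2)            = 13 / 4 = 3
 *   denominator = period/2 + (t % period/2) = 4 + 1  = 5
 *   p_{j}       = x_{j} / denominator       = 3 / 5
 *
 * If instead that local was last normalised at c_{j} = 1 with x_{j} = 12,
 * the lazy catch-up computes x_{j} /= 2^(c - c_{j}) = 12 / 2^2 = 3 before
 * the value is used: each missed period halves the stale contribution,
 * which is exactly the exponential decay of the series above.
 */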

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
        int err;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        pd->index = 0;
        pd->pg[0].shift = shift;
        mutex_init(&pd->mutex);
        err = percpu_counter_init(&pd->pg[0].events, 0);
        if (err)
                goto out;

        err = percpu_counter_init(&pd->pg[1].events, 0);
        if (err)
                percpu_counter_destroy(&pd->pg[0].events);

out:
        return err;
}
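
/*
 * Usage sketch (hypothetical caller; the names my_events, obj->local, num
 * and den are illustrative, not part of this file). One descriptor is
 * paired with one prop_local_percpu per tracked object:
 *
 *   static struct prop_descriptor my_events;
 *
 *   prop_descriptor_init(&my_events, ilog2(4096));
 *   prop_local_init_percpu(&obj->local);
 *   ...
 *   __prop_inc_percpu(&my_events, &obj->local);
 *   ...
 *   prop_fraction_percpu(&my_events, &obj->local, &num, &den);
 *
 * This is the pattern used historically by the writeback code to estimate
 * each backing device's share of completed writes.
 */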

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
        int index;
        int offset;
        u64 events;
        unsigned long flags;

        if (shift > PROP_MAX_SHIFT)
                shift = PROP_MAX_SHIFT;

        mutex_lock(&pd->mutex);

        index = pd->index ^ 1;
        offset = pd->pg[pd->index].shift - shift;
        if (!offset)
                goto out;

        pd->pg[index].shift = shift;

        local_irq_save(flags);
        events = percpu_counter_sum(&pd->pg[pd->index].events);
        if (offset < 0)
                events <<= -offset;
        else
                events >>= offset;
        percpu_counter_set(&pd->pg[index].events, events);

        /*
         * ensure the new pg is fully written before the switch
         */
        smp_wmb();
        pd->index = index;
        local_irq_restore(flags);

        synchronize_rcu();

out:
        mutex_unlock(&pd->mutex);
}
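
/*
 * Example (illustrative numbers): shrinking the shift from 10 to 8 gives
 * offset = 2, so the summed event count is divided by 4 (events >>= 2)
 * before the flipped copy goes live. Growing the shift instead multiplies
 * the count, keeping the events-per-period density, and hence the computed
 * proportions, roughly unchanged across the switch.
 */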

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
        int index;

        rcu_read_lock();
        index = pd->index;
        /*
         * match the wmb from prop_change_shift()
         */
        smp_rmb();
        return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
        rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
        int offset = *pl_shift - new_shift;

        if (!offset)
                return;

        if (offset < 0)
                *pl_period <<= -offset;
        else
                *pl_period >>= offset;

        *pl_shift = new_shift;
}
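
/*
 * Example (illustrative numbers): if the descriptor's shift went from 10
 * to 8, prop_change_shift() divided the global event counter by 4, so a
 * local's remembered period stamp must be divided by 4 as well (offset = 2,
 * *pl_period >>= 2) before prop_norm_*() can compare it against the new
 * global period count.
 */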

/*
 * PERCPU
 */

/*
 * Batch size for the local event counters, scaled with the number of CPUs
 * to limit contention on the shared count.
 */
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
        percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

        /*
         * For each missed period, we halve the local counter;
         * basically:
         *   pl->events >>= number of missed periods;
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (period < BITS_PER_LONG) {
                s64 val = percpu_counter_read(&pl->events);

                if (val < (nr_cpu_ids * PROP_BATCH))
                        val = percpu_counter_sum(&pl->events);

                __percpu_counter_add(&pl->events, -val + (val >> period),
                                     PROP_BATCH);
        } else
                percpu_counter_set(&pl->events, 0);

        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}
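
/*
 * Worked example (illustrative numbers): with pg->shift = 3 the half
 * period is 4 events. Suppose the global counter reads 22, so
 * global_period = 20, and this local was last normalised at
 * pl->period = 12 with 8 local events. Then:
 *
 *   missed periods = (20 - 12) >> 2 = 2
 *   new value      = 8 >> 2        = 2
 *
 * and the code applies the delta -8 + 2 = -6 in one batched add rather
 * than looping once per missed period. The percpu_counter_sum() fallback
 * for small values trades speed for accuracy: an approximate read of a
 * nearly empty counter could otherwise push it negative.
 */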

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);
        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
                           struct prop_local_percpu *pl, long frac)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_percpu(pg, pl);

        if (unlikely(frac != PROP_FRAC_BASE)) {
                unsigned long period_2 = 1UL << (pg->shift - 1);
                unsigned long counter_mask = period_2 - 1;
                unsigned long global_count;
                long numerator, denominator;

                numerator = percpu_counter_read_positive(&pl->events);
                global_count = percpu_counter_read(&pg->events);
                denominator = period_2 + (global_count & counter_mask);

                if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
                        goto out_put;
        }

        percpu_counter_add(&pl->events, 1);
        percpu_counter_add(&pg->events, 1);

out_put:
        prop_put_global(pd, pg);
}
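
/*
 * Example (illustrative numbers): with a half period of 8 and the global
 * counter 3 events into it, denominator = 8 + 3 = 11. A caller passing
 * frac = PROP_FRAC_BASE / 4 caps this local at a quarter of the total, so
 * the event is only counted while
 *
 *   numerator <= (11 * (PROP_FRAC_BASE / 4)) >> PROP_FRAC_SHIFT = 2
 *
 * Events over the limit are dropped from both counters, so an over-limit
 * local simply stops gaining share until it decays back under the cap.
 */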

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
                struct prop_local_percpu *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_percpu(pg, pl);
        *numerator = percpu_counter_read_positive(&pl->events);

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}
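
/*
 * The numerator/denominator pair is returned undivided so a caller can
 * pick its own rounding and overflow handling, e.g. (hypothetical):
 *
 *   share = budget * numerator / denominator;
 */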

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
        raw_spin_lock_init(&pl->lock);
        pl->shift = 0;
        pl->period = 0;
        pl->events = 0;
        return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
        unsigned long period = 1UL << (pg->shift - 1);
        unsigned long period_mask = ~(period - 1);
        unsigned long global_period;
        unsigned long flags;

        global_period = percpu_counter_read(&pg->events);
        global_period &= period_mask;

        /*
         * Fast path - check if the local and global period count still match
         * outside of the lock.
         */
        if (pl->period == global_period)
                return;

        raw_spin_lock_irqsave(&pl->lock, flags);
        prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
        /*
         * For each missed period, we halve the local counter.
         */
        period = (global_period - pl->period) >> (pg->shift - 1);
        if (likely(period < BITS_PER_LONG))
                pl->events >>= period;
        else
                pl->events = 0;
        pl->period = global_period;
        raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
        struct prop_global *pg = prop_get_global(pd);

        prop_norm_single(pg, pl);
        pl->events++;
        percpu_counter_add(&pg->events, 1);
        prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
                struct prop_local_single *pl,
                long *numerator, long *denominator)
{
        struct prop_global *pg = prop_get_global(pd);
        unsigned long period_2 = 1UL << (pg->shift - 1);
        unsigned long counter_mask = period_2 - 1;
        unsigned long global_count;

        prop_norm_single(pg, pl);
        *numerator = pl->events;

        global_count = percpu_counter_read(&pg->events);
        *denominator = period_2 + (global_count & counter_mask);

        prop_put_global(pd, pg);
}