timekeeping.c (Linux Kernel 3.7.1 source listing)
1 /*
2  * linux/kernel/time/timekeeping.c
3  *
4  * Kernel timekeeping code and accessor functions
5  *
6  * This code was moved from linux/kernel/timer.c.
7  * Please see that file for copyright and history logs.
8  *
9  */
10 
11 #include <linux/timekeeper_internal.h>
12 #include <linux/module.h>
13 #include <linux/interrupt.h>
14 #include <linux/percpu.h>
15 #include <linux/init.h>
16 #include <linux/mm.h>
17 #include <linux/sched.h>
18 #include <linux/syscore_ops.h>
19 #include <linux/clocksource.h>
20 #include <linux/jiffies.h>
21 #include <linux/time.h>
22 #include <linux/tick.h>
23 #include <linux/stop_machine.h>
24 
25 
26 static struct timekeeper timekeeper;
27 
28 /*
29  * This read-write spinlock protects us from races in SMP while
30  * playing with xtime.
31  */
32 __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
33 
34 /* flag for if timekeeping is suspended */
35 int __read_mostly timekeeping_suspended;
36 
37 static inline void tk_normalize_xtime(struct timekeeper *tk)
38 {
39  while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) {
40  tk->xtime_nsec -= (u64)NSEC_PER_SEC << tk->shift;
41  tk->xtime_sec++;
42  }
43 }
44 
45 static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
46 {
47  tk->xtime_sec = ts->tv_sec;
48  tk->xtime_nsec = (u64)ts->tv_nsec << tk->shift;
49 }
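
Throughout this file, xtime_nsec holds nanoseconds left-shifted by tk->shift, so the fractional nanoseconds produced by the cycles * mult products further down are retained between updates; the inverse shift is done by tk_xtime(), which in this kernel series lives in the timekeeper header rather than here. A standalone sketch with purely hypothetical numbers (plain C, illustrative only):

/* Illustration only: xtime_nsec is kept as nanoseconds << shift so the
 * fractional nanoseconds from cycles * mult products are not lost. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t shift = 24;                            /* assumed clocksource shift */
        uint64_t xtime_nsec = 500ULL << shift;          /* as if tk_set_xtime() stored 500 ns */
        uint64_t cycle_delta = 3, mult = (100u << 24) + 5; /* ~100.0000003 ns per cycle */

        xtime_nsec += cycle_delta * mult;               /* accumulate, still shifted */

        printf("whole ns = %llu, raw shifted value = %llu\n",
               (unsigned long long)(xtime_nsec >> shift),
               (unsigned long long)xtime_nsec);
        return 0;
}
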
50 
51 static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
52 {
53  tk->xtime_sec += ts->tv_sec;
54  tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
55  tk_normalize_xtime(tk);
56 }
57 
58 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
59 {
60  struct timespec tmp;
61 
62  /*
63  * Verify consistency of: offset_real = -wall_to_monotonic
64  * before modifying anything
65  */
66  set_normalized_timespec(&tmp, -tk->wall_to_monotonic.tv_sec,
67  -tk->wall_to_monotonic.tv_nsec);
68  WARN_ON_ONCE(tk->offs_real.tv64 != timespec_to_ktime(tmp).tv64);
69  tk->wall_to_monotonic = wtm;
70  set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
71  tk->offs_real = timespec_to_ktime(tmp);
72 }
73 
74 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
75 {
76  /* Verify consistency before modifying */
77  WARN_ON_ONCE(tk->offs_boot.tv64 != timespec_to_ktime(tk->total_sleep_time).tv64);
78 
79  tk->total_sleep_time = t;
80  tk->offs_boot = timespec_to_ktime(t);
81 }
82 
93 static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
94 {
95  cycle_t interval;
96  u64 tmp, ntpinterval;
97  struct clocksource *old_clock;
98 
99  old_clock = tk->clock;
100  tk->clock = clock;
101  clock->cycle_last = clock->read(clock);
102 
103  /* Do the ns -> cycle conversion first, using original mult */
104  tmp = NTP_INTERVAL_LENGTH;
105  tmp <<= clock->shift;
106  ntpinterval = tmp;
107  tmp += clock->mult/2;
108  do_div(tmp, clock->mult);
109  if (tmp == 0)
110  tmp = 1;
111 
112  interval = (cycle_t) tmp;
113  tk->cycle_interval = interval;
114 
115  /* Go back from cycles -> shifted ns */
116  tk->xtime_interval = (u64) interval * clock->mult;
117  tk->xtime_remainder = ntpinterval - tk->xtime_interval;
118  tk->raw_interval =
119  ((u64) interval * clock->mult) >> clock->shift;
120 
121  /* if changing clocks, convert xtime_nsec shift units */
122  if (old_clock) {
123  int shift_change = clock->shift - old_clock->shift;
124  if (shift_change < 0)
125  tk->xtime_nsec >>= -shift_change;
126  else
127  tk->xtime_nsec <<= shift_change;
128  }
129  tk->shift = clock->shift;
130 
131  tk->ntp_error = 0;
132  tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
133 
134  /*
135  * The timekeeper keeps its own mult values for the currently
136  * active clocksource. These value will be adjusted via NTP
137  * to counteract clock drifting.
138  */
139  tk->mult = clock->mult;
140 }
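
A worked example of the interval math above may help. The numbers below are purely hypothetical (a 10 MHz clocksource with shift 24 and mult 100 << 24, and an NTP interval length of 1,000,000 ns, i.e. HZ == 1000), and do_div() is replaced by a plain division:

/* Worked example of the tk_setup_internals() interval math (illustrative only). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t shift = 24;
        uint32_t mult  = 100u << 24;                    /* ~100 ns per cycle */
        uint64_t ntp_interval_length = 1000000;         /* ns per tick, assuming HZ=1000 */

        /* ns -> cycles, rounding to nearest, as in the function above */
        uint64_t tmp = ntp_interval_length << shift;
        uint64_t ntpinterval = tmp;
        tmp += mult / 2;
        tmp /= mult;                                    /* do_div() in the kernel */

        uint64_t cycle_interval  = tmp;                 /* 10000 cycles per tick */
        uint64_t xtime_interval  = cycle_interval * mult;   /* shifted ns per tick */
        uint64_t xtime_remainder = ntpinterval - xtime_interval;

        printf("cycle_interval  = %llu cycles\n", (unsigned long long)cycle_interval);
        printf("xtime_interval  = %llu ns (unshifted)\n",
               (unsigned long long)(xtime_interval >> shift));
        printf("xtime_remainder = %llu (shifted ns)\n",
               (unsigned long long)xtime_remainder);
        return 0;
}
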
141 
142 /* Timekeeper helper functions. */
143 static inline s64 timekeeping_get_ns(struct timekeeper *tk)
144 {
145  cycle_t cycle_now, cycle_delta;
146  struct clocksource *clock;
147  s64 nsec;
148 
149  /* read clocksource: */
150  clock = tk->clock;
151  cycle_now = clock->read(clock);
152 
153  /* calculate the delta since the last update_wall_time: */
154  cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
155 
156  nsec = cycle_delta * tk->mult + tk->xtime_nsec;
157  nsec >>= tk->shift;
158 
159  /* If arch requires, add in gettimeoffset() */
160  return nsec + arch_gettimeoffset();
161 }
162 
163 static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
164 {
165  cycle_t cycle_now, cycle_delta;
166  struct clocksource *clock;
167  s64 nsec;
168 
169  /* read clocksource: */
170  clock = tk->clock;
171  cycle_now = clock->read(clock);
172 
173  /* calculate the delta since the last update_wall_time: */
174  cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
175 
176  /* convert delta to nanoseconds. */
177  nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
178 
179  /* If arch requires, add in gettimeoffset() */
180  return nsec + arch_gettimeoffset();
181 }
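
The two getters above differ only in the scale factor: timekeeping_get_ns() uses the NTP-steered tk->mult plus the accumulated shifted remainder, while the raw variant uses the clocksource's unadjusted mult/shift. A small sketch with hypothetical numbers showing how they diverge:

/* Illustrative comparison of the adjusted vs. raw nanosecond paths
 * (hypothetical numbers; not kernel code). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t shift       = 24;
        uint32_t clock_mult  = 100u << 24;              /* clocksource's nominal rate */
        uint32_t tk_mult     = (100u << 24) + 4096;     /* NTP-steered copy, slightly faster */
        uint64_t cycle_delta = 5000;                    /* cycles since last update */
        uint64_t xtime_nsec  = 0;                       /* accumulated shifted remainder */

        uint64_t adjusted = (cycle_delta * tk_mult + xtime_nsec) >> shift;
        uint64_t raw      = (cycle_delta * clock_mult) >> shift;

        printf("adjusted ns = %llu, raw ns = %llu\n",
               (unsigned long long)adjusted, (unsigned long long)raw);
        return 0;
}
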
182 
183 /* must hold write on timekeeper.lock */
184 static void timekeeping_update(struct timekeeper *tk, bool clearntp)
185 {
186  if (clearntp) {
187  tk->ntp_error = 0;
188  ntp_clear();
189  }
190  update_vsyscall(tk);
191 }
192 
200 static void timekeeping_forward_now(struct timekeeper *tk)
201 {
202  cycle_t cycle_now, cycle_delta;
203  struct clocksource *clock;
204  s64 nsec;
205 
206  clock = tk->clock;
207  cycle_now = clock->read(clock);
208  cycle_delta = (cycle_now - clock->cycle_last) & clock->mask;
209  clock->cycle_last = cycle_now;
210 
211  tk->xtime_nsec += cycle_delta * tk->mult;
212 
213  /* If arch requires, add in gettimeoffset() */
214  tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
215 
216  tk_normalize_xtime(tk);
217 
218  nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift);
219  timespec_add_ns(&tk->raw_time, nsec);
220 }
221 
228 void getnstimeofday(struct timespec *ts)
229 {
230  struct timekeeper *tk = &timekeeper;
231  unsigned long seq;
232  s64 nsecs = 0;
233 
234  WARN_ON(timekeeping_suspended);
235 
236  do {
237  seq = read_seqbegin(&tk->lock);
238 
239  ts->tv_sec = tk->xtime_sec;
240  nsecs = timekeeping_get_ns(tk);
241 
242  } while (read_seqretry(&tk->lock, seq));
243 
244  ts->tv_nsec = 0;
245  timespec_add_ns(ts, nsecs);
246 }
247 EXPORT_SYMBOL(getnstimeofday);
248 
249 ktime_t ktime_get(void)
250 {
251  struct timekeeper *tk = &timekeeper;
252  unsigned int seq;
253  s64 secs, nsecs;
254 
255  WARN_ON(timekeeping_suspended);
256 
257  do {
258  seq = read_seqbegin(&tk->lock);
259  secs = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
260  nsecs = timekeeping_get_ns(tk) + tk->wall_to_monotonic.tv_nsec;
261 
262  } while (read_seqretry(&tk->lock, seq));
263  /*
264  * Use ktime_set/ktime_add_ns to create a proper ktime on
265  * 32-bit architectures without CONFIG_KTIME_SCALAR.
266  */
267  return ktime_add_ns(ktime_set(secs, 0), nsecs);
268 }
269 EXPORT_SYMBOL_GPL(ktime_get);
270 
279 void ktime_get_ts(struct timespec *ts)
280 {
281  struct timekeeper *tk = &timekeeper;
282  struct timespec tomono;
283  s64 nsec;
284  unsigned int seq;
285 
286  WARN_ON(timekeeping_suspended);
287 
288  do {
289  seq = read_seqbegin(&tk->lock);
290  ts->tv_sec = tk->xtime_sec;
291  nsec = timekeeping_get_ns(tk);
292  tomono = tk->wall_to_monotonic;
293 
294  } while (read_seqretry(&tk->lock, seq));
295 
296  ts->tv_sec += tomono.tv_sec;
297  ts->tv_nsec = 0;
298  timespec_add_ns(ts, nsec + tomono.tv_nsec);
299 }
300 EXPORT_SYMBOL_GPL(ktime_get_ts);
301 
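
Callers that only need elapsed time generally prefer these monotonic getters, since CLOCK_MONOTONIC is not stepped by do_settimeofday(). A minimal, hypothetical driver-style sketch (measure_something_ns() and its work callback are made up for illustration):

/* Hypothetical caller: measuring elapsed time with the monotonic clock. */
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static s64 measure_something_ns(void (*work)(void))
{
        ktime_t start = ktime_get();            /* CLOCK_MONOTONIC */

        work();

        /* unaffected by wall-clock steps from do_settimeofday() */
        return ktime_to_ns(ktime_sub(ktime_get(), start));
}
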
302 #ifdef CONFIG_NTP_PPS
303 
313 void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
314 {
315  struct timekeeper *tk = &timekeeper;
316  unsigned long seq;
317  s64 nsecs_raw, nsecs_real;
318 
319  WARN_ON_ONCE(timekeeping_suspended);
320 
321  do {
322  seq = read_seqbegin(&tk->lock);
323 
324  *ts_raw = tk->raw_time;
325  ts_real->tv_sec = tk->xtime_sec;
326  ts_real->tv_nsec = 0;
327 
328  nsecs_raw = timekeeping_get_ns_raw(tk);
329  nsecs_real = timekeeping_get_ns(tk);
330 
331  } while (read_seqretry(&tk->lock, seq));
332 
333  timespec_add_ns(ts_raw, nsecs_raw);
334  timespec_add_ns(ts_real, nsecs_real);
335 }
336 EXPORT_SYMBOL(getnstime_raw_and_real);
337 
338 #endif /* CONFIG_NTP_PPS */
339 
346 void do_gettimeofday(struct timeval *tv)
347 {
348  struct timespec now;
349 
350  getnstimeofday(&now);
351  tv->tv_sec = now.tv_sec;
352  tv->tv_usec = now.tv_nsec/1000;
353 }
354 EXPORT_SYMBOL(do_gettimeofday);
355 
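
do_gettimeofday() is only a microsecond-resolution convenience wrapper around getnstimeofday(); a hypothetical caller wanting both forms might look like this (log_wall_time_example() is made up for illustration):

/* Hypothetical caller: wall-clock timestamps at both resolutions. */
#include <linux/kernel.h>
#include <linux/time.h>

static void log_wall_time_example(void)
{
        struct timespec ts;
        struct timeval tv;

        getnstimeofday(&ts);            /* nanosecond resolution */
        do_gettimeofday(&tv);           /* microsecond resolution */

        pr_info("wall time: %ld.%09ld (%ld.%06ld)\n",
                (long)ts.tv_sec, ts.tv_nsec, (long)tv.tv_sec, (long)tv.tv_usec);
}
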
362 int do_settimeofday(const struct timespec *tv)
363 {
364  struct timekeeper *tk = &timekeeper;
365  struct timespec ts_delta, xt;
366  unsigned long flags;
367 
368  if (!timespec_valid_strict(tv))
369  return -EINVAL;
370 
371  write_seqlock_irqsave(&tk->lock, flags);
372 
373  timekeeping_forward_now(tk);
374 
375  xt = tk_xtime(tk);
376  ts_delta.tv_sec = tv->tv_sec - xt.tv_sec;
377  ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec;
378 
379  tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, ts_delta));
380 
381  tk_set_xtime(tk, tv);
382 
383  timekeeping_update(tk, true);
384 
385  write_sequnlock_irqrestore(&tk->lock, flags);
386 
387  /* signal hrtimers about time change */
388  clock_was_set();
389 
390  return 0;
391 }
392 EXPORT_SYMBOL(do_settimeofday);
393 
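
The usual caller is the settimeofday()/clock_settime(CLOCK_REALTIME) syscall path, which checks permissions before handing the new time down. A simplified, hypothetical sketch of such a caller (the real syscall code does more validation; set_wall_clock_example() is made up):

/* Simplified, hypothetical caller mirroring the settimeofday() syscall
 * path: check permission, validate, then hand the time to the core. */
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/time.h>

static int set_wall_clock_example(const struct timespec *new_time)
{
        if (!capable(CAP_SYS_TIME))
                return -EPERM;
        if (!timespec_valid(new_time))
                return -EINVAL;

        /* takes tk->lock, updates the vsyscall data, fires clock_was_set() */
        return do_settimeofday(new_time);
}
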
400 int timekeeping_inject_offset(struct timespec *ts)
401 {
402  struct timekeeper *tk = &timekeeper;
403  unsigned long flags;
404  struct timespec tmp;
405  int ret = 0;
406 
407  if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
408  return -EINVAL;
409 
410  write_seqlock_irqsave(&tk->lock, flags);
411 
412  timekeeping_forward_now(tk);
413 
414  /* Make sure the proposed value is valid */
415  tmp = timespec_add(tk_xtime(tk), *ts);
416  if (!timespec_valid_strict(&tmp)) {
417  ret = -EINVAL;
418  goto error;
419  }
420 
421  tk_xtime_add(tk, ts);
422  tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
423 
424 error: /* even if we error out, we forwarded the time, so call update */
425  timekeeping_update(tk, true);
426 
427  write_sequnlock_irqrestore(&tk->lock, flags);
428 
429  /* signal hrtimers about time change */
430  clock_was_set();
431 
432  return ret;
433 }
434 EXPORT_SYMBOL(timekeeping_inject_offset);
435 
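
This function backs adjtimex(ADJ_SETOFFSET): the clock is nudged by a signed delta rather than set absolutely. Because of the tv_nsec range check at the top of the function, a negative offset has to be passed in normalized form; a hypothetical example stepping the clock back by 500 microseconds:

/* Hypothetical example: step the wall clock back by 500 microseconds, the
 * kind of small correction adjtimex(ADJ_SETOFFSET) feeds into this function.
 * Note the normalized form: tv_nsec must stay within [0, NSEC_PER_SEC). */
#include <linux/time.h>

static int nudge_clock_back_500us(void)
{
        struct timespec delta = {
                .tv_sec  = -1,
                .tv_nsec = NSEC_PER_SEC - 500 * NSEC_PER_USEC,  /* == -500 us */
        };

        return timekeeping_inject_offset(&delta);
}
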
441 static int change_clocksource(void *data)
442 {
443  struct timekeeper *tk = &timekeeper;
444  struct clocksource *new, *old;
445  unsigned long flags;
446 
447  new = (struct clocksource *) data;
448 
449  write_seqlock_irqsave(&tk->lock, flags);
450 
451  timekeeping_forward_now(tk);
452  if (!new->enable || new->enable(new) == 0) {
453  old = tk->clock;
454  tk_setup_internals(tk, new);
455  if (old->disable)
456  old->disable(old);
457  }
458  timekeeping_update(tk, true);
459 
460  write_sequnlock_irqrestore(&tk->lock, flags);
461 
462  return 0;
463 }
464 
472 void timekeeping_notify(struct clocksource *clock)
473 {
474  struct timekeeper *tk = &timekeeper;
475 
476  if (tk->clock == clock)
477  return;
478  stop_machine(change_clocksource, clock, NULL);
479  tick_clock_notify();
480 }
481 
487 ktime_t ktime_get_real(void)
488 {
489  struct timespec now;
490 
491  getnstimeofday(&now);
492 
493  return timespec_to_ktime(now);
494 }
495 EXPORT_SYMBOL_GPL(ktime_get_real);
496 
503 void getrawmonotonic(struct timespec *ts)
504 {
505  struct timekeeper *tk = &timekeeper;
506  unsigned long seq;
507  s64 nsecs;
508 
509  do {
510  seq = read_seqbegin(&tk->lock);
511  nsecs = timekeeping_get_ns_raw(tk);
512  *ts = tk->raw_time;
513 
514  } while (read_seqretry(&tk->lock, seq));
515 
516  timespec_add_ns(ts, nsecs);
517 }
518 EXPORT_SYMBOL(getrawmonotonic);
519 
523 int timekeeping_valid_for_hres(void)
524 {
525  struct timekeeper *tk = &timekeeper;
526  unsigned long seq;
527  int ret;
528 
529  do {
530  seq = read_seqbegin(&tk->lock);
531 
532  ret = tk->clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
533 
534  } while (read_seqretry(&tk->lock, seq));
535 
536  return ret;
537 }
538 
542 u64 timekeeping_max_deferment(void)
543 {
544  struct timekeeper *tk = &timekeeper;
545  unsigned long seq;
546  u64 ret;
547 
548  do {
549  seq = read_seqbegin(&tk->lock);
550 
551  ret = tk->clock->max_idle_ns;
552 
553  } while (read_seqretry(&tk->lock, seq));
554 
555  return ret;
556 }
557 
567 void __attribute__((weak)) read_persistent_clock(struct timespec *ts)
568 {
569  ts->tv_sec = 0;
570  ts->tv_nsec = 0;
571 }
572 
582 void __attribute__((weak)) read_boot_clock(struct timespec *ts)
583 {
584  ts->tv_sec = 0;
585  ts->tv_nsec = 0;
586 }
587 
588 /*
589  * timekeeping_init - Initializes the clocksource and common timekeeping values
590  */
591 void __init timekeeping_init(void)
592 {
593  struct timekeeper *tk = &timekeeper;
594  struct clocksource *clock;
595  unsigned long flags;
596  struct timespec now, boot, tmp;
597 
598  read_persistent_clock(&now);
599  if (!timespec_valid_strict(&now)) {
600  pr_warn("WARNING: Persistent clock returned invalid value!\n"
601  " Check your CMOS/BIOS settings.\n");
602  now.tv_sec = 0;
603  now.tv_nsec = 0;
604  }
605 
606  read_boot_clock(&boot);
607  if (!timespec_valid_strict(&boot)) {
608  pr_warn("WARNING: Boot clock returned invalid value!\n"
609  " Check your CMOS/BIOS settings.\n");
610  boot.tv_sec = 0;
611  boot.tv_nsec = 0;
612  }
613 
614  seqlock_init(&tk->lock);
615 
616  ntp_init();
617 
618  write_seqlock_irqsave(&tk->lock, flags);
619  clock = clocksource_default_clock();
620  if (clock->enable)
621  clock->enable(clock);
622  tk_setup_internals(tk, clock);
623 
624  tk_set_xtime(tk, &now);
625  tk->raw_time.tv_sec = 0;
626  tk->raw_time.tv_nsec = 0;
627  if (boot.tv_sec == 0 && boot.tv_nsec == 0)
628  boot = tk_xtime(tk);
629 
630  set_normalized_timespec(&tmp, -boot.tv_sec, -boot.tv_nsec);
631  tk_set_wall_to_mono(tk, tmp);
632 
633  tmp.tv_sec = 0;
634  tmp.tv_nsec = 0;
635  tk_set_sleep_time(tk, tmp);
636 
637  write_sequnlock_irqrestore(&tk->lock, flags);
638 }
639 
640 /* time in seconds when suspend began */
641 static struct timespec timekeeping_suspend_time;
642 
650 static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
651  struct timespec *delta)
652 {
653  if (!timespec_valid_strict(delta)) {
654  printk(KERN_WARNING "__timekeeping_inject_sleeptime: Invalid "
655  "sleep delta value!\n");
656  return;
657  }
658  tk_xtime_add(tk, delta);
659  tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
660  tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
661 }
662 
673 void timekeeping_inject_sleeptime(struct timespec *delta)
674 {
675  struct timekeeper *tk = &timekeeper;
676  unsigned long flags;
677  struct timespec ts;
678 
679  /* Make sure we don't set the clock twice */
680  read_persistent_clock(&ts);
681  if (!(ts.tv_sec == 0 && ts.tv_nsec == 0))
682  return;
683 
684  write_seqlock_irqsave(&tk->lock, flags);
685 
686  timekeeping_forward_now(tk);
687 
688  __timekeeping_inject_sleeptime(tk, delta);
689 
690  timekeeping_update(tk, true);
691 
692  write_sequnlock_irqrestore(&tk->lock, flags);
693 
694  /* signal hrtimers about time change */
695  clock_was_set();
696 }
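
In mainline this is driven from the RTC resume path, which measures the sleep length against a persistent RTC and only injects it when read_persistent_clock() reported nothing (per the check above). A simplified, hypothetical sketch of such a caller; the names and structure are illustrative, not the actual RTC class code:

/* Hypothetical, simplified sketch of an RTC-driven resume path: compute how
 * long the system slept and account it to the timekeeping core. */
#include <linux/time.h>

static struct timespec sleep_start;     /* captured from the RTC at suspend */

static void example_rtc_resume(struct timespec rtc_now)
{
        struct timespec slept;

        if (timespec_compare(&rtc_now, &sleep_start) <= 0)
                return;                         /* RTC went backwards; skip */

        slept = timespec_sub(rtc_now, sleep_start);
        timekeeping_inject_sleeptime(&slept);   /* advances wall and boot time */
}
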
697 
705 static void timekeeping_resume(void)
706 {
707  struct timekeeper *tk = &timekeeper;
708  unsigned long flags;
709  struct timespec ts;
710 
711  read_persistent_clock(&ts);
712 
713  clockevents_resume();
714  clocksource_resume();
715 
716  write_seqlock_irqsave(&tk->lock, flags);
717 
718  if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) {
719  ts = timespec_sub(ts, timekeeping_suspend_time);
720  __timekeeping_inject_sleeptime(tk, &ts);
721  }
722  /* re-base the last cycle value */
723  tk->clock->cycle_last = tk->clock->read(tk->clock);
724  tk->ntp_error = 0;
725  timekeeping_suspended = 0;
726  timekeeping_update(tk, false);
727  write_sequnlock_irqrestore(&tk->lock, flags);
728 
729  touch_softlockup_watchdog();
730 
731  clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
732 
733  /* Resume hrtimers */
734  hrtimers_resume();
735 }
736 
737 static int timekeeping_suspend(void)
738 {
739  struct timekeeper *tk = &timekeeper;
740  unsigned long flags;
741  struct timespec delta, delta_delta;
742  static struct timespec old_delta;
743 
744  read_persistent_clock(&timekeeping_suspend_time);
745 
746  write_seqlock_irqsave(&tk->lock, flags);
747  timekeeping_forward_now(tk);
748  timekeeping_suspended = 1;
749 
750  /*
751  * To avoid drift caused by repeated suspend/resumes,
752  * which each can add ~1 second drift error,
753  * try to compensate so the difference in system time
754  * and persistent_clock time stays close to constant.
755  */
756  delta = timespec_sub(tk_xtime(tk), timekeeping_suspend_time);
757  delta_delta = timespec_sub(delta, old_delta);
758  if (abs(delta_delta.tv_sec) >= 2) {
759  /*
760  * if delta_delta is too large, assume time correction
761  * has occurred and set old_delta to the current delta.
762  */
763  old_delta = delta;
764  } else {
765  /* Otherwise try to adjust old_system to compensate */
766  timekeeping_suspend_time =
767  timespec_add(timekeeping_suspend_time, delta_delta);
768  }
769  write_sequnlock_irqrestore(&tk->lock, flags);
770 
771  clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
772  clocksource_suspend();
773  clockevents_suspend();
774 
775  return 0;
776 }
777 
778 /* sysfs resume/suspend bits for timekeeping */
779 static struct syscore_ops timekeeping_syscore_ops = {
780  .resume = timekeeping_resume,
781  .suspend = timekeeping_suspend,
782 };
783 
784 static int __init timekeeping_init_ops(void)
785 {
786  register_syscore_ops(&timekeeping_syscore_ops);
787  return 0;
788 }
789 
790 device_initcall(timekeeping_init_ops);
791 
792 /*
793  * If the error is already larger, we look ahead even further
794  * to compensate for late or lost adjustments.
795  */
796 static __always_inline int timekeeping_bigadjust(struct timekeeper *tk,
797  s64 error, s64 *interval,
798  s64 *offset)
799 {
800  s64 tick_error, i;
801  u32 look_ahead, adj;
802  s32 error2, mult;
803 
804  /*
805  * Use the current error value to determine how much to look ahead.
806  * The larger the error the slower we adjust for it to avoid problems
807  * with losing too many ticks, otherwise we would overadjust and
808  * produce an even larger error. The smaller the adjustment the
809  * faster we try to adjust for it, as lost ticks can do less harm
810  * here. This is tuned so that an error of about 1 msec is adjusted
811  * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks).
812  */
813  error2 = tk->ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ);
814  error2 = abs(error2);
815  for (look_ahead = 0; error2 > 0; look_ahead++)
816  error2 >>= 2;
817 
818  /*
819  * Now calculate the error in (1 << look_ahead) ticks, but first
820  * remove the single look ahead already included in the error.
821  */
822  tick_error = ntp_tick_length() >> (tk->ntp_error_shift + 1);
823  tick_error -= tk->xtime_interval >> 1;
824  error = ((error - tick_error) >> look_ahead) + tick_error;
825 
826  /* Finally calculate the adjustment shift value. */
827  i = *interval;
828  mult = 1;
829  if (error < 0) {
830  error = -error;
831  *interval = -*interval;
832  *offset = -*offset;
833  mult = -1;
834  }
835  for (adj = 0; error > i; adj++)
836  error >>= 1;
837 
838  *interval <<= adj;
839  *offset <<= adj;
840  return mult << adj;
841 }
842 
843 /*
844  * Adjust the multiplier to reduce the error value,
845  * this is optimized for the most common adjustments of -1,0,1,
846  * for other values we can do a bit more work.
847  */
848 static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
849 {
850  s64 error, interval = tk->cycle_interval;
851  int adj;
852 
853  /*
854  * The point of this is to check if the error is greater than half
855  * an interval.
856  *
857  * First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
858  *
859  * Note we subtract one in the shift, so that error is really error*2.
860  * This "saves" dividing(shifting) interval twice, but keeps the
861  * (error > interval) comparison as still measuring if error is
862  * larger than half an interval.
863  *
864  * Note: It does not "save" on aggravation when reading the code.
865  */
866  error = tk->ntp_error >> (tk->ntp_error_shift - 1);
867  if (error > interval) {
868  /*
869  * We now divide error by 4(via shift), which checks if
870  * the error is greater than twice the interval.
871  * If it is greater, we need a bigadjust, if it's smaller,
872  * we can adjust by 1.
873  */
874  error >>= 2;
875  /*
876  * XXX - In update_wall_time, we round up to the next
877  * nanosecond, and store the amount rounded up into
878  * the error. This causes the likely below to be unlikely.
879  *
880  * The proper fix is to avoid rounding up by using
881  * the high precision tk->xtime_nsec instead of
882  * xtime.tv_nsec everywhere. Fixing this will take some
883  * time.
884  */
885  if (likely(error <= interval))
886  adj = 1;
887  else
888  adj = timekeeping_bigadjust(tk, error, &interval, &offset);
889  } else {
890  if (error < -interval) {
891  /* See comment above, this is just switched for the negative */
892  error >>= 2;
893  if (likely(error >= -interval)) {
894  adj = -1;
895  interval = -interval;
896  offset = -offset;
897  } else {
898  adj = timekeeping_bigadjust(tk, error, &interval, &offset);
899  }
900  } else {
901  goto out_adjust;
902  }
903  }
904 
905  if (unlikely(tk->clock->maxadj &&
906  (tk->mult + adj > tk->clock->mult + tk->clock->maxadj))) {
907  printk_once(KERN_WARNING
908  "Adjusting %s more than 11%% (%ld vs %ld)\n",
909  tk->clock->name, (long)tk->mult + adj,
910  (long)tk->clock->mult + tk->clock->maxadj);
911  }
912  /*
913  * So the following can be confusing.
914  *
915  * To keep things simple, lets assume adj == 1 for now.
916  *
917  * When adj != 1, remember that the interval and offset values
918  * have been appropriately scaled so the math is the same.
919  *
920  * The basic idea here is that we're increasing the multiplier
921  * by one, this causes the xtime_interval to be incremented by
922  * one cycle_interval. This is because:
923  * xtime_interval = cycle_interval * mult
924  * So if mult is being incremented by one:
925  * xtime_interval = cycle_interval * (mult + 1)
926  * It's the same as:
927  * xtime_interval = (cycle_interval * mult) + cycle_interval
928  * Which can be shortened to:
929  * xtime_interval += cycle_interval
930  *
931  * So offset stores the non-accumulated cycles. Thus the current
932  * time (in shifted nanoseconds) is:
933  * now = (offset * adj) + xtime_nsec
934  * Now, even though we're adjusting the clock frequency, we have
935  * to keep time consistent. In other words, we can't jump back
936  * in time, and we also want to avoid jumping forward in time.
937  *
938  * So given the same offset value, we need the time to be the same
939  * both before and after the freq adjustment.
940  * now = (offset * adj_1) + xtime_nsec_1
941  * now = (offset * adj_2) + xtime_nsec_2
942  * So:
943  * (offset * adj_1) + xtime_nsec_1 =
944  * (offset * adj_2) + xtime_nsec_2
945  * And we know:
946  * adj_2 = adj_1 + 1
947  * So:
948  * (offset * adj_1) + xtime_nsec_1 =
949  * (offset * (adj_1+1)) + xtime_nsec_2
950  * (offset * adj_1) + xtime_nsec_1 =
951  * (offset * adj_1) + offset + xtime_nsec_2
952  * Canceling the sides:
953  * xtime_nsec_1 = offset + xtime_nsec_2
954  * Which gives us:
955  * xtime_nsec_2 = xtime_nsec_1 - offset
956  * Which simplifies to:
957  * xtime_nsec -= offset
958  *
959  * XXX - TODO: Doc ntp_error calculation.
960  */
961  tk->mult += adj;
962  tk->xtime_interval += interval;
963  tk->xtime_nsec -= offset;
964  tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
965 
966 out_adjust:
967  /*
968  * It may be possible that when we entered this function, xtime_nsec
969  * was very small. Further, if we're slightly speeding the clocksource
970  * in the code above, it's possible the required corrective factor to
971  * xtime_nsec could cause it to underflow.
972  *
973  * Now, since we already accumulated the second, we cannot simply roll
974  * the accumulated second back, since the NTP subsystem has been
975  * notified via second_overflow. So instead we push xtime_nsec forward
976  * by the amount we underflowed, and add that amount into the error.
977  *
978  * We'll correct this error next time through this function, when
979  * xtime_nsec is not as small.
980  */
981  if (unlikely((s64)tk->xtime_nsec < 0)) {
982  s64 neg = -(s64)tk->xtime_nsec;
983  tk->xtime_nsec = 0;
984  tk->ntp_error += neg << tk->ntp_error_shift;
985  }
986 
987 }
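
The invariant argued in the long comment above (bump mult, subtract offset from xtime_nsec, instantaneous time unchanged) can be checked mechanically. A standalone sketch with arbitrary numbers; for adj == 1 this is exactly the xtime_nsec -= offset step, while for larger adj the kernel pre-scales offset in timekeeping_bigadjust() instead of multiplying here:

/* Standalone check of the invariant described above:
 *   offset * mult + xtime_nsec == offset * (mult + adj) + (xtime_nsec - offset * adj)
 * Numbers are arbitrary; only the algebra matters. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t offset     = 12345;            /* unaccumulated cycles */
        uint64_t mult       = 1000000;          /* current multiplier */
        uint64_t xtime_nsec = 987654321;        /* accumulated shifted ns */
        int64_t  adj        = 1;                /* frequency tweak from above */

        uint64_t before = offset * mult + xtime_nsec;

        mult       += adj;
        xtime_nsec -= offset * adj;

        uint64_t after = offset * mult + xtime_nsec;

        assert(before == after);
        printf("time unchanged across the adjustment: %llu == %llu\n",
               (unsigned long long)before, (unsigned long long)after);
        return 0;
}
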
988 
997 static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
998 {
999  u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
1000 
1001  while (tk->xtime_nsec >= nsecps) {
1002  int leap;
1003 
1004  tk->xtime_nsec -= nsecps;
1005  tk->xtime_sec++;
1006 
1007  /* Figure out if it's a leap sec and apply if needed */
1008  leap = second_overflow(tk->xtime_sec);
1009  if (unlikely(leap)) {
1010  struct timespec ts;
1011 
1012  tk->xtime_sec += leap;
1013 
1014  ts.tv_sec = leap;
1015  ts.tv_nsec = 0;
1016  tk_set_wall_to_mono(tk,
1017  timespec_sub(tk->wall_to_monotonic, ts));
1018 
1019  clock_was_set_delayed();
1020  }
1021  }
1022 }
1023 
1033 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
1034  u32 shift)
1035 {
1036  u64 raw_nsecs;
1037 
1038  /* If the offset is smaller than a shifted interval, do nothing */
1039  if (offset < tk->cycle_interval<<shift)
1040  return offset;
1041 
1042  /* Accumulate one shifted interval */
1043  offset -= tk->cycle_interval << shift;
1044  tk->clock->cycle_last += tk->cycle_interval << shift;
1045 
1046  tk->xtime_nsec += tk->xtime_interval << shift;
1047  accumulate_nsecs_to_secs(tk);
1048 
1049  /* Accumulate raw time */
1050  raw_nsecs = (u64)tk->raw_interval << shift;
1051  raw_nsecs += tk->raw_time.tv_nsec;
1052  if (raw_nsecs >= NSEC_PER_SEC) {
1053  u64 raw_secs = raw_nsecs;
1054  raw_nsecs = do_div(raw_secs, NSEC_PER_SEC);
1055  tk->raw_time.tv_sec += raw_secs;
1056  }
1057  tk->raw_time.tv_nsec = raw_nsecs;
1058 
1059  /* Accumulate error between NTP and clock interval */
1060  tk->ntp_error += ntp_tick_length() << shift;
1061  tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
1062  (tk->ntp_error_shift + shift);
1063 
1064  return offset;
1065 }
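
update_wall_time() below calls this in a loop with a decreasing shift, so a large NO_HZ backlog is consumed in O(log n) power-of-two chunks instead of one tick at a time (the kernel additionally caps shift so ntp_tick_length() << shift cannot overflow). A standalone sketch of just that accumulation pattern with hypothetical numbers:

/* Standalone illustration of the logarithmic accumulation loop used by
 * update_wall_time(): consume `offset` in power-of-two multiples of
 * `interval`. Numbers are hypothetical. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t interval = 10000;              /* cycles per tick (see above) */
        uint64_t offset   = 1234567;            /* backlog after a long NO_HZ idle */
        int shift, steps = 0;

        /* largest power-of-two multiple of interval not exceeding offset */
        for (shift = 0; (interval << (shift + 1)) <= offset; shift++)
                ;

        while (offset >= interval) {
                if (offset >= (interval << shift)) {
                        offset -= interval << shift;    /* accumulate one chunk */
                        steps++;
                } else {
                        shift--;                        /* try the next smaller chunk */
                }
        }

        printf("accumulated backlog in %d chunks, %llu cycles left over\n",
               steps, (unsigned long long)offset);
        return 0;
}
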
1066 
1067 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
1068 static inline void old_vsyscall_fixup(struct timekeeper *tk)
1069 {
1070  s64 remainder;
1071 
1072  /*
1073  * Store only full nanoseconds into xtime_nsec after rounding
1074  * it up and add the remainder to the error difference.
1075  * XXX - This is necessary to avoid small 1ns inconsistencies caused
1076  * by truncating the remainder in vsyscalls. However, it causes
1077  * additional work to be done in timekeeping_adjust(). Once
1078  * the vsyscall implementations are converted to use xtime_nsec
1079  * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
1080  * users are removed, this can be killed.
1081  */
1082  remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
1083  tk->xtime_nsec -= remainder;
1084  tk->xtime_nsec += 1ULL << tk->shift;
1085  tk->ntp_error += remainder << tk->ntp_error_shift;
1086 
1087 }
1088 #else
1089 #define old_vsyscall_fixup(tk)
1090 #endif
1091 
1092 
1093 
1098 static void update_wall_time(void)
1099 {
1100  struct clocksource *clock;
1101  struct timekeeper *tk = &timekeeper;
1102  cycle_t offset;
1103  int shift = 0, maxshift;
1104  unsigned long flags;
1105 
1106  write_seqlock_irqsave(&tk->lock, flags);
1107 
1108  /* Make sure we're fully resumed: */
1109  if (unlikely(timekeeping_suspended))
1110  goto out;
1111 
1112  clock = tk->clock;
1113 
1114 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
1115  offset = tk->cycle_interval;
1116 #else
1117  offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
1118 #endif
1119 
1120  /* Check if there's really nothing to do */
1121  if (offset < tk->cycle_interval)
1122  goto out;
1123 
1124  /*
1125  * With NO_HZ we may have to accumulate many cycle_intervals
1126  * (think "ticks") worth of time at once. To do this efficiently,
1127  * we calculate the largest doubling multiple of cycle_intervals
1128  * that is smaller than the offset. We then accumulate that
1129  * chunk in one go, and then try to consume the next smaller
1130  * doubled multiple.
1131  */
1132  shift = ilog2(offset) - ilog2(tk->cycle_interval);
1133  shift = max(0, shift);
1134  /* Bound shift to one less than what overflows tick_length */
1135  maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
1136  shift = min(shift, maxshift);
1137  while (offset >= tk->cycle_interval) {
1138  offset = logarithmic_accumulation(tk, offset, shift);
1139  if (offset < tk->cycle_interval<<shift)
1140  shift--;
1141  }
1142 
1143  /* correct the clock when NTP error is too big */
1144  timekeeping_adjust(tk, offset);
1145 
1146  /*
1147  * XXX This can be killed once everyone converts
1148  * to the new update_vsyscall.
1149  */
1150  old_vsyscall_fixup(tk);
1151 
1152  /*
1153  * Finally, make sure that after the rounding
1154  * xtime_nsec isn't larger than NSEC_PER_SEC
1155  */
1156  accumulate_nsecs_to_secs(tk);
1157 
1158  timekeeping_update(tk, false);
1159 
1160 out:
1161  write_sequnlock_irqrestore(&tk->lock, flags);
1162 
1163 }
1164 
1176 void getboottime(struct timespec *ts)
1177 {
1178  struct timekeeper *tk = &timekeeper;
1179  struct timespec boottime = {
1180  .tv_sec = tk->wall_to_monotonic.tv_sec +
1181  tk->total_sleep_time.tv_sec,
1182  .tv_nsec = tk->wall_to_monotonic.tv_nsec +
1183  tk->total_sleep_time.tv_nsec
1184  };
1185 
1186  set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec);
1187 }
1188 EXPORT_SYMBOL_GPL(getboottime);
1189 
1199 void get_monotonic_boottime(struct timespec *ts)
1200 {
1201  struct timekeeper *tk = &timekeeper;
1202  struct timespec tomono, sleep;
1203  s64 nsec;
1204  unsigned int seq;
1205 
1206  WARN_ON(timekeeping_suspended);
1207 
1208  do {
1209  seq = read_seqbegin(&tk->lock);
1210  ts->tv_sec = tk->xtime_sec;
1211  nsec = timekeeping_get_ns(tk);
1212  tomono = tk->wall_to_monotonic;
1213  sleep = tk->total_sleep_time;
1214 
1215  } while (read_seqretry(&tk->lock, seq));
1216 
1217  ts->tv_sec += tomono.tv_sec + sleep.tv_sec;
1218  ts->tv_nsec = 0;
1219  timespec_add_ns(ts, nsec + tomono.tv_nsec + sleep.tv_nsec);
1220 }
1221 EXPORT_SYMBOL_GPL(get_monotonic_boottime);
1222 
1231 ktime_t ktime_get_boottime(void)
1232 {
1233  struct timespec ts;
1234 
1235  get_monotonic_boottime(&ts);
1236  return timespec_to_ktime(ts);
1237 }
1238 EXPORT_SYMBOL_GPL(ktime_get_boottime);
1239 
1244 void monotonic_to_bootbased(struct timespec *ts)
1245 {
1246  struct timekeeper *tk = &timekeeper;
1247 
1248  *ts = timespec_add(*ts, tk->total_sleep_time);
1249 }
1250 EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
1251 
1252 unsigned long get_seconds(void)
1253 {
1254  struct timekeeper *tk = &timekeeper;
1255 
1256  return tk->xtime_sec;
1257 }
1258 EXPORT_SYMBOL(get_seconds);
1259 
1260 struct timespec __current_kernel_time(void)
1261 {
1262  struct timekeeper *tk = &timekeeper;
1263 
1264  return tk_xtime(tk);
1265 }
1266 
1267 struct timespec current_kernel_time(void)
1268 {
1269  struct timekeeper *tk = &timekeeper;
1270  struct timespec now;
1271  unsigned long seq;
1272 
1273  do {
1274  seq = read_seqbegin(&tk->lock);
1275 
1276  now = tk_xtime(tk);
1277  } while (read_seqretry(&tk->lock, seq));
1278 
1279  return now;
1280 }
1281 EXPORT_SYMBOL(current_kernel_time);
1282 
1283 struct timespec get_monotonic_coarse(void)
1284 {
1285  struct timekeeper *tk = &timekeeper;
1286  struct timespec now, mono;
1287  unsigned long seq;
1288 
1289  do {
1290  seq = read_seqbegin(&tk->lock);
1291 
1292  now = tk_xtime(tk);
1293  mono = tk->wall_to_monotonic;
1294  } while (read_seqretry(&tk->lock, seq));
1295 
1296  set_normalized_timespec(&now, now.tv_sec + mono.tv_sec,
1297  now.tv_nsec + mono.tv_nsec);
1298  return now;
1299 }
1300 
1301 /*
1302  * The 64-bit jiffies value is not atomic - you MUST NOT read it
1303  * without sampling the sequence number in xtime_lock.
1304  * jiffies is defined in the linker script...
1305  */
1306 void do_timer(unsigned long ticks)
1307 {
1308  jiffies_64 += ticks;
1309  update_wall_time();
1310  calc_global_load(ticks);
1311 }
1312 
1320 void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim,
1321  struct timespec *wtom, struct timespec *sleep)
1322 {
1323  struct timekeeper *tk = &timekeeper;
1324  unsigned long seq;
1325 
1326  do {
1327  seq = read_seqbegin(&tk->lock);
1328  *xtim = tk_xtime(tk);
1329  *wtom = tk->wall_to_monotonic;
1330  *sleep = tk->total_sleep_time;
1331  } while (read_seqretry(&tk->lock, seq));
1332 }
1333 
1334 #ifdef CONFIG_HIGH_RES_TIMERS
1335 
1343 ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot)
1344 {
1345  struct timekeeper *tk = &timekeeper;
1346  ktime_t now;
1347  unsigned int seq;
1348  u64 secs, nsecs;
1349 
1350  do {
1351  seq = read_seqbegin(&tk->lock);
1352 
1353  secs = tk->xtime_sec;
1354  nsecs = timekeeping_get_ns(tk);
1355 
1356  *offs_real = tk->offs_real;
1357  *offs_boot = tk->offs_boot;
1358  } while (read_seqretry(&tk->lock, seq));
1359 
1360  now = ktime_add_ns(ktime_set(secs, 0), nsecs);
1361  now = ktime_sub(now, *offs_real);
1362  return now;
1363 }
1364 #endif
1365 
1369 ktime_t ktime_get_monotonic_offset(void)
1370 {
1371  struct timekeeper *tk = &timekeeper;
1372  unsigned long seq;
1373  struct timespec wtom;
1374 
1375  do {
1376  seq = read_seqbegin(&tk->lock);
1377  wtom = tk->wall_to_monotonic;
1378  } while (read_seqretry(&tk->lock, seq));
1379 
1380  return timespec_to_ktime(wtom);
1381 }
1382 EXPORT_SYMBOL_GPL(ktime_get_monotonic_offset);
1383 
1390 void xtime_update(unsigned long ticks)
1391 {
1392  write_seqlock(&xtime_lock);
1393  do_timer(ticks);
1394  write_sequnlock(&xtime_lock);
1395 }