30 #include <linux/module.h>
35 #include <asm/mmu_context.h>
37 #include <linux/capability.h>
41 #include <linux/perf_event.h>
59 #include <linux/sysctl.h>
67 #include <linux/hrtimer.h>
70 #include <linux/ctype.h>
72 #include <linux/slab.h>
74 #include <linux/binfmts.h>
76 #include <asm/switch_to.h>
78 #include <asm/irq_regs.h>
79 #include <asm/mutex.h>
80 #ifdef CONFIG_PARAVIRT
81 #include <asm/paravirt.h>
85 #include "../workqueue_sched.h"
86 #include "../smpboot.h"
88 #define CREATE_TRACE_POINTS
97 if (hrtimer_active(period_timer))
100 now = hrtimer_cb_get_time(period_timer);
103 soft = hrtimer_get_softexpires(period_timer);
104 hard = hrtimer_get_expires(period_timer);
105 delta = ktime_to_ns(ktime_sub(hard, soft));
114 static void update_rq_clock_task(struct rq *rq, s64 delta);
125 update_rq_clock_task(rq, delta);
132 #define SCHED_FEAT(name, enabled) \
133 (1UL << __SCHED_FEAT_##name) * enabled |
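/*
 * Annotation (not part of the original file): each SCHED_FEAT(name, enabled)
 * entry pulled in from features.h expands to "(1UL << __SCHED_FEAT_name) *
 * enabled |", so the whole include collapses into one OR-ed constant. A
 * hypothetical features.h containing
 *	SCHED_FEAT(FOO, true)
 *	SCHED_FEAT(BAR, false)
 * would yield a default mask with only the FOO bit set; features declared
 * "false" contribute 0.
 */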
141 #ifdef CONFIG_SCHED_DEBUG
142 #define SCHED_FEAT(name, enabled) \
145 static const char * const sched_feat_names[] = {
151 static int sched_feat_show(struct seq_file *m, void *v)
165 #ifdef HAVE_JUMP_LABEL
167 #define jump_label_key__true STATIC_KEY_INIT_TRUE
168 #define jump_label_key__false STATIC_KEY_INIT_FALSE
170 #define SCHED_FEAT(name, enabled) \
171 jump_label_key__##enabled ,
173 struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
179 static void sched_feat_disable(int i)
181 if (static_key_enabled(&sched_feat_keys[i]))
182 static_key_slow_dec(&sched_feat_keys[i]);
185 static void sched_feat_enable(int i)
187 if (!static_key_enabled(&sched_feat_keys[i]))
188 static_key_slow_inc(&sched_feat_keys[i]);
191 static void sched_feat_disable(int i) { };
192 static void sched_feat_enable(int i) { };
196 sched_feat_write(struct file *filp, const char __user *ubuf,
197 size_t cnt, loff_t *ppos)
213 if (strncmp(cmp, "NO_", 3) == 0) {
219 if (strcmp(cmp, sched_feat_names[i]) == 0) {
222 sched_feat_disable(i);
225 sched_feat_enable(i);
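/*
 * Annotation (not from the original source): writing "FEATNAME" or
 * "NO_FEATNAME" to the sched_features debugfs file flips both the
 * sysctl_sched_features bit and, when HAVE_JUMP_LABEL is available, the
 * matching static key, so the hot-path sched_feat() test becomes a patched
 * branch rather than a bitmask load. Example usage from user space
 * (debugfs mount point assumed):
 *	echo NO_GENTLE_FAIR_SLEEPERS > /sys/kernel/debug/sched_features
 */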
231 if (i == __SCHED_FEAT_NR)
239 static int sched_feat_open(struct inode *inode, struct file *filp)
245 .open = sched_feat_open,
246 .write = sched_feat_write,
252 static __init int sched_init_debug(void)
331 static void __task_rq_unlock(struct rq *rq)
349 static struct rq *this_rq_lock(void)
361 #ifdef CONFIG_SCHED_HRTICK
373 static void hrtick_clear(struct rq *rq)
375 if (hrtimer_active(&rq->hrtick_timer))
385 struct rq *rq = container_of(timer, struct rq, hrtick_timer);
391 rq->curr->sched_class->task_tick(rq, rq->curr, 1);
401 static void __hrtick_start(void *arg)
407 rq->hrtick_csd_pending = 0;
416 void hrtick_start(struct rq *rq, u64 delay)
418 struct hrtimer *timer = &rq->hrtick_timer;
421 hrtimer_set_expires(timer, time);
425 } else if (!rq->hrtick_csd_pending) {
426 __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
427 rq->hrtick_csd_pending = 1;
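/*
 * Annotation (not in the original file): when hrtick_start() is called for a
 * remote runqueue, the timer cannot be armed directly; the request is
 * forwarded through rq->hrtick_csd so that __hrtick_start() runs on the owning
 * CPU via __smp_call_function_single(). hrtick_csd_pending throttles this to
 * one outstanding IPI per runqueue.
 */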
434 int cpu = (int)(long)hcpu;
443 hrtick_clear(cpu_rq(cpu));
450 static __init void init_hrtick(void)
460 void hrtick_start(struct rq *rq, u64 delay)
466 static inline void init_hrtick(void)
471 static void init_rq_hrtick(struct rq *rq)
474 rq->hrtick_csd_pending = 0;
476 rq->hrtick_csd.flags = 0;
477 rq->hrtick_csd.func = __hrtick_start;
478 rq->hrtick_csd.info = rq;
482 rq->hrtick_timer.function = hrtick;
485 static inline void hrtick_clear(struct rq *rq)
489 static inline void init_rq_hrtick(struct rq *rq)
493 static inline void init_hrtick(void)
507 #ifndef tsk_is_polling
508 #define tsk_is_polling(t) 0
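/*
 * Annotation (not in the original file): architectures whose idle loop polls
 * need_resched (e.g. mwait/poll based idle) define tsk_is_polling(); for them
 * resched_task() can skip the IPI because the idle CPU notices the
 * TIF_NEED_RESCHED flag on its own. The fallback of 0 always takes the IPI
 * path.
 */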
517 if (test_tsk_need_resched(p))
520 set_tsk_need_resched(p);
534 struct rq *rq = cpu_rq(cpu);
552 int get_nohz_timer_target(void)
556 struct sched_domain *sd;
559 for_each_domain(cpu, sd) {
581 void wake_up_idle_cpu(int cpu)
583 struct rq *rq = cpu_rq(cpu);
603 set_tsk_need_resched(rq->idle);
611 static inline bool got_nohz_idle_kick(void)
619 static inline bool got_nohz_idle_kick(void)
626 void sched_avg_update(struct rq *rq)
630 while ((s64)(rq->clock - rq->age_stamp) > period) {
636 asm("" : "+rm" (rq->age_stamp));
646 set_tsk_need_resched(p);
650 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
651 (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
658 int walk_tg_tree_from(struct task_group *from,
667 ret = (*down)(parent, data);
670 list_for_each_entry_rcu(child, &parent->children, siblings) {
677 ret = (*up)(parent, data);
678 if (ret || parent == from)
682 parent = parent->parent;
689 int tg_nop(struct task_group *tg, void *data)
713 static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
720 static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
732 enqueue_task(rq, p, flags);
740 dequeue_task(rq, p, flags);
743 static void update_rq_clock_task(struct rq *rq, s64 delta)
749 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
750 s64 steal = 0, irq_delta = 0;
752 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
753 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
770 if (irq_delta > delta)
773 rq->prev_irq_time += irq_delta;
776 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
780 steal = paravirt_steal_clock(cpu_of(rq));
781 steal -= rq->prev_steal_time_rq;
786 st = steal_ticks(steal);
789 rq->prev_steal_time_rq += steal;
797 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
798 if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
799 sched_rt_avg_update(rq, irq_delta + steal);
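/*
 * Annotation (not in the original file): time consumed by IRQ handling or
 * stolen by the hypervisor is not available to tasks, so when the
 * NONTASK_POWER feature is enabled it is folded into the rt_avg accumulator,
 * which the load balancer later uses to scale down the effective power of
 * this CPU.
 */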
836 static inline int __normal_prio(struct task_struct *p)
852 if (task_has_rt_policy(p))
855 prio = __normal_prio(p);
874 if (!rt_prio(p->prio))
888 static inline void check_class_changed(struct rq *rq, struct task_struct *p,
896 } else if (oldprio != p->prio)
905 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
908 if (class == rq->curr->sched_class)
921 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
926 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
928 #ifdef CONFIG_SCHED_DEBUG
936 #ifdef CONFIG_LOCKDEP
948 lockdep_is_held(&task_rq(p)->lock)));
952 trace_sched_migrate_task(p, new_cpu);
954 if (task_cpu(p) != new_cpu) {
955 p->se.nr_migrations++;
959 __set_task_cpu(p, new_cpu);
962 struct migration_arg {
967 static int migration_cpu_stop(
void *data);
1012 while (task_running(rq, p)) {
1023 rq = task_rq_lock(p, &flags);
1024 trace_sched_wait_task(p);
1025 running = task_running(rq, p);
1028 if (!match_state || p->state == match_state)
1030 task_rq_unlock(rq, p, &flags);
1107 static int select_fallback_rq(int cpu, struct task_struct *p)
1141 do_set_cpus_allowed(p, cpu_possible_mask);
1158 if (p->mm && printk_ratelimit()) {
1159 printk_sched("process %d (%s) no longer affine to cpu%d\n",
1160 task_pid_nr(p), p->comm, cpu);
1171 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
1173 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
1187 cpu = select_fallback_rq(task_cpu(p), p);
1194 s64 diff = sample - *avg;
1200 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
1202 #ifdef CONFIG_SCHEDSTATS
1208 if (cpu == this_cpu) {
1212 struct sched_domain *sd;
1216 for_each_domain(this_cpu, sd) {
1239 static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
1253 ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
1255 trace_sched_wakeup(p, true);
1263 if (rq->idle_stamp) {
1264 u64 delta = rq->clock - rq->idle_stamp;
1270 update_avg(&rq->avg_idle, delta);
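/*
 * Annotation (not in the original file): rq->avg_idle is an exponentially
 * weighted average of how long this CPU stays idle between wakeups;
 * update_avg() folds in 1/8 of each new sample. idle_balance() compares it
 * against sysctl_sched_migration_cost to decide whether newidle balancing is
 * worth the cost.
 */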
1277 ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
1285 ttwu_do_wakeup(rq, p, wake_flags);
1294 static int ttwu_remote(struct task_struct *p, int wake_flags)
1299 rq = __task_rq_lock(p);
1301 ttwu_do_wakeup(rq, p, wake_flags);
1304 __task_rq_unlock(rq);
1310 static void sched_ttwu_pending(void)
1313 struct llist_node *llist = llist_del_all(&rq->wake_list);
1320 llist = llist_next(llist);
1321 ttwu_do_activate(rq, p, 0);
1327 void scheduler_ipi(void)
1329 if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
1346 sched_ttwu_pending();
1351 if (unlikely(got_nohz_idle_kick() && !need_resched())) {
1358 static void ttwu_queue_remote(struct task_struct *p, int cpu)
1360 if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
1364 bool cpus_share_cache(int this_cpu, int that_cpu)
1366 return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
1370 static void ttwu_queue(struct task_struct *p, int cpu)
1372 struct rq *rq = cpu_rq(cpu);
1374 #if defined(CONFIG_SMP)
1377 ttwu_queue_remote(p, cpu);
1383 ttwu_do_activate(rq, p, 0);
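/*
 * Annotation (not in the original file): ttwu_queue() only takes the remote
 * path when the waking CPU and the target CPU do not share a last-level cache
 * (cpus_share_cache()); in that case the task is pushed onto the target's
 * lock-free rq->wake_list and scheduler_ipi() completes the activation there,
 * avoiding a cross-CPU acquisition of the remote runqueue lock.
 */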
1405 unsigned long flags;
1406 int cpu, success = 0;
1410 if (!(p->state & state))
1416 if (p->on_rq && ttwu_remote(p, wake_flags))
1437 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
1438 if (task_cpu(p) != cpu) {
1440 set_task_cpu(p, cpu);
1446 ttwu_stat(p, cpu, wake_flags);
1461 static void try_to_wake_up_local(struct task_struct *p)
1481 ttwu_do_wakeup(rq, p, 0);
1500 return try_to_wake_up(p, TASK_ALL, 0);
1506 return try_to_wake_up(p, state, 0);
1520 p->se.exec_start = 0;
1521 p->se.sum_exec_runtime = 0;
1522 p->se.prev_sum_exec_runtime = 0;
1523 p->se.nr_migrations = 0;
1525 INIT_LIST_HEAD(&p->se.group_node);
1527 #ifdef CONFIG_SCHEDSTATS
1528 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
1531 INIT_LIST_HEAD(&p->rt.run_list);
1533 #ifdef CONFIG_PREEMPT_NOTIFIERS
1543 unsigned long flags;
1563 if (task_has_rt_policy(p)) {
1580 if (!rt_prio(p->prio))
1594 set_task_cpu(p, cpu);
1597 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1598 if (likely(sched_info_on()))
1599 memset(&p->sched_info, 0, sizeof(p->sched_info));
1601 #if defined(CONFIG_SMP)
1604 #ifdef CONFIG_PREEMPT_COUNT
1609 plist_node_init(&p->pushable_tasks, MAX_PRIO);
1624 unsigned long flags;
1634 set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
1637 rq = __task_rq_lock(p);
1640 trace_sched_wakeup_new(p, true);
1646 task_rq_unlock(rq, p, &flags);
1649 #ifdef CONFIG_PREEMPT_NOTIFIERS
1655 void preempt_notifier_register(struct preempt_notifier *notifier)
1657 hlist_add_head(&notifier->link, &current->preempt_notifiers);
1667 void preempt_notifier_unregister(struct preempt_notifier *notifier)
1669 hlist_del(&notifier->link);
1675 struct preempt_notifier *notifier;
1686 struct preempt_notifier *notifier;
1690 notifier->ops->sched_out(notifier, next);
1695 static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
1700 fire_sched_out_preempt_notifiers(struct task_struct *curr,
1724 trace_sched_switch(prev, next);
1726 perf_event_task_sched_out(prev, next);
1727 fire_sched_out_preempt_notifiers(prev, next);
1728 prepare_lock_switch(rq, next);
1747 static void finish_task_switch(struct rq *rq, struct task_struct *prev)
1766 prev_state = prev->state;
1770 finish_lock_switch(rq, prev);
1773 fire_sched_in_preempt_notifiers(current);
1782 put_task_struct(prev);
1789 static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
1796 static inline void post_schedule(struct rq *rq)
1798 if (rq->post_schedule) {
1799 unsigned long flags;
1802 if (rq->curr->sched_class->post_schedule)
1803 rq->curr->sched_class->post_schedule(rq);
1806 rq->post_schedule = 0;
1812 static inline void pre_schedule(struct rq *rq, struct task_struct *p)
1816 static inline void post_schedule(struct rq *rq)
1831 finish_task_switch(rq, prev);
1839 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
1852 context_switch(struct rq *rq, struct task_struct *prev,
1857 prepare_task_switch(rq, prev, next);
1885 #ifndef __ARCH_WANT_UNLOCKED_CTXSW
1890 rcu_switch(prev, next);
1899 finish_task_switch(this_rq(), prev);
1911 unsigned long i, sum = 0;
1914 sum += cpu_rq(i)->nr_running;
1921 unsigned long i, sum = 0;
1924 sum += cpu_rq(i)->nr_uninterruptible;
1939 unsigned long long sum = 0;
1942 sum += cpu_rq(i)->nr_switches;
1949 unsigned long i, sum = 0;
1959 struct rq *this = cpu_rq(cpu);
2033 loads[0] = (avenrun[0] + offset) << shift;
2034 loads[1] = (avenrun[1] + offset) << shift;
2035 loads[2] = (avenrun[2] + offset) << shift;
2038 static long calc_load_fold_active(struct rq *this_rq)
2040 long nr_active, delta = 0;
2056 static unsigned long
2057 calc_load(unsigned long load, unsigned long exp, unsigned long active)
2109 static int calc_load_idx;
2111 static inline int calc_load_write_idx(void)
2113 int idx = calc_load_idx;
2131 static inline int calc_load_read_idx(void)
2133 return calc_load_idx & 1;
2136 void calc_load_enter_idle(void)
2138 struct rq *this_rq = this_rq();
2145 delta = calc_load_fold_active(this_rq);
2147 int idx = calc_load_write_idx();
2148 atomic_long_add(delta, &calc_load_idle[idx]);
2152 void calc_load_exit_idle(void)
2154 struct rq *this_rq = this_rq();
2172 static long calc_load_fold_idle(void)
2174 int idx = calc_load_read_idx();
2177 if (atomic_long_read(&calc_load_idle[idx]))
2198 static unsigned long
2199 fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
2201 unsigned long result = 1UL << frac_bits;
2206 result += 1UL << (frac_bits - 1);
2207 result >>= frac_bits;
2213 x += 1UL << (frac_bits - 1);
2243 static unsigned long
2244 calc_load_n(unsigned long load, unsigned long exp,
2245 unsigned long active, unsigned int n)
2248 return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
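/*
 * Annotation (not in the original file): avenrun[] is kept in FSHIFT-bit fixed
 * point. A single period update computes roughly
 *	load = (load * exp + active * (FIXED_1 - exp)) / FIXED_1
 * and catching up after n missed periods uses exp^n, which fixed_power_int()
 * computes by square-and-multiply, so e.g.
 *	calc_load_n(load, EXP_1, active, 4)
 * is calc_load() applied with EXP_1 raised to the fourth power, up to
 * rounding.
 */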
2260 static void calc_global_nohz(void)
2264 if (!time_before(jiffies, calc_load_update + 10)) {
2268 delta = jiffies - calc_load_update - 10;
2271 active = atomic_long_read(&calc_load_tasks);
2272 active = active > 0 ? active * FIXED_1 : 0;
2274 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
2275 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
2276 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
2293 static inline long calc_load_fold_idle(void) { return 0; }
2294 static inline void calc_global_nohz(void) { }
2312 delta = calc_load_fold_idle();
2314 atomic_long_add(delta, &calc_load_tasks);
2316 active = atomic_long_read(&calc_load_tasks);
2317 active = active > 0 ? active * FIXED_1 : 0;
2319 avenrun[0] = calc_load(avenrun[0], EXP_1, active);
2320 avenrun[1] = calc_load(avenrun[1], EXP_5, active);
2321 avenrun[2] = calc_load(avenrun[2], EXP_15, active);
2335 static void calc_load_account_active(struct rq *this_rq)
2342 delta = calc_load_fold_active(this_rq);
2344 atomic_long_add(delta, &calc_load_tasks);
2380 #define DEGRADE_SHIFT 7
2381 static const unsigned char degrade_zero_ticks[CPU_LOAD_IDX_MAX] = {0, 8, 32, 64, 128};
2383 static const unsigned char degrade_factor[CPU_LOAD_IDX_MAX][DEGRADE_SHIFT + 1] = {
2385 {0, 0, 0, 0, 0, 0, 0, 0},
2386 {64, 32, 8, 0, 0, 0, 0, 0},
2387 {96, 72, 40, 12, 1, 0, 0},
2388 {112, 98, 75, 43, 15, 1, 0},
2389 {120, 112, 98, 76, 45, 16, 2} };
2396 static unsigned long
2397 decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
2401 if (!missed_updates)
2404 if (missed_updates >= degrade_zero_ticks[idx])
2408 return load >> missed_updates;
2410 while (missed_updates) {
2411 if (missed_updates % 2)
2414 missed_updates >>= 1;
2425 static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
2426 unsigned long pending_updates)
2435 unsigned long old_load, new_load;
2440 old_load = decay_load_missed(old_load, pending_updates - 1, i);
2441 new_load = this_load;
2447 if (new_load > old_load)
2448 new_load += scale - 1;
2450 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
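/*
 * Annotation (not in the original file): cpu_load[i] smooths the runqueue load
 * over increasingly long horizons; index i decays with weight (2^i - 1)/2^i
 * per tick, i.e.
 *	cpu_load[i] = (old_load * (2^i - 1) + new_load) / 2^i
 * After a tickless period, decay_load_missed() applies the missed ticks' worth
 * of decay in one step using the precomputed degrade_factor[] table.
 */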
2453 sched_avg_update(this_rq);
2476 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2477 unsigned long load = this_rq->load.weight;
2478 unsigned long pending_updates;
2489 __update_cpu_load(this_rq, load, pending_updates);
2497 struct rq *this_rq = this_rq();
2498 unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
2499 unsigned long pending_updates;
2506 if (pending_updates) {
2512 __update_cpu_load(this_rq, 0, pending_updates);
2521 static void update_cpu_load_active(struct rq *this_rq)
2527 __update_cpu_load(this_rq, this_rq->load.weight, 1);
2529 calc_load_account_active(this_rq);
2541 unsigned long flags;
2545 dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
2550 struct migration_arg arg = { p, dest_cpu };
2574 static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
2578 if (task_current(rq, p)) {
2590 unsigned long flags;
2594 rq = task_rq_lock(p, &flags);
2595 ns = do_task_delta_exec(p, rq);
2596 task_rq_unlock(rq, p, &flags);
2608 unsigned long flags;
2612 rq = task_rq_lock(p, &flags);
2613 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
2614 task_rq_unlock(rq, p, &flags);
2626 struct rq *rq = cpu_rq(cpu);
2633 update_cpu_load_active(rq);
2641 trigger_load_balance(rq, cpu);
2655 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
2656 defined(CONFIG_PREEMPT_TRACER))
2660 #ifdef CONFIG_DEBUG_PREEMPT
2668 #ifdef CONFIG_DEBUG_PREEMPT
2682 #ifdef CONFIG_DEBUG_PREEMPT
2718 print_irqtrace_events(prev);
2726 static inline void schedule_debug(struct task_struct *prev)
2734 __schedule_bug(prev);
2742 static void put_prev_task(struct rq *rq, struct task_struct *prev)
2753 pick_next_task(struct rq *rq)
2769 p = class->pick_next_task(rq);
2814 static void __sched __schedule(void)
2817 unsigned long *switch_count;
2828 schedule_debug(prev);
2835 switch_count = &prev->nivcsw;
2853 try_to_wake_up_local(to_wakeup);
2856 switch_count = &prev->nvcsw;
2859 pre_schedule(rq, prev);
2862 idle_balance(cpu, rq);
2864 put_prev_task(rq, prev);
2865 next = pick_next_task(rq);
2866 clear_tsk_need_resched(prev);
2869 if (likely(prev != next)) {
2874 context_switch(rq, prev, next);
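/*
 * Annotation (not in the original file): this is the core of __schedule(): the
 * outgoing task is accounted via put_prev_task(), pick_next_task() asks each
 * scheduling class in priority order for a runnable task, and only when
 * prev != next do we pay for a full context_switch(); otherwise the runqueue
 * lock is simply released.
 */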
2893 static inline void sched_submit_work(struct task_struct *tsk)
2895 if (!tsk->state || tsk_is_pi_blocked(tsk))
2901 if (blk_needs_flush_plug(tsk))
2902 blk_schedule_flush_plug(tsk);
2909 sched_submit_work(tsk);
2914 #ifdef CONFIG_RCU_USER_QS
2941 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
2945 if (lock->owner != owner)
2956 return owner->on_cpu;
2969 while (owner_running(lock, owner)) {
2982 return lock->owner == NULL;
2986 #ifdef CONFIG_PREEMPT
3013 } while (need_resched());
3043 } while (need_resched());
3051 return try_to_wake_up(curr->private, mode, wake_flags);
3065 int nr_exclusive, int wake_flags, void *key)
3070 unsigned flags = curr->flags;
3072 if (curr->func(curr, mode, wake_flags, key) &&
3089 int nr_exclusive, void *key)
3091 unsigned long flags;
3094 __wake_up_common(q, mode, nr_exclusive, 0, key);
3095 spin_unlock_irqrestore(&q->lock, flags);
3104 __wake_up_common(q, mode, nr, 0, NULL);
3110 __wake_up_common(q, mode, 1, 0, key);
3132 int nr_exclusive, void *key)
3134 unsigned long flags;
3144 __wake_up_common(q, mode, nr_exclusive, wake_flags, key);
3145 spin_unlock_irqrestore(&q->lock, flags);
3172 unsigned long flags;
3177 spin_unlock_irqrestore(&x->wait.lock, flags);
3192 unsigned long flags;
3197 spin_unlock_irqrestore(&x->wait.lock, flags);
3202 do_wait_for_common(struct completion *x, long timeout, int state)
3207 __add_wait_queue_tail_exclusive(&x->wait, &wait);
3209 if (signal_pending_state(state, current)) {
3217 } while (!x->done && timeout);
3218 __remove_wait_queue(&x->wait, &wait);
3223 return timeout ?: 1;
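/*
 * Annotation (not in the original file): do_wait_for_common() returns the
 * remaining timeout in jiffies, a negative -ERESTARTSYS when interrupted by a
 * signal, and "timeout ?: 1" guarantees a non-zero (success) result when the
 * completion fired exactly as the timeout expired.
 */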
3227 wait_for_common(struct completion *x, long timeout, int state)
3232 timeout = do_wait_for_common(x, timeout, state);
3303 unsigned long timeout)
3341 unsigned long timeout)
3361 unsigned long flags;
3369 spin_unlock_irqrestore(&x->wait.lock, flags);
3384 unsigned long flags;
3390 spin_unlock_irqrestore(&x->wait.lock, flags);
3398 unsigned long flags;
3401 init_waitqueue_entry(&wait, current);
3406 __add_wait_queue(q, &wait);
3407 spin_unlock(&q->lock);
3409 spin_lock_irq(&q->lock);
3410 __remove_wait_queue(q, &wait);
3411 spin_unlock_irqrestore(&q->lock, flags);
3441 #ifdef CONFIG_RT_MUTEXES
3453 void rt_mutex_setprio(struct task_struct *p, int prio)
3455 int oldprio, on_rq, running;
3461 rq = __task_rq_lock(p);
3481 trace_sched_pi_setprio(p, prio);
3485 running = task_current(rq, p);
3503 check_class_changed(rq, p, prev_class, oldprio);
3505 __task_rq_unlock(rq);
3510 int old_prio, delta, on_rq;
3511 unsigned long flags;
3514 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3520 rq = task_rq_lock(p, &flags);
3527 if (task_has_rt_policy(p)) {
3533 dequeue_task(rq, p, 0);
3538 p->prio = effective_prio(p);
3539 delta = p->prio - old_prio;
3542 enqueue_task(rq, p, 0);
3547 if (delta < 0 || (delta > 0 && task_running(rq, p)))
3551 task_rq_unlock(rq, p, &flags);
3563 int nice_rlim = 20 - nice;
3565 return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
3569 #ifdef __ARCH_WANT_SYS_NICE
3587 if (increment < -40)
3640 struct rq *rq =
cpu_rq(cpu);
3649 if (!llist_empty(&rq->wake_list))
3662 return cpu_rq(cpu)->idle;
3683 if (rt_prio(p->prio))
3693 static bool check_same_owner(struct task_struct *p)
3700 match = (uid_eq(cred->euid, pcred->euid) ||
3701 uid_eq(cred->euid, pcred->uid));
3706 static int __sched_setscheduler(struct task_struct *p, int policy,
3709 int retval, oldprio, oldpolicy = -1, on_rq, running;
3710 unsigned long flags;
3721 policy = oldpolicy = p->policy;
3748 if (rt_policy(policy)) {
3749 unsigned long rlim_rtprio =
3753 if (policy != p->policy && !rlim_rtprio)
3772 if (!check_same_owner(p))
3793 rq = task_rq_lock(p, &flags);
3798 if (p == rq->stop) {
3799 task_rq_unlock(rq, p, &flags);
3808 task_rq_unlock(rq, p, &flags);
3812 #ifdef CONFIG_RT_GROUP_SCHED
3818 if (rt_bandwidth_enabled() && rt_policy(policy) &&
3820 !task_group_is_autogroup(task_group(p))) {
3821 task_rq_unlock(rq, p, &flags);
3829 policy = oldpolicy = -1;
3830 task_rq_unlock(rq, p, &flags);
3834 running = task_current(rq, p);
3836 dequeue_task(rq, p, 0);
3849 enqueue_task(rq, p, 0);
3851 check_class_changed(rq, p, prev_class, oldprio);
3852 task_rq_unlock(rq, p, &flags);
3870 return __sched_setscheduler(p, policy, param, true);
3888 return __sched_setscheduler(p, policy, param, false);
3892 do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
3898 if (!param || pid < 0)
3905 p = find_process_by_pid(pid);
3926 return do_sched_setscheduler(pid, policy, param);
3936 return do_sched_setscheduler(pid, -1, param);
3953 p = find_process_by_pid(pid);
3975 if (!param || pid < 0)
3979 p = find_process_by_pid(pid);
4012 p = find_process_by_pid(pid);
4023 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
4027 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
4029 goto out_free_cpus_allowed;
4040 cpumask_and(new_mask, in_mask, cpus_allowed);
4042 retval = set_cpus_allowed_ptr(p, new_mask);
4046 if (!cpumask_subset(new_mask, cpus_allowed)) {
4052 cpumask_copy(new_mask, cpus_allowed);
4057 free_cpumask_var(new_mask);
4058 out_free_cpus_allowed:
4059 free_cpumask_var(cpus_allowed);
4066 static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
4069 if (len < cpumask_size())
4070 cpumask_clear(new_mask);
4071 else if (len > cpumask_size())
4072 len = cpumask_size();
4084 unsigned long __user *, user_mask_ptr)
4089 if (!alloc_cpumask_var(&new_mask,
GFP_KERNEL))
4092 retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
4095 free_cpumask_var(new_mask);
4102 unsigned long flags;
4109 p = find_process_by_pid(pid);
4135 unsigned long __user *, user_mask_ptr)
4142 if (len & (sizeof(unsigned long)-1))
4150 size_t retlen = min_t(size_t, len, cpumask_size());
4157 free_cpumask_var(mask);
4170 struct rq *rq = this_rq_lock();
4173 current->sched_class->yield_task(rq);
4189 static inline int should_resched(void)
4194 static void __cond_resched(void)
4203 if (should_resched()) {
4221 int resched = should_resched();
4226 if (spin_needbreak(lock) || resched) {
4243 if (should_resched()) {
4297 struct rq *
rq, *p_rq;
4298 unsigned long flags;
4306 double_rq_lock(rq, p_rq);
4308 double_rq_unlock(rq, p_rq);
4318 if (task_running(p_rq, p) || p->state)
4321 yielded = curr->sched_class->yield_to_task(rq, p, preempt);
4328 if (preempt && rq != p_rq)
4333 double_rq_unlock(rq, p_rq);
4349 struct rq *rq =
raw_rq();
4351 delayacct_blkio_start();
4358 delayacct_blkio_end();
4364 struct rq *rq =
raw_rq();
4367 delayacct_blkio_start();
4374 delayacct_blkio_end();
4439 unsigned int time_slice;
4440 unsigned long flags;
4450 p = find_process_by_pid(pid);
4458 rq = task_rq_lock(p, &flags);
4459 time_slice = p->sched_class->get_rr_interval(rq, p);
4460 task_rq_unlock(rq, p, &flags);
4476 unsigned long free = 0;
4481 state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
4482 #if BITS_PER_LONG == 32
4493 #ifdef CONFIG_DEBUG_STACK_USAGE
4494 free = stack_not_used(p);
4507 #if BITS_PER_LONG == 32
4509 " task PC stack pid father\n");
4512 " task PC stack pid father\n");
4521 if (!state_filter || (p->
state & state_filter))
4527 #ifdef CONFIG_SCHED_DEBUG
4553 struct rq *rq =
cpu_rq(cpu);
4554 unsigned long flags;
4574 __set_task_cpu(idle, cpu);
4578 #if defined(CONFIG_SMP)
4590 ftrace_graph_init_idle_task(idle, cpu);
4591 #if defined(CONFIG_SMP)
4631 unsigned long flags;
4633 unsigned int dest_cpu;
4636 rq = task_rq_lock(p, &flags);
4641 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
4651 do_set_cpus_allowed(p, new_mask);
4659 struct migration_arg arg = { p, dest_cpu };
4661 task_rq_unlock(rq, p, &flags);
4667 task_rq_unlock(rq, p, &flags);
4684 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4686 struct rq *rq_dest, *rq_src;
4692 rq_src = cpu_rq(src_cpu);
4693 rq_dest = cpu_rq(dest_cpu);
4696 double_rq_lock(rq_src, rq_dest);
4698 if (task_cpu(p) != src_cpu)
4709 dequeue_task(rq_src, p, 0);
4710 set_task_cpu(p, dest_cpu);
4711 enqueue_task(rq_dest, p, 0);
4717 double_rq_unlock(rq_src, rq_dest);
4727 static int migration_cpu_stop(void *data)
4729 struct migration_arg *arg = data;
4736 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
4741 #ifdef CONFIG_HOTPLUG_CPU
4747 void idle_task_exit(void)
4765 static void calc_load_migrate(struct rq *rq)
4767 long delta = calc_load_fold_active(rq);
4769 atomic_long_add(delta, &calc_load_tasks);
4780 static void migrate_tasks(unsigned int dead_cpu)
4782 struct rq *rq = cpu_rq(dead_cpu);
4805 next = pick_next_task(rq);
4810 dest_cpu = select_fallback_rq(dead_cpu, next);
4813 __migrate_task(next, dead_cpu, dest_cpu);
4823 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
4825 static struct ctl_table sd_ctl_dir[] = {
4833 static struct ctl_table sd_ctl_root[] = {
4837 .child = sd_ctl_dir,
4842 static struct ctl_table *sd_alloc_ctl_entry(int n)
4850 static void sd_free_ctl_entry(struct ctl_table **tablep)
4860 for (entry = *tablep; entry->mode; entry++) {
4862 sd_free_ctl_entry(&entry->child);
4871 static int min_load_idx = 0;
4875 set_table_entry(struct ctl_table *entry,
4887 entry->extra1 = &min_load_idx;
4888 entry->extra2 = &max_load_idx;
4893 sd_alloc_ctl_domain_table(struct sched_domain *sd)
4900 set_table_entry(&table[0], "min_interval", &sd->min_interval,
4902 set_table_entry(&table[1], "max_interval", &sd->max_interval,
4904 set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
4906 set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
4908 set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
4910 set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
4912 set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
4914 set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
4916 set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
4918 set_table_entry(&table[9], "cache_nice_tries",
4919 &sd->cache_nice_tries,
4921 set_table_entry(&table[10], "flags", &sd->flags,
4923 set_table_entry(&table[11], "name", sd->name,
4930 static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
4933 struct sched_domain *sd;
4934 int domain_num = 0,
i;
4937 for_each_domain(cpu, sd)
4939 entry = table = sd_alloc_ctl_entry(domain_num + 1);
4944 for_each_domain(cpu, sd) {
4948 entry->child = sd_alloc_ctl_domain_table(sd);
4956 static void register_sched_domain_sysctl(void)
4959 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
4972 entry->child = sd_alloc_ctl_cpu_table(i);
4981 static void unregister_sched_domain_sysctl(void)
4983 if (sd_sysctl_header)
4985 sd_sysctl_header = NULL;
4986 if (sd_ctl_dir[0].child)
4987 sd_free_ctl_entry(&sd_ctl_dir[0].child);
4990 static void register_sched_domain_sysctl(void)
4993 static void unregister_sched_domain_sysctl(void)
4998 static void set_rq_online(struct rq *rq)
5003 cpumask_set_cpu(rq->cpu, rq->rd->online);
5007 if (class->rq_online)
5008 class->rq_online(rq);
5013 static void set_rq_offline(struct rq *rq)
5019 if (class->rq_offline)
5020 class->rq_offline(rq);
5023 cpumask_clear_cpu(rq->cpu, rq->rd->online);
5033 migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5035 int cpu = (long)hcpu;
5036 unsigned long flags;
5037 struct rq *rq = cpu_rq(cpu);
5056 #ifdef CONFIG_HOTPLUG_CPU
5058 sched_ttwu_pending();
5071 calc_load_migrate(rq);
5087 .notifier_call = migration_call,
5092 unsigned long action, void *hcpu)
5105 unsigned long action, void *hcpu)
5116 static int __init migration_init(void)
5123 BUG_ON(err == NOTIFY_BAD);
5124 migration_call(&migration_notifier,
CPU_ONLINE, cpu);
5125 register_cpu_notifier(&migration_notifier);
5140 #ifdef CONFIG_SCHED_DEBUG
5144 static int __init sched_debug_setup(char *str)
5146 sched_debug_enabled = 1;
5152 static inline bool sched_debug(void)
5154 return sched_debug_enabled;
5157 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
5160 struct sched_group *group = sd->groups;
5163 cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
5164 cpumask_clear(groupmask);
5168 if (!(sd->flags & SD_LOAD_BALANCE)) {
5169 printk(
"does not load-balance\n");
5200 if (!group->sgp->power_orig) {
5207 if (!cpumask_weight(sched_group_cpus(group))) {
5213 if (!(sd->flags & SD_OVERLAP) &&
5214 cpumask_intersects(groupmask, sched_group_cpus(group))) {
5220 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
5222 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
5230 group = group->next;
5231 } while (group != sd->groups);
5234 if (!cpumask_equal(sched_domain_span(sd), groupmask))
5238 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
5240 "of domain->span\n");
5244 static void sched_domain_debug(struct sched_domain *sd, int cpu)
5248 if (!sched_debug_enabled)
5259 if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
5268 # define sched_domain_debug(sd, cpu) do { } while (0)
5269 static inline bool sched_debug(void)
5275 static int sd_degenerate(struct sched_domain *sd)
5277 if (cpumask_weight(sched_domain_span(sd)) == 1)
5281 if (sd->flags & (SD_LOAD_BALANCE |
5282 SD_BALANCE_NEWIDLE |
5286 SD_SHARE_PKG_RESOURCES)) {
5287 if (sd->groups != sd->groups->next)
5292 if (sd->flags & (SD_WAKE_AFFINE))
5299 sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5301 unsigned long cflags = sd->flags, pflags = parent->flags;
5303 if (sd_degenerate(parent))
5306 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
5310 if (parent->groups == parent->groups->next) {
5311 pflags &= ~(SD_LOAD_BALANCE |
5312 SD_BALANCE_NEWIDLE |
5316 SD_SHARE_PKG_RESOURCES);
5318 pflags &= ~SD_SERIALIZE;
5320 if (~cflags & pflags)
5326 static void free_rootdomain(struct rcu_head *rcu)
5328 struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
5331 free_cpumask_var(rd->rto_mask);
5332 free_cpumask_var(rd->online);
5333 free_cpumask_var(rd->span);
5337 static void rq_attach_root(struct rq *rq, struct root_domain *rd)
5339 struct root_domain *old_rd = NULL;
5340 unsigned long flags;
5350 cpumask_clear_cpu(rq->cpu, old_rd->span);
5364 cpumask_set_cpu(rq->cpu, rd->span);
5374 static int init_rootdomain(struct root_domain *rd)
5376 memset(rd, 0, sizeof(*rd));
5378 if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
5380 if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
5382 if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
5390 free_cpumask_var(rd->rto_mask);
5392 free_cpumask_var(rd->online);
5394 free_cpumask_var(rd->span);
5403 struct root_domain def_root_domain;
5405 static void init_defrootdomain(void)
5407 init_rootdomain(&def_root_domain);
5412 static struct root_domain *alloc_rootdomain(void)
5414 struct root_domain *rd;
5420 if (init_rootdomain(rd) != 0) {
5428 static void free_sched_groups(struct sched_group *sg, int free_sgp)
5444 } while (sg != first);
5447 static void free_sched_domain(struct rcu_head *rcu)
5449 struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
5455 if (sd->flags & SD_OVERLAP) {
5456 free_sched_groups(sd->groups, 1);
5458 kfree(sd->groups->sgp);
5464 static void destroy_sched_domain(struct sched_domain *sd, int cpu)
5466 call_rcu(&sd->rcu, free_sched_domain);
5469 static void destroy_sched_domains(struct sched_domain *sd, int cpu)
5471 for (; sd; sd = sd->parent)
5472 destroy_sched_domain(sd, cpu);
5487 static void update_top_cache_domain(
int cpu)
5489 struct sched_domain *sd;
5492 sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
5494 id = cpumask_first(sched_domain_span(sd));
5505 cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
5507 struct rq *rq = cpu_rq(cpu);
5508 struct sched_domain *tmp;
5511 for (tmp = sd; tmp; ) {
5512 struct sched_domain *parent = tmp->parent;
5516 if (sd_parent_degenerate(tmp, parent)) {
5517 tmp->parent = parent->parent;
5519 parent->parent->child =
tmp;
5520 destroy_sched_domain(parent, cpu);
5525 if (sd && sd_degenerate(sd)) {
5528 destroy_sched_domain(tmp, cpu);
5533 sched_domain_debug(sd, cpu);
5535 rq_attach_root(rq, rd);
5538 destroy_sched_domains(tmp, cpu);
5540 update_top_cache_domain(cpu);
5547 static int __init isolated_cpu_setup(char *str)
5549 alloc_bootmem_cpumask_var(&cpu_isolated_map);
5550 cpulist_parse(str, cpu_isolated_map);
5554 __setup("isolcpus=", isolated_cpu_setup);
5556 static const struct cpumask *cpu_cpu_mask(int cpu)
5564 struct sched_group_power **__percpu sgp;
5568 struct sched_domain **__percpu sd;
5569 struct root_domain *rd;
5579 struct sched_domain_topology_level;
5581 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
5582 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
5584 #define SDTL_OVERLAP 0x01
5586 struct sched_domain_topology_level {
5587 sched_domain_init_f init;
5588 sched_domain_mask_f mask;
5591 struct sd_data data;
5607 static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
5609 const struct cpumask *span = sched_domain_span(sd);
5610 struct sd_data *sdd = sd->private;
5611 struct sched_domain *sibling;
5619 cpumask_set_cpu(i, sched_group_mask(sg));
5627 int group_balance_cpu(struct sched_group *sg)
5633 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
5636 const struct cpumask *span = sched_domain_span(sd);
5637 struct cpumask *covered = sched_domains_tmpmask;
5638 struct sd_data *sdd = sd->private;
5639 struct sched_domain *child;
5642 cpumask_clear(covered);
5656 sg = kzalloc_node(
sizeof(
struct sched_group) + cpumask_size(),
5662 sg_span = sched_group_cpus(sg);
5664 child = child->child;
5665 cpumask_copy(sg_span, sched_domain_span(child));
5667 cpumask_set_cpu(i, sg_span);
5669 cpumask_or(covered, covered, sg_span);
5673 build_group_mask(sd, sg);
5688 group_balance_cpu(sg) == cpu)
5698 sd->groups = groups;
5703 free_sched_groups(first, 0);
5708 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
5710 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
5711 struct sched_domain *child = sd->child;
5714 cpu = cpumask_first(sched_domain_span(child));
5733 build_sched_groups(struct sched_domain *sd, int cpu)
5735 struct sched_group *first = NULL, *last = NULL;
5736 struct sd_data *sdd = sd->private;
5737 const struct cpumask *span = sched_domain_span(sd);
5741 get_group(cpu, sdd, &sd->groups);
5744 if (cpu != cpumask_first(sched_domain_span(sd)))
5748 covered = sched_domains_tmpmask;
5750 cpumask_clear(covered);
5753 struct sched_group *sg;
5754 int group = get_group(i, sdd, &sg);
5760 cpumask_clear(sched_group_cpus(sg));
5762 cpumask_setall(sched_group_mask(sg));
5765 if (get_group(j, sdd, NULL) != group)
5768 cpumask_set_cpu(j, covered);
5769 cpumask_set_cpu(j, sched_group_cpus(sg));
5793 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
5795 struct sched_group *sg = sd->groups;
5800 sg->group_weight = cpumask_weight(sched_group_cpus(sg));
5802 } while (sg != sd->groups);
5804 if (cpu != group_balance_cpu(sg))
5808 atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
5813 return 0*SD_ASYM_PACKING;
5821 #ifdef CONFIG_SCHED_DEBUG
5822 # define SD_INIT_NAME(sd, type) sd->name = #type
5824 # define SD_INIT_NAME(sd, type) do { } while (0)
5827 #define SD_INIT_FUNC(type) \
5828 static noinline struct sched_domain * \
5829 sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \
5831 struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \
5832 *sd = SD_##type##_INIT; \
5833 SD_INIT_NAME(sd, type); \
5834 sd->private = &tl->data; \
5839 #ifdef CONFIG_SCHED_SMT
5840 SD_INIT_FUNC(SIBLING)
5842 #ifdef CONFIG_SCHED_MC
5845 #ifdef CONFIG_SCHED_BOOK
5849 static int default_relax_domain_level = -1;
5850 int sched_domain_level_max;
5852 static int __init setup_relax_domain_level(char *str)
5854 if (kstrtoint(str, 0, &default_relax_domain_level))
5855 pr_warn("Unable to set relax_domain_level\n");
5859 __setup("relax_domain_level=", setup_relax_domain_level);
5861 static void set_domain_attribute(struct sched_domain *sd,
5862 struct sched_domain_attr *attr)
5866 if (!attr || attr->relax_domain_level < 0) {
5867 if (default_relax_domain_level < 0)
5870 request = default_relax_domain_level;
5872 request = attr->relax_domain_level;
5873 if (request < sd->level) {
5875 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
5878 sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
5885 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
5891 free_rootdomain(&d->rd->rcu);
5895 __sdt_free(cpu_map);
5901 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
5902 const struct cpumask *cpu_map)
5904 memset(d, 0, sizeof(*d));
5906 if (__sdt_alloc(cpu_map))
5907 return sa_sd_storage;
5910 return sa_sd_storage;
5911 d->rd = alloc_rootdomain();
5914 return sa_rootdomain;
5922 static void claim_allocations(int cpu, struct sched_domain *sd)
5924 struct sd_data *sdd = sd->private;
5936 #ifdef CONFIG_SCHED_SMT
5937 static const struct cpumask *cpu_smt_mask(int cpu)
5946 static struct sched_domain_topology_level default_topology[] = {
5947 #ifdef CONFIG_SCHED_SMT
5948 { sd_init_SIBLING, cpu_smt_mask, },
5950 #ifdef CONFIG_SCHED_MC
5953 #ifdef CONFIG_SCHED_BOOK
5954 { sd_init_BOOK, cpu_book_mask, },
5956 { sd_init_CPU, cpu_cpu_mask, },
5960 static struct sched_domain_topology_level *sched_domain_topology = default_topology;
5964 static int sched_domains_numa_levels;
5965 static int *sched_domains_numa_distance;
5966 static struct cpumask ***sched_domains_numa_masks;
5967 static int sched_domains_curr_level;
5969 static inline int sd_local_flags(int level)
5974 return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
5977 static struct sched_domain *
5978 sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
5980 struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
5981 int level = tl->numa_level;
5982 int sd_weight = cpumask_weight(
5983 sched_domains_numa_masks[level][cpu_to_node(cpu)]);
5985 *sd = (struct sched_domain){
5986 .min_interval = sd_weight,
5987 .max_interval = 2*sd_weight,
5989 .imbalance_pct = 125,
5990 .cache_nice_tries = 2,
5997 .flags = 1*SD_LOAD_BALANCE
5998 | 1*SD_BALANCE_NEWIDLE
6003 | 0*SD_SHARE_CPUPOWER
6004 | 0*SD_SHARE_PKG_RESOURCES
6006 | 0*SD_PREFER_SIBLING
6007 | sd_local_flags(level)
6010 .balance_interval = sd_weight,
6012 SD_INIT_NAME(sd, NUMA);
6013 sd->private = &tl->data;
6018 sched_domains_curr_level = tl->numa_level;
6023 static const struct cpumask *sd_numa_mask(int cpu)
6025 return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
6028 static void sched_numa_warn(const char *str)
6030 static int done = false;
6049 static bool find_numa_distance(int distance)
6056 for (i = 0; i < sched_domains_numa_levels; i++) {
6057 if (sched_domains_numa_distance[i] == distance)
6064 static void sched_init_numa(void)
6067 struct sched_domain_topology_level *tl;
6071 sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
6072 if (!sched_domains_numa_distance)
6082 next_distance = curr_distance;
6088 if (distance > curr_distance &&
6089 (distance < next_distance ||
6090 next_distance == curr_distance))
6091 next_distance = distance;
6099 sched_numa_warn("Node-distance not symmetric");
6101 if (sched_debug() && i && !find_numa_distance(distance))
6102 sched_numa_warn("Node-0 not representative");
6104 if (next_distance != curr_distance) {
6105 sched_domains_numa_distance[level++] = next_distance;
6106 sched_domains_numa_levels = level;
6107 curr_distance = next_distance;
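/*
 * Annotation (not in the original file): this loop walks the node_distance()
 * matrix and records each distinct distance value in increasing order; the
 * number of distinct distances becomes the number of extra NUMA topology
 * levels, and sched_domains_numa_masks[level][node] later caches which nodes
 * fall within each distance of a given node.
 */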
6134 sched_domains_numa_levels = 0;
6136 sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
6137 if (!sched_domains_numa_masks)
6144 for (i = 0; i < level; i++) {
6145 sched_domains_numa_masks[i] =
6146 kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
6147 if (!sched_domains_numa_masks[i])
6155 sched_domains_numa_masks[i][j] = mask;
6166 tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
6167 sizeof(struct sched_domain_topology_level), GFP_KERNEL);
6174 for (i = 0; default_topology[i].init; i++)
6175 tl[i] = default_topology[i];
6180 for (j = 0; j < level; i++, j++) {
6181 tl[i] = (struct sched_domain_topology_level){
6182 .init = sd_numa_init,
6183 .mask = sd_numa_mask,
6184 .flags = SDTL_OVERLAP,
6189 sched_domain_topology = tl;
6191 sched_domains_numa_levels = level;
6194 static void sched_domains_numa_masks_set(int cpu)
6199 for (i = 0; i < sched_domains_numa_levels; i++) {
6201 if (
node_distance(j, node) <= sched_domains_numa_distance[i])
6202 cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
6207 static void sched_domains_numa_masks_clear(
int cpu)
6210 for (i = 0; i < sched_domains_numa_levels; i++) {
6212 cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
6220 static int sched_domains_numa_masks_update(
struct notifier_block *nfb,
6221 unsigned long action,
6224 int cpu = (
long)hcpu;
6228 sched_domains_numa_masks_set(cpu);
6232 sched_domains_numa_masks_clear(cpu);
6242 static inline void sched_init_numa(
void)
6246 static int sched_domains_numa_masks_update(
struct notifier_block *nfb,
6247 unsigned long action,
6254 static int __sdt_alloc(
const struct cpumask *cpu_map)
6256 struct sched_domain_topology_level *tl;
6259 for (tl = sched_domain_topology; tl->init; tl++) {
6260 struct sd_data *sdd = &tl->data;
6275 struct sched_domain *sd;
6276 struct sched_group *sg;
6277 struct sched_group_power *sgp;
6279 sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
6286 sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
6295 sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
6307 static void __sdt_free(const struct cpumask *cpu_map)
6309 struct sched_domain_topology_level *tl;
6312 for (tl = sched_domain_topology; tl->init; tl++) {
6313 struct sd_data *sdd = &tl->data;
6316 struct sched_domain *sd;
6320 if (sd && (sd->flags & SD_OVERLAP))
6321 free_sched_groups(sd->groups, 0);
6339 struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
6340 struct s_data *d, const struct cpumask *cpu_map,
6341 struct sched_domain_attr *attr, struct sched_domain *child,
6344 struct sched_domain *sd = tl->init(tl, cpu);
6348 cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
6350 sd->level = child->level + 1;
6351 sched_domain_level_max =
max(sched_domain_level_max, sd->level);
6355 set_domain_attribute(sd, attr);
6364 static int build_sched_domains(
const struct cpumask *cpu_map,
6365 struct sched_domain_attr *attr)
6368 struct sched_domain *sd;
6372 alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
6373 if (alloc_state != sa_rootdomain)
6378 struct sched_domain_topology_level *tl;
6381 for (tl = sched_domain_topology; tl->init; tl++) {
6382 sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
6383 if (tl->flags & SDTL_OVERLAP ||
sched_feat(FORCE_SD_OVERLAP))
6384 sd->flags |= SD_OVERLAP;
6385 if (cpumask_equal(cpu_map, sched_domain_span(sd)))
6397 for (sd = *
per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
6398 sd->span_weight = cpumask_weight(sched_domain_span(sd));
6399 if (sd->flags & SD_OVERLAP) {
6400 if (build_overlap_sched_groups(sd, i))
6403 if (build_sched_groups(sd, i))
6414 for (sd = *
per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
6415 claim_allocations(i, sd);
6416 init_sched_groups_power(i, sd);
6424 cpu_attach_domain(sd, d.rd, i);
6430 __free_domain_allocs(&d, alloc_state, cpu_map);
6435 static int ndoms_cur;
6436 static struct sched_domain_attr *dattr_cur;
6464 for (i = 0; i < ndoms; i++) {
6465 if (!alloc_cpumask_var(&doms[i],
GFP_KERNEL)) {
6466 free_sched_domains(doms, i);
6473 void free_sched_domains(
cpumask_var_t doms[],
unsigned int ndoms)
6476 for (i = 0; i < ndoms; i++)
6477 free_cpumask_var(doms[i]);
6486 static int init_sched_domains(
const struct cpumask *cpu_map)
6492 doms_cur = alloc_sched_domains(ndoms_cur);
6494 doms_cur = &fallback_doms;
6495 cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
6496 err = build_sched_domains(doms_cur[0], NULL);
6497 register_sched_domain_sysctl();
6506 static void detach_destroy_domains(const struct cpumask *cpu_map)
6512 cpu_attach_domain(NULL, &def_root_domain, i);
6517 static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
6518 struct sched_domain_attr *new, int idx_new)
6520 struct sched_domain_attr tmp;
6527 return !memcmp(cur ? (cur + idx_cur) : &tmp,
6528 new ? (new + idx_new) : &tmp,
6529 sizeof(struct sched_domain_attr));
6558 void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
6559 struct sched_domain_attr *dattr_new)
6567 unregister_sched_domain_sysctl();
6572 n = doms_new ? ndoms_new : 0;
6575 for (i = 0; i < ndoms_cur; i++) {
6576 for (j = 0; j < n && !new_topology; j++) {
6577 if (cpumask_equal(doms_cur[i], doms_new[j])
6578 && dattrs_equal(dattr_cur, i, dattr_new, j))
6582 detach_destroy_domains(doms_cur[i]);
6587 if (doms_new == NULL) {
6589 doms_new = &fallback_doms;
6590 cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map);
6595 for (i = 0; i < ndoms_new; i++) {
6596 for (j = 0; j < ndoms_cur && !new_topology; j++) {
6597 if (cpumask_equal(doms_new[i], doms_cur[j])
6598 && dattrs_equal(dattr_new, i, dattr_cur, j))
6602 build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
6608 if (doms_cur != &fallback_doms)
6609 free_sched_domains(doms_cur, ndoms_cur);
6611 doms_cur = doms_new;
6612 dattr_cur = dattr_new;
6613 ndoms_cur = ndoms_new;
6615 register_sched_domain_sysctl();
6620 static int num_cpus_frozen;
6630 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
6644 if (likely(num_cpus_frozen)) {
6645 partition_sched_domains(1, NULL, NULL);
6665 static int cpuset_cpu_inactive(
struct notifier_block *nfb,
unsigned long action,
6674 partition_sched_domains(1, NULL, NULL);
6686 alloc_cpumask_var(&non_isolated_cpus,
GFP_KERNEL);
6687 alloc_cpumask_var(&fallback_doms,
GFP_KERNEL);
6693 init_sched_domains(cpu_active_mask);
6694 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
6695 if (cpumask_empty(non_isolated_cpus))
6710 if (set_cpus_allowed_ptr(
current, non_isolated_cpus) < 0)
6713 free_cpumask_var(non_isolated_cpus);
6733 #ifdef CONFIG_CGROUP_SCHED
6743 unsigned long alloc_size = 0, ptr;
6745 #ifdef CONFIG_FAIR_GROUP_SCHED
6746 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
6748 #ifdef CONFIG_RT_GROUP_SCHED
6749 alloc_size += 2 * nr_cpu_ids * sizeof(void **);
6751 #ifdef CONFIG_CPUMASK_OFFSTACK
6757 #ifdef CONFIG_FAIR_GROUP_SCHED
6765 #ifdef CONFIG_RT_GROUP_SCHED
6773 #ifdef CONFIG_CPUMASK_OFFSTACK
6775 per_cpu(load_balance_tmpmask, i) = (
void *)
ptr;
6776 ptr += cpumask_size();
6782 init_defrootdomain();
6786 global_rt_period(), global_rt_runtime());
6788 #ifdef CONFIG_RT_GROUP_SCHED
6790 global_rt_period(), global_rt_runtime());
6793 #ifdef CONFIG_CGROUP_SCHED
6801 #ifdef CONFIG_CGROUP_CPUACCT
6805 BUG_ON(!root_cpuacct.cpuusage);
6817 #ifdef CONFIG_FAIR_GROUP_SCHED
6819 INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
6844 #ifdef CONFIG_RT_GROUP_SCHED
6845 INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
6858 rq->post_schedule = 0;
6859 rq->active_balance = 0;
6867 INIT_LIST_HEAD(&rq->cfs_tasks);
6869 rq_attach_root(rq, &def_root_domain);
6880 #ifdef CONFIG_PREEMPT_NOTIFIERS
6884 #ifdef CONFIG_RT_MUTEXES
6910 zalloc_cpumask_var(&sched_domains_tmpmask,
GFP_NOWAIT);
6912 if (cpu_isolated_map == NULL)
6913 zalloc_cpumask_var(&cpu_isolated_map,
GFP_NOWAIT);
6914 idle_thread_set_boot_cpu();
6918 scheduler_running = 1;
6921 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
6922 static inline int preempt_count_equals(int preempt_offset)
6926 return (nested == preempt_offset);
6929 void __might_sleep(const char *file, int line, int preempt_offset)
6931 static unsigned long prev_jiffy;
6934 if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
6942 "BUG: sleeping function called from invalid context at %s:%d\n",
6945 "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
6951 print_irqtrace_events(
current);
6957 #ifdef CONFIG_MAGIC_SYSRQ
6958 static void normalize_task(
struct rq *rq,
struct task_struct *p)
6961 int old_prio = p->
prio;
6973 check_class_changed(rq, p, prev_class, old_prio);
6979 unsigned long flags;
6990 p->se.exec_start = 0;
6991 #ifdef CONFIG_SCHEDSTATS
6992 p->se.statistics.wait_start = 0;
6993 p->se.statistics.sleep_start = 0;
6994 p->se.statistics.block_start = 0;
7008 rq = __task_rq_lock(p);
7010 normalize_task(rq, p);
7012 __task_rq_unlock(rq);
7021 #if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB)
7068 #ifdef CONFIG_CGROUP_SCHED
7072 static void free_sched_group(
struct task_group *tg)
7081 struct task_group *sched_create_group(
struct task_group *parent)
7083 struct task_group *tg;
7084 unsigned long flags;
7097 list_add_rcu(&tg->list, &task_groups);
7101 tg->parent = parent;
7102 INIT_LIST_HEAD(&tg->children);
7103 list_add_rcu(&tg->siblings, &parent->children);
7104 spin_unlock_irqrestore(&task_group_lock, flags);
7109 free_sched_group(tg);
7114 static void free_sched_group_rcu(struct rcu_head *rhp)
7117 free_sched_group(container_of(rhp, struct task_group, rcu));
7121 void sched_destroy_group(struct task_group *tg)
7123 unsigned long flags;
7131 list_del_rcu(&tg->list);
7133 spin_unlock_irqrestore(&task_group_lock, flags);
7136 call_rcu(&tg->rcu, free_sched_group_rcu);
7146 struct task_group *tg;
7148 unsigned long flags;
7151 rq = task_rq_lock(tsk, &flags);
7153 running = task_current(rq, tsk);
7157 dequeue_task(rq, tsk, 0);
7159 tsk->sched_class->put_prev_task(rq, tsk);
7161 tg =
container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id,
7162 lockdep_is_held(&tsk->sighand->siglock)),
7163 struct task_group, css);
7164 tg = autogroup_task_group(tsk, tg);
7165 tsk->sched_task_group = tg;
7167 #ifdef CONFIG_FAIR_GROUP_SCHED
7168 if (tsk->sched_class->task_move_group)
7169 tsk->sched_class->task_move_group(tsk, on_rq);
7172 set_task_rq(tsk, task_cpu(tsk));
7175 tsk->sched_class->set_curr_task(rq);
7177 enqueue_task(rq, tsk, 0);
7179 task_rq_unlock(rq, tsk, &flags);
7183 #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
7184 static unsigned long to_ratio(u64 period, u64 runtime)
7189 return div64_u64(runtime << 20, period);
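/*
 * Annotation (not in the original file): to_ratio() expresses runtime/period
 * as a 20-bit fixed-point fraction, e.g. a 50% share gives
 *	to_ratio(100000, 50000) == (50000 << 20) / 100000 == 524288 == 0.5 << 20
 * which lets the RT and CFS bandwidth checks compare group shares with plain
 * integer arithmetic.
 */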
7193 #ifdef CONFIG_RT_GROUP_SCHED
7200 static inline int tg_has_rt_tasks(struct task_group *tg)
7205 if (rt_task(p) && task_rq(p)->rt.tg == tg)
7212 struct rt_schedulable_data {
7213 struct task_group *tg;
7218 static int tg_rt_schedulable(struct task_group *tg, void *data)
7220 struct rt_schedulable_data *d = data;
7221 struct task_group *child;
7222 unsigned long total, sum = 0;
7225 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
7226 runtime = tg->rt_bandwidth.rt_runtime;
7229 period = d->rt_period;
7230 runtime = d->rt_runtime;
7242 if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
7245 total = to_ratio(period, runtime);
7250 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
7256 list_for_each_entry_rcu(child, &tg->children, siblings) {
7257 period = ktime_to_ns(child->rt_bandwidth.rt_period);
7258 runtime = child->rt_bandwidth.rt_runtime;
7260 if (child == d->tg) {
7261 period = d->rt_period;
7262 runtime = d->rt_runtime;
7265 sum += to_ratio(period, runtime);
7274 static int __rt_schedulable(
struct task_group *tg,
u64 period,
u64 runtime)
7278 struct rt_schedulable_data data = {
7281 .rt_runtime = runtime,
7285 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
7291 static int tg_set_rt_bandwidth(struct task_group *tg,
7292 u64 rt_period, u64 rt_runtime)
7298 err = __rt_schedulable(tg, rt_period, rt_runtime);
7303 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
7304 tg->rt_bandwidth.rt_runtime = rt_runtime;
7321 int sched_group_set_rt_runtime(
struct task_group *tg,
long rt_runtime_us)
7325 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
7327 if (rt_runtime_us < 0)
7330 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
7333 long sched_group_rt_runtime(
struct task_group *tg)
7340 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
7342 return rt_runtime_us;
7345 int sched_group_set_rt_period(
struct task_group *tg,
long rt_period_us)
7350 rt_runtime = tg->rt_bandwidth.rt_runtime;
7355 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
7358 long sched_group_rt_period(
struct task_group *tg)
7362 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
7364 return rt_period_us;
7367 static int sched_rt_global_constraints(
void)
7372 if (sysctl_sched_rt_period <= 0)
7375 runtime = global_rt_runtime();
7376 period = global_rt_period();
7386 ret = __rt_schedulable(NULL, 0, 0);
7393 int sched_rt_can_attach(
struct task_group *tg,
struct task_struct *tsk)
7396 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
7403 static int sched_rt_global_constraints(
void)
7405 unsigned long flags;
7408 if (sysctl_sched_rt_period <= 0)
7415 if (sysctl_sched_rt_runtime == 0)
7433 void __user *
buffer,
size_t *lenp,
7437 int old_period, old_runtime;
7446 if (!ret && write) {
7447 ret = sched_rt_global_constraints();
7449 sysctl_sched_rt_period = old_period;
7450 sysctl_sched_rt_runtime = old_runtime;
7454 ns_to_ktime(global_rt_period());
7462 #ifdef CONFIG_CGROUP_SCHED
7465 static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
7467 return container_of(cgroup_subsys_state(cgrp, cpu_cgroup_subsys_id),
7468 struct task_group, css);
7471 static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
7473 struct task_group *tg, *parent;
7475 if (!cgrp->parent) {
7480 parent = cgroup_tg(cgrp->parent);
7481 tg = sched_create_group(parent);
7488 static void cpu_cgroup_destroy(
struct cgroup *cgrp)
7490 struct task_group *tg = cgroup_tg(cgrp);
7492 sched_destroy_group(tg);
7495 static int cpu_cgroup_can_attach(
struct cgroup *cgrp,
7500 cgroup_taskset_for_each(task, cgrp, tset) {
7501 #ifdef CONFIG_RT_GROUP_SCHED
7502 if (!sched_rt_can_attach(cgroup_tg(cgrp), task))
7513 static void cpu_cgroup_attach(
struct cgroup *cgrp,
7518 cgroup_taskset_for_each(task, cgrp, tset)
7519 sched_move_task(task);
7523 cpu_cgroup_exit(
struct cgroup *cgrp,
struct cgroup *old_cgrp,
7534 sched_move_task(task);
7537 #ifdef CONFIG_FAIR_GROUP_SCHED
7538 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
7541 return sched_group_set_shares(cgroup_tg(cgrp), scale_load(shareval));
7544 static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
7546 struct task_group *tg = cgroup_tg(cgrp);
7551 #ifdef CONFIG_CFS_BANDWIDTH
7557 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
7559 static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
7561 int i, ret = 0, runtime_enabled, runtime_was_enabled;
7564 if (tg == &root_task_group)
7572 if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
7580 if (period > max_cfs_quota_period)
7584 ret = __cfs_schedulable(tg, period, quota);
7589 runtime_was_enabled = cfs_b->quota !=
RUNTIME_INF;
7592 cfs_b->period = ns_to_ktime(period);
7593 cfs_b->quota = quota;
7595 __refill_cfs_bandwidth_runtime(cfs_b);
7597 if (runtime_enabled && cfs_b->timer_active) {
7599 cfs_b->timer_active = 0;
7600 __start_cfs_bandwidth(cfs_b);
7606 struct rq *rq = cfs_rq->rq;
7609 cfs_rq->runtime_enabled = runtime_enabled;
7610 cfs_rq->runtime_remaining = 0;
7612 if (cfs_rq->throttled)
7613 unthrottle_cfs_rq(cfs_rq);
7622 int tg_set_cfs_quota(
struct task_group *tg,
long cfs_quota_us)
7626 period = ktime_to_ns(tg->cfs_bandwidth.period);
7627 if (cfs_quota_us < 0)
7632 return tg_set_cfs_bandwidth(tg, period, quota);
7635 long tg_get_cfs_quota(
struct task_group *tg)
7642 quota_us = tg->cfs_bandwidth.quota;
7648 int tg_set_cfs_period(
struct task_group *tg,
long cfs_period_us)
7653 quota = tg->cfs_bandwidth.quota;
7655 return tg_set_cfs_bandwidth(tg, period, quota);
7658 long tg_get_cfs_period(
struct task_group *tg)
7662 cfs_period_us = ktime_to_ns(tg->cfs_bandwidth.period);
7665 return cfs_period_us;
7668 static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
7670 return tg_get_cfs_quota(cgroup_tg(cgrp));
7673 static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
7676 return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
7679 static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
7681 return tg_get_cfs_period(cgroup_tg(cgrp));
7684 static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
7687 return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
7690 struct cfs_schedulable_data {
7691 struct task_group *tg;
7699 static u64 normalize_cfs_quota(
struct task_group *tg,
7700 struct cfs_schedulable_data *d)
7708 period = tg_get_cfs_period(tg);
7709 quota = tg_get_cfs_quota(tg);
7716 return to_ratio(period, quota);
7719 static int tg_cfs_schedulable_down(
struct task_group *tg,
void *data)
7721 struct cfs_schedulable_data *d =
data;
7723 s64 quota = 0, parent_quota = -1;
7728 struct cfs_bandwidth *parent_b = &tg->parent->cfs_bandwidth;
7730 quota = normalize_cfs_quota(tg, d);
7731 parent_quota = parent_b->hierarchal_quota;
7738 quota = parent_quota;
7739 else if (parent_quota !=
RUNTIME_INF && quota > parent_quota)
7742 cfs_b->hierarchal_quota = quota;
7747 static int __cfs_schedulable(
struct task_group *tg,
u64 period,
u64 quota)
7750 struct cfs_schedulable_data data = {
7762 ret = walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &data);
7768 static int cpu_stats_show(struct cgroup *cgrp, struct cftype *cft,
7769 struct cgroup_map_cb *cb)
7771 struct task_group *tg = cgroup_tg(cgrp);
7774 cb->fill(cb, "nr_periods", cfs_b->nr_periods);
7775 cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
7776 cb->fill(cb, "throttled_time", cfs_b->throttled_time);
7783 #ifdef CONFIG_RT_GROUP_SCHED
7784 static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
7787 return sched_group_set_rt_runtime(cgroup_tg(cgrp), val);
7790 static s64 cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft)
7792 return sched_group_rt_runtime(cgroup_tg(cgrp));
7795 static int cpu_rt_period_write_uint(struct cgroup *cgrp, struct cftype *cftype,
7798 return sched_group_set_rt_period(cgroup_tg(cgrp), rt_period_us);
7801 static u64 cpu_rt_period_read_uint(struct cgroup *cgrp, struct cftype *cft)
7803 return sched_group_rt_period(cgroup_tg(cgrp));
7807 static struct cftype cpu_files[] = {
7808 #ifdef CONFIG_FAIR_GROUP_SCHED
7811 .read_u64 = cpu_shares_read_u64,
7812 .write_u64 = cpu_shares_write_u64,
7815 #ifdef CONFIG_CFS_BANDWIDTH
7817 .name = "cfs_quota_us",
7818 .read_s64 = cpu_cfs_quota_read_s64,
7819 .write_s64 = cpu_cfs_quota_write_s64,
7822 .name = "cfs_period_us",
7823 .read_u64 = cpu_cfs_period_read_u64,
7824 .write_u64 = cpu_cfs_period_write_u64,
7828 .read_map = cpu_stats_show,
7831 #ifdef CONFIG_RT_GROUP_SCHED
7833 .name = "rt_runtime_us",
7834 .read_s64 = cpu_rt_runtime_read,
7835 .write_s64 = cpu_rt_runtime_write,
7838 .name = "rt_period_us",
7839 .read_u64 = cpu_rt_period_read_uint,
7840 .write_u64 = cpu_rt_period_write_uint,
7846 struct cgroup_subsys cpu_cgroup_subsys = {
7848 .create = cpu_cgroup_create,
7849 .destroy = cpu_cgroup_destroy,
7850 .can_attach = cpu_cgroup_can_attach,
7851 .attach = cpu_cgroup_attach,
7852 .exit = cpu_cgroup_exit,
7853 .subsys_id = cpu_cgroup_subsys_id,
7854 .base_cftypes = cpu_files,
7860 #ifdef CONFIG_CGROUP_CPUACCT
7869 struct cpuacct root_cpuacct;
7872 static struct cgroup_subsys_state *cpuacct_create(
struct cgroup *cgrp)
7877 return &root_cpuacct.css;
7889 goto out_free_cpuusage;
7902 static void cpuacct_destroy(struct cgroup *cgrp)
7904 struct cpuacct *ca = cgroup_ca(cgrp);
7911 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
7916 #ifndef CONFIG_64BIT
7930 static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
7934 #ifndef CONFIG_64BIT
7947 static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
7949 struct cpuacct *ca = cgroup_ca(cgrp);
7950 u64 totalcpuusage = 0;
7954 totalcpuusage += cpuacct_cpuusage_read(ca, i);
7959 static
int cpuusage_write(
struct cgroup *cgrp,
struct cftype *cftype,
7962 struct cpuacct *ca = cgroup_ca(cgrp);
7972 cpuacct_cpuusage_write(ca, i, 0);
7978 static
int cpuacct_percpu_seq_read(
struct cgroup *cgroup,
struct cftype *cft,
7981 struct cpuacct *ca = cgroup_ca(cgroup);
7986 percpu = cpuacct_cpuusage_read(ca, i);
7987 seq_printf(m,
"%llu ", (
unsigned long long) percpu);
7993 static const char *cpuacct_stat_desc[] = {
7998 static int cpuacct_stats_show(
struct cgroup *cgrp,
struct cftype *cft,
7999 struct cgroup_map_cb *cb)
8001 struct cpuacct *ca = cgroup_ca(cgrp);
8027 static struct cftype
files[] = {
8030 .read_u64 = cpuusage_read,
8031 .write_u64 = cpuusage_write,
8034 .name =
"usage_percpu",
8035 .read_seq_string = cpuacct_percpu_seq_read,
8039 .read_map = cpuacct_stats_show,
8054 if (
unlikely(!cpuacct_subsys.active))
8057 cpu = task_cpu(tsk);
8063 for (;
ca; ca = parent_ca(ca)) {
8065 *cpuusage += cputime;
8071 struct cgroup_subsys cpuacct_subsys = {
8073 .create = cpuacct_create,
8074 .destroy = cpuacct_destroy,
8075 .subsys_id = cpuacct_subsys_id,
8076 .base_cftypes =
files,