#include <linux/module.h>
#include <asm/mmu_context.h>
#include <linux/capability.h>
#include <linux/perf_event.h>
#include <linux/sysctl.h>
#include <linux/hrtimer.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/binfmts.h>

#include <asm/switch_to.h>
#include <asm/irq_regs.h>
#include <asm/mutex.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>

#include "../workqueue_sched.h"
#include "../smpboot.h"

#define CREATE_TRACE_POINTS

	if (hrtimer_active(period_timer))

	now = hrtimer_cb_get_time(period_timer);
	soft = hrtimer_get_softexpires(period_timer);
	hard = hrtimer_get_expires(period_timer);
	delta = ktime_to_ns(ktime_sub(hard, soft));

static void update_rq_clock_task(struct rq *rq, s64 delta);

	update_rq_clock_task(rq, delta);
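/*
 * The block below is the scheduler-features machinery: each SCHED_FEAT()
 * expands either into a bit of sysctl_sched_features or, with jump labels,
 * into a static_key, and the sched_feat_* fops expose the flags through
 * debugfs. A feature can be toggled at run time, e.g. (assuming debugfs is
 * mounted at /sys/kernel/debug):
 *
 *	echo NO_HRTICK > /sys/kernel/debug/sched_features
 *	cat /sys/kernel/debug/sched_features
 */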
#define SCHED_FEAT(name, enabled)	\
	(1UL << __SCHED_FEAT_##name) * enabled |

#ifdef CONFIG_SCHED_DEBUG
#define SCHED_FEAT(name, enabled)	\

static const char * const sched_feat_names[] = {

static int sched_feat_show(struct seq_file *m, void *v)

#ifdef HAVE_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {

static void sched_feat_disable(int i)
	if (static_key_enabled(&sched_feat_keys[i]))
		static_key_slow_dec(&sched_feat_keys[i]);

static void sched_feat_enable(int i)
	if (!static_key_enabled(&sched_feat_keys[i]))
		static_key_slow_inc(&sched_feat_keys[i]);

static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };

sched_feat_write(struct file *filp, const char __user *ubuf,
		 size_t cnt, loff_t *ppos)

	if (strncmp(cmp, "NO_", 3) == 0) {
		if (strcmp(cmp, sched_feat_names[i]) == 0) {
			sched_feat_disable(i);
			sched_feat_enable(i);

	if (i == __SCHED_FEAT_NR)

static int sched_feat_open(struct inode *inode, struct file *filp)

	.open		= sched_feat_open,
	.write		= sched_feat_write,

static __init int sched_init_debug(void)

static void __task_rq_unlock(struct rq *rq)

static struct rq *this_rq_lock(void)
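/*
 * High-resolution preemption tick: with CONFIG_SCHED_HRTICK the per-runqueue
 * hrtimer below (rq->hrtick_timer) delivers the scheduler tick at sub-jiffy
 * resolution; hrtick_start() arms it and, on SMP, bounces the request to the
 * owning CPU through rq->hrtick_csd. Without the option these helpers
 * collapse to empty stubs.
 */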
#ifdef CONFIG_SCHED_HRTICK

static void hrtick_clear(struct rq *rq)
	if (hrtimer_active(&rq->hrtick_timer))

	struct rq *rq = container_of(timer, struct rq, hrtick_timer);

	rq->curr->sched_class->task_tick(rq, rq->curr, 1);

static void __hrtick_start(void *arg)

	rq->hrtick_csd_pending = 0;

void hrtick_start(struct rq *rq, u64 delay)
	struct hrtimer *timer = &rq->hrtick_timer;

	hrtimer_set_expires(timer, time);

	} else if (!rq->hrtick_csd_pending) {
		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
		rq->hrtick_csd_pending = 1;

	int cpu = (int)(long)hcpu;

		hrtick_clear(cpu_rq(cpu));

static __init void init_hrtick(void)

void hrtick_start(struct rq *rq, u64 delay)

static inline void init_hrtick(void)

static void init_rq_hrtick(struct rq *rq)
	rq->hrtick_csd_pending = 0;

	rq->hrtick_csd.flags = 0;
	rq->hrtick_csd.func = __hrtick_start;
	rq->hrtick_csd.info = rq;

	rq->hrtick_timer.function = hrtick;

static inline void hrtick_clear(struct rq *rq)
static inline void init_rq_hrtick(struct rq *rq)
static inline void init_hrtick(void)

#ifndef tsk_is_polling
#define tsk_is_polling(t) 0

	if (test_tsk_need_resched(p))

	set_tsk_need_resched(p);

	struct rq *rq = cpu_rq(cpu);

int get_nohz_timer_target(void)
	struct sched_domain *sd;

	for_each_domain(cpu, sd) {

void wake_up_idle_cpu(int cpu)
	struct rq *rq = cpu_rq(cpu);

	set_tsk_need_resched(rq->idle);

static inline bool got_nohz_idle_kick(void)

static inline bool got_nohz_idle_kick(void)

void sched_avg_update(struct rq *rq)
	while ((s64)(rq->clock - rq->age_stamp) > period) {
		asm("" : "+rm" (rq->age_stamp));

	set_tsk_need_resched(p);
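/*
 * walk_tg_tree_from() iterates the task_group hierarchy rooted at @from,
 * calling @down on the way down and @up on the way back up; a non-zero
 * return from either callback aborts the walk. tg_nop() is the no-op
 * callback used when a caller only needs one direction.
 */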
#if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
			(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))

int walk_tg_tree_from(struct task_group *from,

	ret = (*down)(parent, data);

	list_for_each_entry_rcu(child, &parent->children, siblings) {

	ret = (*up)(parent, data);
	if (ret || parent == from)

	parent = parent->parent;

int tg_nop(struct task_group *tg, void *data)

static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)

static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)

	enqueue_task(rq, p, flags);

	dequeue_task(rq, p, flags);
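/*
 * update_rq_clock_task() turns the raw rq clock delta into task-visible
 * time: time spent in hard/soft irq context (CONFIG_IRQ_TIME_ACCOUNTING)
 * and time stolen by the hypervisor (CONFIG_PARAVIRT_TIME_ACCOUNTING) are
 * subtracted from the delta and, when the NONTASK_POWER feature is set,
 * folded into the rt_avg metric instead.
 */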
static void update_rq_clock_task(struct rq *rq, s64 delta)
#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
	s64 steal = 0, irq_delta = 0;

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;

	if (irq_delta > delta)

	rq->prev_irq_time += irq_delta;

#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
		steal = paravirt_steal_clock(cpu_of(rq));
		steal -= rq->prev_steal_time_rq;

		st = steal_ticks(steal);

		rq->prev_steal_time_rq += steal;

#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
		sched_rt_avg_update(rq, irq_delta + steal);
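/*
 * Priority plumbing: __normal_prio() and its callers map a task's policy
 * and nice value into the single p->prio number the scheduler compares
 * (0-99 for real-time policies, 100-139 for SCHED_NORMAL/SCHED_BATCH),
 * while check_class_changed() notifies the old and new sched_class when a
 * priority or policy change moves a task between classes.
 */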
static inline int __normal_prio(struct task_struct *p)

	if (task_has_rt_policy(p))

	prio = __normal_prio(p);

	if (!rt_prio(p->prio))

static inline void check_class_changed(struct rq *rq, struct task_struct *p,

	} else if (oldprio != p->prio)

	rq->curr->sched_class->check_preempt_curr(rq, p, flags);

		if (class == rq->curr->sched_class)

	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))

void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
#ifdef CONFIG_SCHED_DEBUG
#ifdef CONFIG_LOCKDEP
				      lockdep_is_held(&task_rq(p)->lock)));

	trace_sched_migrate_task(p, new_cpu);

	if (task_cpu(p) != new_cpu) {
		p->se.nr_migrations++;

	__set_task_cpu(p, new_cpu);

struct migration_arg {

static int migration_cpu_stop(void *data);

	while (task_running(rq, p)) {

	rq = task_rq_lock(p, &flags);
	trace_sched_wait_task(p);
	running = task_running(rq, p);
	if (!match_state || p->state == match_state)
	task_rq_unlock(rq, p, &flags);

static int select_fallback_rq(int cpu, struct task_struct *p)

	do_set_cpus_allowed(p, cpu_possible_mask);

	if (p->mm && printk_ratelimit()) {
		printk_sched("process %d (%s) no longer affine to cpu%d\n",
				task_pid_nr(p), p->comm, cpu);

int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);

	cpu = select_fallback_rq(task_cpu(p), p);
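/*
 * Wakeup fast path. ttwu_do_wakeup() marks the task runnable and lets the
 * class preempt-check run, ttwu_remote() handles tasks that are still on a
 * runqueue, and sched_ttwu_pending()/ttwu_queue_remote() implement the
 * llist-based remote wakeup queue that scheduler_ipi() drains on the target
 * CPU so that try_to_wake_up() does not have to take a remote rq->lock.
 */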
	s64 diff = sample - *avg;

ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
#ifdef CONFIG_SCHEDSTATS

	if (cpu == this_cpu) {

		struct sched_domain *sd;

		for_each_domain(this_cpu, sd) {

static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)

ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)

	trace_sched_wakeup(p, true);

	if (rq->idle_stamp) {
		u64 delta = rq->clock - rq->idle_stamp;

		update_avg(&rq->avg_idle, delta);

ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)

	ttwu_do_wakeup(rq, p, wake_flags);

static int ttwu_remote(struct task_struct *p, int wake_flags)

	rq = __task_rq_lock(p);

	ttwu_do_wakeup(rq, p, wake_flags);

	__task_rq_unlock(rq);

static void sched_ttwu_pending(void)
	struct llist_node *llist = llist_del_all(&rq->wake_list);

		llist = llist_next(llist);
		ttwu_do_activate(rq, p, 0);

void scheduler_ipi(void)
	if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())

	sched_ttwu_pending();

	if (unlikely(got_nohz_idle_kick() && !need_resched())) {

static void ttwu_queue_remote(struct task_struct *p, int cpu)
	if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))

bool cpus_share_cache(int this_cpu, int that_cpu)
	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);

static void ttwu_queue(struct task_struct *p, int cpu)
	struct rq *rq = cpu_rq(cpu);

#if defined(CONFIG_SMP)
	ttwu_queue_remote(p, cpu);

	ttwu_do_activate(rq, p, 0);

	unsigned long flags;
	int cpu, success = 0;

	if (!(p->state & state))

	if (p->on_rq && ttwu_remote(p, wake_flags))

	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
	if (task_cpu(p) != cpu) {
		set_task_cpu(p, cpu);

	ttwu_stat(p, cpu, wake_flags);

static void try_to_wake_up_local(struct task_struct *p)

	ttwu_do_wakeup(rq, p, 0);

	return try_to_wake_up(p, TASK_ALL, 0);

	return try_to_wake_up(p, state, 0);
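/*
 * __sched_fork()/sched_fork() below reset the per-task scheduler state for
 * a freshly copied task: the sched_entity runtime counters start from zero,
 * schedstats are cleared, and the child is placed on a CPU chosen with
 * SD_BALANCE_FORK before wake_up_new_task() enqueues it for the first time.
 */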
	p->se.exec_start		= 0;
	p->se.sum_exec_runtime		= 0;
	p->se.prev_sum_exec_runtime	= 0;
	p->se.nr_migrations		= 0;

	INIT_LIST_HEAD(&p->se.group_node);

#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));

	INIT_LIST_HEAD(&p->rt.run_list);

#ifdef CONFIG_PREEMPT_NOTIFIERS

	unsigned long flags;

	if (task_has_rt_policy(p)) {

	if (!rt_prio(p->prio))

	set_task_cpu(p, cpu);

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
	if (likely(sched_info_on()))
		memset(&p->sched_info, 0, sizeof(p->sched_info));

#if defined(CONFIG_SMP)
#ifdef CONFIG_PREEMPT_COUNT
	plist_node_init(&p->pushable_tasks, MAX_PRIO);

	unsigned long flags;

	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));

	rq = __task_rq_lock(p);

	trace_sched_wakeup_new(p, true);

	task_rq_unlock(rq, p, &flags);

#ifdef CONFIG_PREEMPT_NOTIFIERS

void preempt_notifier_register(struct preempt_notifier *notifier)
	hlist_add_head(&notifier->link, &current->preempt_notifiers);

void preempt_notifier_unregister(struct preempt_notifier *notifier)
	hlist_del(&notifier->link);

	struct preempt_notifier *notifier;

	struct preempt_notifier *notifier;

	notifier->ops->sched_out(notifier, next);

static void fire_sched_in_preempt_notifiers(struct task_struct *curr)

fire_sched_out_preempt_notifiers(struct task_struct *curr,

	trace_sched_switch(prev, next);
	perf_event_task_sched_out(prev, next);
	fire_sched_out_preempt_notifiers(prev, next);
	prepare_lock_switch(rq, next);

static void finish_task_switch(struct rq *rq, struct task_struct *prev)

	prev_state = prev->state;

	finish_lock_switch(rq, prev);

	fire_sched_in_preempt_notifiers(current);

	put_task_struct(prev);

static inline void pre_schedule(struct rq *rq, struct task_struct *prev)

static inline void post_schedule(struct rq *rq)
	if (rq->post_schedule) {
		unsigned long flags;

		if (rq->curr->sched_class->post_schedule)
			rq->curr->sched_class->post_schedule(rq);

		rq->post_schedule = 0;

static inline void pre_schedule(struct rq *rq, struct task_struct *p)
static inline void post_schedule(struct rq *rq)

	finish_task_switch(rq, prev);

#ifdef __ARCH_WANT_UNLOCKED_CTXSW

context_switch(struct rq *rq, struct task_struct *prev,

	prepare_task_switch(rq, prev, next);

#ifndef __ARCH_WANT_UNLOCKED_CTXSW

	rcu_switch(prev, next);

	finish_task_switch(this_rq(), prev);

	unsigned long i, sum = 0;

	sum += cpu_rq(i)->nr_running;

	unsigned long i, sum = 0;

	sum += cpu_rq(i)->nr_uninterruptible;

	unsigned long long sum = 0;

	sum += cpu_rq(i)->nr_switches;

	unsigned long i, sum = 0;

	struct rq *this = cpu_rq(cpu);
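/*
 * Global load-average (avenrun[]) machinery. calc_load() computes one step
 * of the exponentially damped average in FSHIFT fixed point, roughly:
 *
 *	load = (load * exp + active * (FIXED_1 - exp)) >> FSHIFT
 *
 * calc_load_n() applies n such steps at once via fixed_power_int(), and the
 * calc_load_*_idle() helpers fold the contribution of CPUs entering or
 * leaving NO_HZ idle so that sleeping CPUs do not distort the sampled
 * counts.
 */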
	loads[0] = (avenrun[0] + offset) << shift;
	loads[1] = (avenrun[1] + offset) << shift;
	loads[2] = (avenrun[2] + offset) << shift;

static long calc_load_fold_active(struct rq *this_rq)
	long nr_active, delta = 0;

static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)

static int calc_load_idx;

static inline int calc_load_write_idx(void)
	int idx = calc_load_idx;

static inline int calc_load_read_idx(void)
	return calc_load_idx & 1;

void calc_load_enter_idle(void)
	struct rq *this_rq = this_rq();

	delta = calc_load_fold_active(this_rq);

	int idx = calc_load_write_idx();
	atomic_long_add(delta, &calc_load_idle[idx]);

void calc_load_exit_idle(void)
	struct rq *this_rq = this_rq();

static long calc_load_fold_idle(void)
	int idx = calc_load_read_idx();

	if (atomic_long_read(&calc_load_idle[idx]))

static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
	unsigned long result = 1UL << frac_bits;

	result += 1UL << (frac_bits - 1);
	result >>= frac_bits;

	x += 1UL << (frac_bits - 1);

static unsigned long
calc_load_n(unsigned long load, unsigned long exp,
	    unsigned long active, unsigned int n)
	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);

static void calc_global_nohz(void)
	if (!time_before(jiffies, calc_load_update + 10)) {

	delta = jiffies - calc_load_update - 10;

	active = atomic_long_read(&calc_load_tasks);
	active = active > 0 ? active * FIXED_1 : 0;

	avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
	avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
	avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);

static inline long calc_load_fold_idle(void) { return 0; }
static inline void calc_global_nohz(void) { }

	delta = calc_load_fold_idle();

	atomic_long_add(delta, &calc_load_tasks);

	active = atomic_long_read(&calc_load_tasks);
	active = active > 0 ? active * FIXED_1 : 0;

	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load(avenrun[2], EXP_15, active);

static void calc_load_account_active(struct rq *this_rq)

	delta = calc_load_fold_active(this_rq);

	atomic_long_add(delta, &calc_load_tasks);
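/*
 * Per-runqueue cpu_load[] decay. When a NO_HZ CPU wakes up after missing
 * several ticks, decay_load_missed() approximates
 *
 *	load * (degrade_factor[idx] / 128)^missed_updates
 *
 * using the precomputed degrade tables, so __update_cpu_load() can catch up
 * on all of the missed updates at once instead of replaying every tick.
 */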
#define DEGRADE_SHIFT		7
static const unsigned char
static const unsigned char
		{0, 0, 0, 0, 0, 0, 0, 0},
		{64, 32, 8, 0, 0, 0, 0, 0},
		{96, 72, 40, 12, 1, 0, 0},
		{112, 98, 75, 43, 15, 1, 0},
		{120, 112, 98, 76, 45, 16, 2} };

static unsigned long
decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
	if (!missed_updates)

	if (missed_updates >= degrade_zero_ticks[idx])

	return load >> missed_updates;

	while (missed_updates) {
		if (missed_updates % 2)

		missed_updates >>= 1;

static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
			      unsigned long pending_updates)

		unsigned long old_load, new_load;

		old_load = decay_load_missed(old_load, pending_updates - 1, i);
		new_load = this_load;

		if (new_load > old_load)
			new_load += scale - 1;

		this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;

	sched_avg_update(this_rq);

	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
	unsigned long load = this_rq->load.weight;
	unsigned long pending_updates;

	__update_cpu_load(this_rq, load, pending_updates);

	struct rq *this_rq = this_rq();
	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
	unsigned long pending_updates;

	if (pending_updates) {
		__update_cpu_load(this_rq, 0, pending_updates);

static void update_cpu_load_active(struct rq *this_rq)
	__update_cpu_load(this_rq, this_rq->load.weight, 1);

	calc_load_account_active(this_rq);

	unsigned long flags;

	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);

	struct migration_arg arg = { p, dest_cpu };

static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
	if (task_current(rq, p)) {

	unsigned long flags;

	rq = task_rq_lock(p, &flags);
	ns = do_task_delta_exec(p, rq);
	task_rq_unlock(rq, p, &flags);

	unsigned long flags;

	rq = task_rq_lock(p, &flags);
	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
	task_rq_unlock(rq, p, &flags);

	struct rq *rq = cpu_rq(cpu);

	update_cpu_load_active(rq);

	trigger_load_balance(rq, cpu);
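/*
 * Core of the scheduler. __schedule() runs the schedule_debug() sanity
 * checks, lets pick_next_task() ask each scheduling class in priority order
 * for the next runnable task (idle_balance() is tried first when the
 * runqueue is empty), and then switches to the chosen task through
 * context_switch() when it differs from the current one.
 */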
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
				defined(CONFIG_PREEMPT_TRACER))

#ifdef CONFIG_DEBUG_PREEMPT
#ifdef CONFIG_DEBUG_PREEMPT
#ifdef CONFIG_DEBUG_PREEMPT

	print_irqtrace_events(prev);

static inline void schedule_debug(struct task_struct *prev)

	__schedule_bug(prev);

static void put_prev_task(struct rq *rq, struct task_struct *prev)

pick_next_task(struct rq *rq)

	p = class->pick_next_task(rq);

static void __sched __schedule(void)
	unsigned long *switch_count;

	schedule_debug(prev);

	switch_count = &prev->nivcsw;

	try_to_wake_up_local(to_wakeup);

	switch_count = &prev->nvcsw;

	pre_schedule(rq, prev);

	idle_balance(cpu, rq);

	put_prev_task(rq, prev);
	next = pick_next_task(rq);
	clear_tsk_need_resched(prev);

	if (likely(prev != next)) {
		context_switch(rq, prev, next);

static inline void sched_submit_work(struct task_struct *tsk)
	if (!tsk->state || tsk_is_pi_blocked(tsk))

	if (blk_needs_flush_plug(tsk))
		blk_schedule_flush_plug(tsk);

	sched_submit_work(tsk);

#ifdef CONFIG_RCU_USER_QS

#ifdef CONFIG_MUTEX_SPIN_ON_OWNER

	if (lock->owner != owner)

	return owner->on_cpu;

	while (owner_running(lock, owner)) {

	return lock->owner == NULL;

#ifdef CONFIG_PREEMPT

	} while (need_resched());

	} while (need_resched());

	return try_to_wake_up(curr->private, mode, wake_flags);

	     int nr_exclusive, int wake_flags, void *key)

	unsigned flags = curr->flags;

	if (curr->func(curr, mode, wake_flags, key) &&

	    int nr_exclusive, void *key)
	unsigned long flags;

	__wake_up_common(q, mode, nr_exclusive, 0, key);
	spin_unlock_irqrestore(&q->lock, flags);

	__wake_up_common(q, mode, nr, 0, NULL);

	__wake_up_common(q, mode, 1, 0, key);

	    int nr_exclusive, void *key)
	unsigned long flags;

	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
	spin_unlock_irqrestore(&q->lock, flags);

	unsigned long flags;

	spin_unlock_irqrestore(&x->wait.lock, flags);

	unsigned long flags;

	spin_unlock_irqrestore(&x->wait.lock, flags);

do_wait_for_common(struct completion *x, long timeout, int state)

	__add_wait_queue_tail_exclusive(&x->wait, &wait);

	if (signal_pending_state(state, current)) {

	} while (!x->done && timeout);

	__remove_wait_queue(&x->wait, &wait);

	return timeout ?: 1;
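/*
 * Completion wait path: do_wait_for_common() parks the caller exclusively
 * on x->wait and loops until x->done is set or the timeout/signal fires;
 * wait_for_common() and the wait_for_completion*() wrappers differ only in
 * the task state and timeout they pass down, while complete()/complete_all()
 * on the other side bump x->done and wake waiters under x->wait.lock.
 */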
wait_for_common(struct completion *x, long timeout, int state)

	timeout = do_wait_for_common(x, timeout, state);

					    unsigned long timeout)

					    unsigned long timeout)

	unsigned long flags;

	spin_unlock_irqrestore(&x->wait.lock, flags);

	unsigned long flags;

	spin_unlock_irqrestore(&x->wait.lock, flags);

	unsigned long flags;

	init_waitqueue_entry(&wait, current);

	__add_wait_queue(q, &wait);
	spin_unlock(&q->lock);
	spin_lock_irq(&q->lock);
	__remove_wait_queue(q, &wait);
	spin_unlock_irqrestore(&q->lock, flags);

#ifdef CONFIG_RT_MUTEXES

void rt_mutex_setprio(struct task_struct *p, int prio)
	int oldprio, on_rq, running;

	rq = __task_rq_lock(p);

	trace_sched_pi_setprio(p, prio);

	running = task_current(rq, p);

	check_class_changed(rq, p, prev_class, oldprio);

	__task_rq_unlock(rq);

	int old_prio, delta, on_rq;
	unsigned long flags;

	if (TASK_NICE(p) == nice || nice < -20 || nice > 19)

	rq = task_rq_lock(p, &flags);

	if (task_has_rt_policy(p)) {

	dequeue_task(rq, p, 0);

	p->prio = effective_prio(p);
	delta = p->prio - old_prio;

	enqueue_task(rq, p, 0);

	if (delta < 0 || (delta > 0 && task_running(rq, p)))

	task_rq_unlock(rq, p, &flags);

	int nice_rlim = 20 - nice;

	return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||

#ifdef __ARCH_WANT_SYS_NICE

	if (increment < -40)

	struct rq *rq = cpu_rq(cpu);

	if (!llist_empty(&rq->wake_list))

	return cpu_rq(cpu)->idle;

	if (rt_prio(p->prio))

static bool check_same_owner(struct task_struct *p)

	match = (uid_eq(cred->euid, pcred->euid) ||
		 uid_eq(cred->euid, pcred->uid));
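/*
 * __sched_setscheduler() is the common back end for sched_setscheduler(),
 * sched_setscheduler_nocheck() and the sched_setparam() path: it validates
 * the requested policy and rt_priority against RLIMIT_RTPRIO and ownership
 * (check_same_owner()), dequeues the task, switches it to the new
 * class/priority, requeues it, and lets check_class_changed() handle any
 * preemption or push/pull work that results.
 */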
static int __sched_setscheduler(struct task_struct *p, int policy,

	int retval, oldprio, oldpolicy = -1, on_rq, running;
	unsigned long flags;

	policy = oldpolicy = p->policy;

	if (rt_policy(policy)) {
		unsigned long rlim_rtprio =

		if (policy != p->policy && !rlim_rtprio)

	if (!check_same_owner(p))

	rq = task_rq_lock(p, &flags);

	if (p == rq->stop) {
		task_rq_unlock(rq, p, &flags);

	task_rq_unlock(rq, p, &flags);

#ifdef CONFIG_RT_GROUP_SCHED
	if (rt_bandwidth_enabled() && rt_policy(policy) &&
			!task_group_is_autogroup(task_group(p))) {
		task_rq_unlock(rq, p, &flags);

	policy = oldpolicy = -1;
	task_rq_unlock(rq, p, &flags);

	running = task_current(rq, p);

	dequeue_task(rq, p, 0);

	enqueue_task(rq, p, 0);

	check_class_changed(rq, p, prev_class, oldprio);
	task_rq_unlock(rq, p, &flags);

	return __sched_setscheduler(p, policy, param, true);

	return __sched_setscheduler(p, policy, param, false);

do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)

	if (!param || pid < 0)

	p = find_process_by_pid(pid);

	return do_sched_setscheduler(pid, policy, param);

	return do_sched_setscheduler(pid, -1, param);

	p = find_process_by_pid(pid);

	if (!param || pid < 0)

	p = find_process_by_pid(pid);

	p = find_process_by_pid(pid);

	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
		goto out_free_cpus_allowed;

	cpumask_and(new_mask, in_mask, cpus_allowed);

	retval = set_cpus_allowed_ptr(p, new_mask);

	if (!cpumask_subset(new_mask, cpus_allowed)) {
		cpumask_copy(new_mask, cpus_allowed);

	free_cpumask_var(new_mask);
out_free_cpus_allowed:
	free_cpumask_var(cpus_allowed);

static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,

	if (len < cpumask_size())
		cpumask_clear(new_mask);
	else if (len > cpumask_size())
		len = cpumask_size();

		unsigned long __user *, user_mask_ptr)

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))

	retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);

	free_cpumask_var(new_mask);

	unsigned long flags;

	p = find_process_by_pid(pid);

		unsigned long __user *, user_mask_ptr)

	if (len & (sizeof(unsigned long)-1))

	size_t retlen = min_t(size_t, len, cpumask_size());

	free_cpumask_var(mask);

	struct rq *rq = this_rq_lock();

	current->sched_class->yield_task(rq);

static inline int should_resched(void)

static void __cond_resched(void)

	if (should_resched()) {

	int resched = should_resched();

	if (spin_needbreak(lock) || resched) {

	if (should_resched()) {

	struct rq *rq, *p_rq;
	unsigned long flags;

	double_rq_lock(rq, p_rq);

	double_rq_unlock(rq, p_rq);

	if (task_running(p_rq, p) || p->state)

	yielded = curr->sched_class->yield_to_task(rq, p, preempt);

	if (preempt && rq != p_rq)

	double_rq_unlock(rq, p_rq);

	struct rq *rq = raw_rq();

	delayacct_blkio_start();

	delayacct_blkio_end();

	struct rq *rq = raw_rq();

	delayacct_blkio_start();

	delayacct_blkio_end();

	unsigned int time_slice;
	unsigned long flags;

	p = find_process_by_pid(pid);

	rq = task_rq_lock(p, &flags);
	time_slice = p->sched_class->get_rr_interval(rq, p);
	task_rq_unlock(rq, p, &flags);

	unsigned long free = 0;

		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');

#if BITS_PER_LONG == 32
#ifdef CONFIG_DEBUG_STACK_USAGE
	free = stack_not_used(p);

#if BITS_PER_LONG == 32
	"  task                PC stack   pid father\n");
	"  task                PC stack   pid father\n");

	if (!state_filter || (p->state & state_filter))

#ifdef CONFIG_SCHED_DEBUG

	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

	__set_task_cpu(idle, cpu);

#if defined(CONFIG_SMP)

	ftrace_graph_init_idle_task(idle, cpu);

#if defined(CONFIG_SMP)

	unsigned long flags;
	unsigned int dest_cpu;

	rq = task_rq_lock(p, &flags);

	if (!cpumask_intersects(new_mask, cpu_active_mask)) {

	do_set_cpus_allowed(p, new_mask);

	struct migration_arg arg = { p, dest_cpu };

	task_rq_unlock(rq, p, &flags);

	task_rq_unlock(rq, p, &flags);

static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
	struct rq *rq_dest, *rq_src;

	rq_src = cpu_rq(src_cpu);
	rq_dest = cpu_rq(dest_cpu);

	double_rq_lock(rq_src, rq_dest);

	if (task_cpu(p) != src_cpu)

	dequeue_task(rq_src, p, 0);
	set_task_cpu(p, dest_cpu);
	enqueue_task(rq_dest, p, 0);

	double_rq_unlock(rq_src, rq_dest);

static int migration_cpu_stop(void *data)
	struct migration_arg *arg = data;

	__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);

#ifdef CONFIG_HOTPLUG_CPU

void idle_task_exit(void)

static void calc_load_migrate(struct rq *rq)
	long delta = calc_load_fold_active(rq);

	atomic_long_add(delta, &calc_load_tasks);

static void migrate_tasks(unsigned int dead_cpu)
	struct rq *rq = cpu_rq(dead_cpu);

	next = pick_next_task(rq);

	dest_cpu = select_fallback_rq(dead_cpu, next);

	__migrate_task(next, dead_cpu, dest_cpu);
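/*
 * CONFIG_SCHED_DEBUG + CONFIG_SYSCTL: the helpers below build the
 * /proc/sys/kernel/sched_domain/cpuN/domainM/ hierarchy at run time, one
 * ctl_table per domain with entries such as min_interval, busy_factor and
 * imbalance_pct, and tear it down again when the domains are rebuilt.
 */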
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)

static struct ctl_table sd_ctl_dir[] = {

static struct ctl_table sd_ctl_root[] = {
		.child		= sd_ctl_dir,

static struct ctl_table *sd_alloc_ctl_entry(int n)

static void sd_free_ctl_entry(struct ctl_table **tablep)

	for (entry = *tablep; entry->mode; entry++) {
		sd_free_ctl_entry(&entry->child);

static int min_load_idx = 0;

set_table_entry(struct ctl_table *entry,

		entry->extra1 = &min_load_idx;
		entry->extra2 = &max_load_idx;

sd_alloc_ctl_domain_table(struct sched_domain *sd)

	set_table_entry(&table[0], "min_interval", &sd->min_interval,
	set_table_entry(&table[1], "max_interval", &sd->max_interval,
	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
	set_table_entry(&table[9], "cache_nice_tries",
		&sd->cache_nice_tries,
	set_table_entry(&table[10], "flags", &sd->flags,
	set_table_entry(&table[11], "name", sd->name,

static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
	struct sched_domain *sd;
	int domain_num = 0, i;

	for_each_domain(cpu, sd)

	entry = table = sd_alloc_ctl_entry(domain_num + 1);

	for_each_domain(cpu, sd) {
		entry->child = sd_alloc_ctl_domain_table(sd);

static void register_sched_domain_sysctl(void)
	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);

		entry->child = sd_alloc_ctl_cpu_table(i);

static void unregister_sched_domain_sysctl(void)
	if (sd_sysctl_header)

	sd_sysctl_header = NULL;
	if (sd_ctl_dir[0].child)
		sd_free_ctl_entry(&sd_ctl_dir[0].child);

static void register_sched_domain_sysctl(void)
static void unregister_sched_domain_sysctl(void)

static void set_rq_online(struct rq *rq)

	cpumask_set_cpu(rq->cpu, rq->rd->online);

	if (class->rq_online)
		class->rq_online(rq);

static void set_rq_offline(struct rq *rq)

	if (class->rq_offline)
		class->rq_offline(rq);

	cpumask_clear_cpu(rq->cpu, rq->rd->online);

migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)

	int cpu = (long)hcpu;
	unsigned long flags;
	struct rq *rq = cpu_rq(cpu);

#ifdef CONFIG_HOTPLUG_CPU
	sched_ttwu_pending();

	calc_load_migrate(rq);

	.notifier_call = migration_call,

			      unsigned long action, void *hcpu)

			      unsigned long action, void *hcpu)

static int __init migration_init(void)

	BUG_ON(err == NOTIFY_BAD);
	migration_call(&migration_notifier, CPU_ONLINE, cpu);
	register_cpu_notifier(&migration_notifier);

#ifdef CONFIG_SCHED_DEBUG

static int __init sched_debug_setup(char *str)
	sched_debug_enabled = 1;

static inline bool sched_debug(void)
	return sched_debug_enabled;

static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,

	struct sched_group *group = sd->groups;

	cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd));
	cpumask_clear(groupmask);

	if (!(sd->flags & SD_LOAD_BALANCE)) {
		printk("does not load-balance\n");

		if (!group->sgp->power_orig) {

		if (!cpumask_weight(sched_group_cpus(group))) {

		if (!(sd->flags & SD_OVERLAP) &&
		    cpumask_intersects(groupmask, sched_group_cpus(group))) {

		cpumask_or(groupmask, groupmask, sched_group_cpus(group));

		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));

		group = group->next;
	} while (group != sd->groups);

	if (!cpumask_equal(sched_domain_span(sd), groupmask))

	    !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
		"of domain->span\n");

static void sched_domain_debug(struct sched_domain *sd, int cpu)

	if (!sched_debug_enabled)

	if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))

# define sched_domain_debug(sd, cpu) do { } while (0)
static inline bool sched_debug(void)
static int sd_degenerate(struct sched_domain *sd)
	if (cpumask_weight(sched_domain_span(sd)) == 1)

	if (sd->flags & (SD_LOAD_BALANCE |
			 SD_BALANCE_NEWIDLE |
			 SD_SHARE_PKG_RESOURCES)) {
		if (sd->groups != sd->groups->next)

	if (sd->flags & (SD_WAKE_AFFINE))

sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)

	unsigned long cflags = sd->flags, pflags = parent->flags;

	if (sd_degenerate(parent))

	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))

	if (parent->groups == parent->groups->next) {
		pflags &= ~(SD_LOAD_BALANCE |
			    SD_BALANCE_NEWIDLE |
			    SD_SHARE_PKG_RESOURCES);
			pflags &= ~SD_SERIALIZE;

	if (~cflags & pflags)

static void free_rootdomain(struct rcu_head *rcu)
	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);

	free_cpumask_var(rd->rto_mask);
	free_cpumask_var(rd->online);
	free_cpumask_var(rd->span);

static void rq_attach_root(struct rq *rq, struct root_domain *rd)
	struct root_domain *old_rd = NULL;
	unsigned long flags;

	cpumask_clear_cpu(rq->cpu, old_rd->span);

	cpumask_set_cpu(rq->cpu, rd->span);

static int init_rootdomain(struct root_domain *rd)
	memset(rd, 0, sizeof(*rd));

	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))

	free_cpumask_var(rd->rto_mask);
	free_cpumask_var(rd->online);
	free_cpumask_var(rd->span);

struct root_domain def_root_domain;

static void init_defrootdomain(void)
	init_rootdomain(&def_root_domain);

static struct root_domain *alloc_rootdomain(void)
	struct root_domain *rd;

	if (init_rootdomain(rd) != 0) {

static void free_sched_groups(struct sched_group *sg, int free_sgp)

	} while (sg != first);

static void free_sched_domain(struct rcu_head *rcu)
	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);

	if (sd->flags & SD_OVERLAP) {
		free_sched_groups(sd->groups, 1);

		kfree(sd->groups->sgp);

static void destroy_sched_domain(struct sched_domain *sd, int cpu)
	call_rcu(&sd->rcu, free_sched_domain);

static void destroy_sched_domains(struct sched_domain *sd, int cpu)
	for (; sd; sd = sd->parent)
		destroy_sched_domain(sd, cpu);

static void update_top_cache_domain(int cpu)
	struct sched_domain *sd;

	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);

	id = cpumask_first(sched_domain_span(sd));

cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
	struct rq *rq = cpu_rq(cpu);
	struct sched_domain *tmp;

	for (tmp = sd; tmp; ) {
		struct sched_domain *parent = tmp->parent;

		if (sd_parent_degenerate(tmp, parent)) {
			tmp->parent = parent->parent;
				parent->parent->child = tmp;
			destroy_sched_domain(parent, cpu);

	if (sd && sd_degenerate(sd)) {
		destroy_sched_domain(tmp, cpu);

	sched_domain_debug(sd, cpu);

	rq_attach_root(rq, rd);

	destroy_sched_domains(tmp, cpu);

	update_top_cache_domain(cpu);
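/*
 * isolcpus= boot handling: CPUs listed here are removed from the masks the
 * domain builder works on, so the load balancer never moves tasks onto
 * them. Example boot parameter (assuming CPUs 2 and 3 exist):
 *
 *	isolcpus=2,3
 */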
static int __init isolated_cpu_setup(char *str)
	alloc_bootmem_cpumask_var(&cpu_isolated_map);
	cpulist_parse(str, cpu_isolated_map);

__setup("isolcpus=", isolated_cpu_setup);

static const struct cpumask *cpu_cpu_mask(int cpu)

	struct sched_group_power **__percpu sgp;
	struct sched_domain **__percpu sd;
	struct root_domain *rd;

struct sched_domain_topology_level;

typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);

#define SDTL_OVERLAP	0x01

struct sched_domain_topology_level {
	sched_domain_init_f init;
	sched_domain_mask_f mask;

	struct sd_data      data;

static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
	const struct cpumask *span = sched_domain_span(sd);
	struct sd_data *sdd = sd->private;
	struct sched_domain *sibling;

	cpumask_set_cpu(i, sched_group_mask(sg));

int group_balance_cpu(struct sched_group *sg)

build_overlap_sched_groups(struct sched_domain *sd, int cpu)
	const struct cpumask *span = sched_domain_span(sd);
	struct cpumask *covered = sched_domains_tmpmask;
	struct sd_data *sdd = sd->private;
	struct sched_domain *child;

	cpumask_clear(covered);

	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),

	sg_span = sched_group_cpus(sg);

	child = child->child;
	cpumask_copy(sg_span, sched_domain_span(child));
	cpumask_set_cpu(i, sg_span);

	cpumask_or(covered, covered, sg_span);

	build_group_mask(sd, sg);

	    group_balance_cpu(sg) == cpu)

	sd->groups = groups;

	free_sched_groups(first, 0);

static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
	struct sched_domain *child = sd->child;

	cpu = cpumask_first(sched_domain_span(child));

build_sched_groups(struct sched_domain *sd, int cpu)
	struct sched_group *first = NULL, *last = NULL;
	struct sd_data *sdd = sd->private;
	const struct cpumask *span = sched_domain_span(sd);

	get_group(cpu, sdd, &sd->groups);

	if (cpu != cpumask_first(sched_domain_span(sd)))

	covered = sched_domains_tmpmask;

	cpumask_clear(covered);

	struct sched_group *sg;
	int group = get_group(i, sdd, &sg);

	cpumask_clear(sched_group_cpus(sg));
	cpumask_setall(sched_group_mask(sg));

	if (get_group(j, sdd, NULL) != group)

	cpumask_set_cpu(j, covered);
	cpumask_set_cpu(j, sched_group_cpus(sg));

static void init_sched_groups_power(int cpu, struct sched_domain *sd)
	struct sched_group *sg = sd->groups;

	sg->group_weight = cpumask_weight(sched_group_cpus(sg));
	} while (sg != sd->groups);

	if (cpu != group_balance_cpu(sg))

	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);

	return 0*SD_ASYM_PACKING;

#ifdef CONFIG_SCHED_DEBUG
# define SD_INIT_NAME(sd, type)		sd->name = #type
# define SD_INIT_NAME(sd, type)		do { } while (0)

#define SD_INIT_FUNC(type)						\
static noinline struct sched_domain *					\
sd_init_##type(struct sched_domain_topology_level *tl, int cpu)	\
{									\
	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
	*sd = SD_##type##_INIT;						\
	SD_INIT_NAME(sd, type);						\
	sd->private = &tl->data;					\

#ifdef CONFIG_SCHED_SMT
 SD_INIT_FUNC(SIBLING)
#ifdef CONFIG_SCHED_MC
#ifdef CONFIG_SCHED_BOOK

static int default_relax_domain_level = -1;
int sched_domain_level_max;

static int __init setup_relax_domain_level(char *str)
	if (kstrtoint(str, 0, &default_relax_domain_level))
		pr_warn("Unable to set relax_domain_level\n");

__setup("relax_domain_level=", setup_relax_domain_level);

static void set_domain_attribute(struct sched_domain *sd,
				 struct sched_domain_attr *attr)

	if (!attr || attr->relax_domain_level < 0) {
		if (default_relax_domain_level < 0)

		request = default_relax_domain_level;

		request = attr->relax_domain_level;
	if (request < sd->level) {
		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);

		sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);

static void __free_domain_allocs(struct s_data *d, enum s_alloc what,

		free_rootdomain(&d->rd->rcu);

		__sdt_free(cpu_map);

static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
						   const struct cpumask *cpu_map)
	memset(d, 0, sizeof(*d));

	if (__sdt_alloc(cpu_map))
		return sa_sd_storage;

		return sa_sd_storage;

	d->rd = alloc_rootdomain();

	return sa_rootdomain;

static void claim_allocations(int cpu, struct sched_domain *sd)
	struct sd_data *sdd = sd->private;

#ifdef CONFIG_SCHED_SMT
static const struct cpumask *cpu_smt_mask(int cpu)

static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ sd_init_SIBLING, cpu_smt_mask, },
#ifdef CONFIG_SCHED_MC
#ifdef CONFIG_SCHED_BOOK
	{ sd_init_BOOK, cpu_book_mask, },
	{ sd_init_CPU, cpu_cpu_mask, },

static struct sched_domain_topology_level *sched_domain_topology = default_topology;

static int sched_domains_numa_levels;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
static int sched_domains_curr_level;

static inline int sd_local_flags(int level)

	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
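/*
 * NUMA topology levels: sched_init_numa() adds one extra topology level per
 * distinct node distance, and sd_numa_init() instantiates the matching
 * sched_domain spanning sched_domains_numa_masks[level][node]. These levels
 * are marked SDTL_OVERLAP, and sd_local_flags() drops the exec/fork/
 * wake-affine balancing bits for levels beyond the reclaim distance.
 */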
static struct sched_domain *
sd_numa_init(struct sched_domain_topology_level *tl, int cpu)

	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
	int level = tl->numa_level;
	int sd_weight = cpumask_weight(
			sched_domains_numa_masks[level][cpu_to_node(cpu)]);

	*sd = (struct sched_domain){
		.min_interval		= sd_weight,
		.max_interval		= 2*sd_weight,
		.imbalance_pct		= 125,
		.cache_nice_tries	= 2,
		.flags			= 1*SD_LOAD_BALANCE
					| 1*SD_BALANCE_NEWIDLE
					| 0*SD_SHARE_CPUPOWER
					| 0*SD_SHARE_PKG_RESOURCES
					| 0*SD_PREFER_SIBLING
					| sd_local_flags(level)
		.balance_interval	= sd_weight,

	SD_INIT_NAME(sd, NUMA);
	sd->private = &tl->data;

	sched_domains_curr_level = tl->numa_level;

static const struct cpumask *sd_numa_mask(int cpu)
	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];

static void sched_numa_warn(const char *str)
	static int done = false;

static bool find_numa_distance(int distance)

	for (i = 0; i < sched_domains_numa_levels; i++) {
		if (sched_domains_numa_distance[i] == distance)

static void sched_init_numa(void)
	struct sched_domain_topology_level *tl;

	sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
	if (!sched_domains_numa_distance)

	next_distance = curr_distance;

	if (distance > curr_distance &&
	    (distance < next_distance ||
	     next_distance == curr_distance))
		next_distance = distance;

	sched_numa_warn("Node-distance not symmetric");

	if (sched_debug() && i && !find_numa_distance(distance))
		sched_numa_warn("Node-0 not representative");

	if (next_distance != curr_distance) {
		sched_domains_numa_distance[level++] = next_distance;
		sched_domains_numa_levels = level;
		curr_distance = next_distance;

	sched_domains_numa_levels = 0;

	sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
	if (!sched_domains_numa_masks)

	for (i = 0; i < level; i++) {
		sched_domains_numa_masks[i] =
			kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
		if (!sched_domains_numa_masks[i])

			sched_domains_numa_masks[i][j] = mask;

	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
			sizeof(struct sched_domain_topology_level), GFP_KERNEL);

	for (i = 0; default_topology[i].init; i++)
		tl[i] = default_topology[i];

	for (j = 0; j < level; i++, j++) {
		tl[i] = (struct sched_domain_topology_level){
			.init = sd_numa_init,
			.mask = sd_numa_mask,
			.flags = SDTL_OVERLAP,

	sched_domain_topology = tl;

	sched_domains_numa_levels = level;

static void sched_domains_numa_masks_set(int cpu)

	for (i = 0; i < sched_domains_numa_levels; i++) {
		if (node_distance(j, node) <= sched_domains_numa_distance[i])
			cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);

static void sched_domains_numa_masks_clear(int cpu)

	for (i = 0; i < sched_domains_numa_levels; i++) {
		cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);

static int sched_domains_numa_masks_update(struct notifier_block *nfb,
					   unsigned long action,

	int cpu = (long)hcpu;

	sched_domains_numa_masks_set(cpu);

	sched_domains_numa_masks_clear(cpu);

static inline void sched_init_numa(void)

static int sched_domains_numa_masks_update(struct notifier_block *nfb,
					   unsigned long action,

static int __sdt_alloc(const struct cpumask *cpu_map)
	struct sched_domain_topology_level *tl;

	for (tl = sched_domain_topology; tl->init; tl++) {
		struct sd_data *sdd = &tl->data;

		struct sched_domain *sd;
		struct sched_group *sg;
		struct sched_group_power *sgp;

		sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),

		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),

		sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),

static void __sdt_free(const struct cpumask *cpu_map)
	struct sched_domain_topology_level *tl;

	for (tl = sched_domain_topology; tl->init; tl++) {
		struct sd_data *sdd = &tl->data;

		struct sched_domain *sd;

		if (sd && (sd->flags & SD_OVERLAP))
			free_sched_groups(sd->groups, 0);
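/*
 * build_sched_domain() instantiates one topology level for one CPU (span =
 * cpu_map masked by tl->mask(cpu)) and links it to its child level;
 * build_sched_domains() then walks every CPU and every level, builds the
 * (possibly overlapping) groups, initialises group power, and finally
 * attaches the finished hierarchy to the root domain with
 * cpu_attach_domain().
 */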
struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
		struct s_data *d, const struct cpumask *cpu_map,
		struct sched_domain_attr *attr, struct sched_domain *child,

	struct sched_domain *sd = tl->init(tl, cpu);

	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));

	sd->level = child->level + 1;
	sched_domain_level_max = max(sched_domain_level_max, sd->level);

	set_domain_attribute(sd, attr);

static int build_sched_domains(const struct cpumask *cpu_map,
			       struct sched_domain_attr *attr)

	struct sched_domain *sd;

	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
	if (alloc_state != sa_rootdomain)

		struct sched_domain_topology_level *tl;

		for (tl = sched_domain_topology; tl->init; tl++) {
			sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
			if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
				sd->flags |= SD_OVERLAP;
			if (cpumask_equal(cpu_map, sched_domain_span(sd)))

		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
			sd->span_weight = cpumask_weight(sched_domain_span(sd));
			if (sd->flags & SD_OVERLAP) {
				if (build_overlap_sched_groups(sd, i))

			if (build_sched_groups(sd, i))

		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
			claim_allocations(i, sd);
			init_sched_groups_power(i, sd);

		cpu_attach_domain(sd, d.rd, i);

	__free_domain_allocs(&d, alloc_state, cpu_map);

static int ndoms_cur;
static struct sched_domain_attr *dattr_cur;

	for (i = 0; i < ndoms; i++) {
		if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
			free_sched_domains(doms, i);

void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
	for (i = 0; i < ndoms; i++)
		free_cpumask_var(doms[i]);

static int init_sched_domains(const struct cpumask *cpu_map)

	doms_cur = alloc_sched_domains(ndoms_cur);

	doms_cur = &fallback_doms;
	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
	err = build_sched_domains(doms_cur[0], NULL);
	register_sched_domain_sysctl();

static void detach_destroy_domains(const struct cpumask *cpu_map)

	cpu_attach_domain(NULL, &def_root_domain, i);

static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
			struct sched_domain_attr *new, int idx_new)
	struct sched_domain_attr tmp;

	return !memcmp(cur ? (cur + idx_cur) : &tmp,
			new ? (new + idx_new) : &tmp,
			sizeof(struct sched_domain_attr));

void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
			     struct sched_domain_attr *dattr_new)

	unregister_sched_domain_sysctl();