Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--  kernel/sched/sched.h  | 437
1 file changed, 358 insertions(+), 79 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a189bec13729..c80d42e9589b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,7 @@ struct cfs_bandwidth {
ktime_t period;
u64 quota;
u64 runtime;
+ u64 burst;
s64 hierarchical_quota;
u8 idle;
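
[Editor's note: the new burst field lets a group bank unused quota across periods. A hedged sketch of a burst-aware refill, modeled on __refill_cfs_bandwidth_runtime() in kernel/sched/fair.c — the function name below is illustrative and the real capping logic may differ:

	static void refill_bandwidth_sketch(struct cfs_bandwidth *cfs_b)
	{
		if (unlikely(cfs_b->quota == RUNTIME_INF))
			return;

		/* Bank leftover runtime, but never more than one burst. */
		cfs_b->runtime += cfs_b->quota;
		cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
	}
]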
@@ -526,6 +527,11 @@ struct cfs_rq {
u64 exec_clock;
u64 min_vruntime;
+#ifdef CONFIG_SCHED_CORE
+ unsigned int forceidle_seq;
+ u64 min_vruntime_fi;
+#endif
+
#ifndef CONFIG_64BIT
u64 min_vruntime_copy;
#endif
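
[Editor's note: forceidle_seq and min_vruntime_fi exist so that cfs_prio_less() (declared further down) can compare vruntimes across SMT siblings while one of them is forced idle. A hedged sketch of the cross-runqueue comparison this baseline enables; the real logic lives in kernel/sched/fair.c and also walks group hierarchies:

	/* Compare two entities on different sibling runqueues by their
	 * vruntime relative to each runqueue's force-idle baseline.
	 */
	static bool vruntime_after_sketch(struct sched_entity *sea, struct cfs_rq *cfs_rqa,
					  struct sched_entity *seb, struct cfs_rq *cfs_rqb)
	{
		s64 delta = (s64)(sea->vruntime - seb->vruntime) +
			    (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);

		return delta > 0;
	}
]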
@@ -631,8 +637,8 @@ struct rt_rq {
} highest_prio;
#endif
#ifdef CONFIG_SMP
- unsigned long rt_nr_migratory;
- unsigned long rt_nr_total;
+ unsigned int rt_nr_migratory;
+ unsigned int rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
@@ -646,7 +652,7 @@ struct rt_rq {
raw_spinlock_t rt_runtime_lock;
#ifdef CONFIG_RT_GROUP_SCHED
- unsigned long rt_nr_boosted;
+ unsigned int rt_nr_boosted;
struct rq *rq;
struct task_group *tg;
@@ -663,7 +669,7 @@ struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
struct rb_root_cached root;
- unsigned long dl_nr_running;
+ unsigned int dl_nr_running;
#ifdef CONFIG_SMP
/*
@@ -677,7 +683,7 @@ struct dl_rq {
u64 next;
} earliest_dl;
- unsigned long dl_nr_migratory;
+ unsigned int dl_nr_migratory;
int overloaded;
/*
@@ -905,7 +911,7 @@ DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
*/
struct rq {
/* runqueue lock: */
- raw_spinlock_t lock;
+ raw_spinlock_t __lock;
/*
* nr_running and cpu_load should be in the same cacheline because
@@ -955,7 +961,7 @@ struct rq {
* one CPU and if it got migrated afterwards it may decrease
* it on another CPU. Always updated under the runqueue lock:
*/
- unsigned long nr_uninterruptible;
+ unsigned int nr_uninterruptible;
struct task_struct __rcu *curr;
struct task_struct *idle;
@@ -1017,6 +1023,9 @@ struct rq {
u64 idle_stamp;
u64 avg_idle;
+ unsigned long wake_stamp;
+ u64 wake_avg_idle;
+
/* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost;
@@ -1075,6 +1084,22 @@ struct rq {
#endif
unsigned int push_busy;
struct cpu_stop_work push_work;
+
+#ifdef CONFIG_SCHED_CORE
+ /* per rq */
+ struct rq *core;
+ struct task_struct *core_pick;
+ unsigned int core_enabled;
+ unsigned int core_sched_seq;
+ struct rb_root core_tree;
+
+ /* shared state */
+ unsigned int core_task_seq;
+ unsigned int core_pick_seq;
+ unsigned long core_cookie;
+ unsigned char core_forceidle;
+ unsigned int core_forceidle_seq;
+#endif
};
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1113,6 +1138,206 @@ static inline bool is_migration_disabled(struct task_struct *p)
#endif
}
+struct sched_group;
+#ifdef CONFIG_SCHED_CORE
+static inline struct cpumask *sched_group_span(struct sched_group *sg);
+
+DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+ return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
+}
+
+static inline bool sched_core_disabled(void)
+{
+ return !static_branch_unlikely(&__sched_core_enabled);
+}
+
+/*
+ * Be careful with this function; not for general use. The return value isn't
+ * stable unless you actually hold a relevant rq->__lock.
+ */
+static inline raw_spinlock_t *rq_lockp(struct rq *rq)
+{
+ if (sched_core_enabled(rq))
+ return &rq->core->__lock;
+
+ return &rq->__lock;
+}
+
+static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
+{
+ if (rq->core_enabled)
+ return &rq->core->__lock;
+
+ return &rq->__lock;
+}
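
[Editor's note: because __sched_core_flip() can switch a runqueue between its own __lock and the shared core lock, the pointer returned by __rq_lockp() is only stable while that lock is held. A hedged sketch of the retry pattern the out-of-line lock functions (raw_spin_rq_lock_nested() and friends, declared below and defined in kernel/sched/core.c) are expected to use:

	/* Sketch: acquire whatever lock currently covers this rq, retrying
	 * if the rq was flipped to a different lock while we spun.
	 */
	static void rq_lock_retry_sketch(struct rq *rq)
	{
		raw_spinlock_t *lock;

		for (;;) {
			lock = __rq_lockp(rq);
			raw_spin_lock(lock);
			if (likely(lock == __rq_lockp(rq)))
				return;		/* still the right lock; done */
			raw_spin_unlock(lock);	/* flipped under us; retry */
		}
	}
]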
+
+bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
+
+/*
+ * Helpers to check whether the CPU's core cookie matches the task's cookie
+ * when core scheduling is enabled.
+ * As a special case, a task's cookie always matches the CPU's core cookie
+ * if the CPU is in an idle core.
+ */
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ return rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
+ if (sched_core_cookie_match(cpu_rq(cpu), p))
+ return true;
+ }
+ return false;
+}
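
[Editor's note: these cookie helpers are intended for task-placement paths. A hedged usage sketch of an idle-CPU search that skips CPUs whose core cookie conflicts with the waking task — illustrative only; the real call sites are in kernel/sched/fair.c:

	static int find_cookie_matched_idle_cpu(struct task_struct *p,
						const struct cpumask *cpus)
	{
		int cpu;

		for_each_cpu_wrap(cpu, cpus, task_cpu(p)) {
			if (!available_idle_cpu(cpu))
				continue;
			/* Idle CPU, but only usable if the cookies agree. */
			if (sched_cpu_cookie_match(cpu_rq(cpu), p))
				return cpu;
		}

		return -1;
	}
]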
+
+extern void queue_core_balance(struct rq *rq);
+
+static inline bool sched_core_enqueued(struct task_struct *p)
+{
+ return !RB_EMPTY_NODE(&p->core_node);
+}
+
+extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
+extern void sched_core_dequeue(struct rq *rq, struct task_struct *p);
+
+extern void sched_core_get(void);
+extern void sched_core_put(void);
+
+extern unsigned long sched_core_alloc_cookie(void);
+extern void sched_core_put_cookie(unsigned long cookie);
+extern unsigned long sched_core_get_cookie(unsigned long cookie);
+extern unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie);
+
+#else /* !CONFIG_SCHED_CORE */
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+ return false;
+}
+
+static inline bool sched_core_disabled(void)
+{
+ return true;
+}
+
+static inline raw_spinlock_t *rq_lockp(struct rq *rq)
+{
+ return &rq->__lock;
+}
+
+static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
+{
+ return &rq->__lock;
+}
+
+static inline void queue_core_balance(struct rq *rq)
+{
+}
+
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ return true;
+}
+#endif /* CONFIG_SCHED_CORE */
+
+static inline void lockdep_assert_rq_held(struct rq *rq)
+{
+ lockdep_assert_held(__rq_lockp(rq));
+}
+
+extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
+extern bool raw_spin_rq_trylock(struct rq *rq);
+extern void raw_spin_rq_unlock(struct rq *rq);
+
+static inline void raw_spin_rq_lock(struct rq *rq)
+{
+ raw_spin_rq_lock_nested(rq, 0);
+}
+
+static inline void raw_spin_rq_lock_irq(struct rq *rq)
+{
+ local_irq_disable();
+ raw_spin_rq_lock(rq);
+}
+
+static inline void raw_spin_rq_unlock_irq(struct rq *rq)
+{
+ raw_spin_rq_unlock(rq);
+ local_irq_enable();
+}
+
+static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ raw_spin_rq_lock(rq);
+ return flags;
+}
+
+static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
+{
+ raw_spin_rq_unlock(rq);
+ local_irq_restore(flags);
+}
+
+#define raw_spin_rq_lock_irqsave(rq, flags) \
+do { \
+ flags = _raw_spin_rq_lock_irqsave(rq); \
+} while (0)
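
[Editor's note: raw_spin_rq_lock_irqsave() is a macro rather than a function so that flags can be assigned by value, matching the calling convention of raw_spin_lock_irqsave(). A hedged usage sketch:

	static void rq_touch_sketch(struct rq *rq)
	{
		unsigned long flags;

		raw_spin_rq_lock_irqsave(rq, flags);
		/* rq->__lock (or the shared core lock) held, IRQs off. */
		raw_spin_rq_unlock_irqrestore(rq, flags);
	}
]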
+
#ifdef CONFIG_SCHED_SMT
extern void __update_idle_core(struct rq *rq);
@@ -1134,6 +1359,57 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() raw_cpu_ptr(&runqueues)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+ SCHED_WARN_ON(!entity_is_task(se));
+ return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+ return p->se.cfs_rq;
+}
+
+/* runqueue on which this entity is (to be) queued */
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+ return se->cfs_rq;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+ return grp->my_q;
+}
+
+#else
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+ return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+ return &task_rq(p)->cfs;
+}
+
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+ struct task_struct *p = task_of(se);
+ struct rq *rq = task_rq(p);
+
+ return &rq->cfs;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+ return NULL;
+}
+#endif
+
extern void update_rq_clock(struct rq *rq);
static inline u64 __rq_clock_broken(struct rq *rq)
@@ -1179,7 +1455,7 @@ static inline void assert_clock_updated(struct rq *rq)
static inline u64 rq_clock(struct rq *rq)
{
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
assert_clock_updated(rq);
return rq->clock;
@@ -1187,7 +1463,7 @@ static inline u64 rq_clock(struct rq *rq)
static inline u64 rq_clock_task(struct rq *rq)
{
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
assert_clock_updated(rq);
return rq->clock_task;
@@ -1213,7 +1489,7 @@ static inline u64 rq_clock_thermal(struct rq *rq)
static inline void rq_clock_skip_update(struct rq *rq)
{
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
rq->clock_update_flags |= RQCF_REQ_SKIP;
}
@@ -1223,7 +1499,7 @@ static inline void rq_clock_skip_update(struct rq *rq)
*/
static inline void rq_clock_cancel_skipupdate(struct rq *rq)
{
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
rq->clock_update_flags &= ~RQCF_REQ_SKIP;
}
@@ -1254,7 +1530,7 @@ extern struct callback_head balance_push_callback;
*/
static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
{
- rf->cookie = lockdep_pin_lock(&rq->lock);
+ rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
#ifdef CONFIG_SCHED_DEBUG
rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
@@ -1272,12 +1548,12 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
rf->clock_update_flags = RQCF_UPDATED;
#endif
- lockdep_unpin_lock(&rq->lock, rf->cookie);
+ lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
}
static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
{
- lockdep_repin_lock(&rq->lock, rf->cookie);
+ lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
#ifdef CONFIG_SCHED_DEBUG
/*
@@ -1298,7 +1574,7 @@ static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
- raw_spin_unlock(&rq->lock);
+ raw_spin_rq_unlock(rq);
}
static inline void
@@ -1307,7 +1583,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
__releases(p->pi_lock)
{
rq_unpin_lock(rq, rf);
- raw_spin_unlock(&rq->lock);
+ raw_spin_rq_unlock(rq);
raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
}
@@ -1315,7 +1591,7 @@ static inline void
rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
- raw_spin_lock_irqsave(&rq->lock, rf->flags);
+ raw_spin_rq_lock_irqsave(rq, rf->flags);
rq_pin_lock(rq, rf);
}
@@ -1323,7 +1599,7 @@ static inline void
rq_lock_irq(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
- raw_spin_lock_irq(&rq->lock);
+ raw_spin_rq_lock_irq(rq);
rq_pin_lock(rq, rf);
}
@@ -1331,7 +1607,7 @@ static inline void
rq_lock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
- raw_spin_lock(&rq->lock);
+ raw_spin_rq_lock(rq);
rq_pin_lock(rq, rf);
}
@@ -1339,7 +1615,7 @@ static inline void
rq_relock(struct rq *rq, struct rq_flags *rf)
__acquires(rq->lock)
{
- raw_spin_lock(&rq->lock);
+ raw_spin_rq_lock(rq);
rq_repin_lock(rq, rf);
}
@@ -1348,7 +1624,7 @@ rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
- raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
+ raw_spin_rq_unlock_irqrestore(rq, rf->flags);
}
static inline void
@@ -1356,7 +1632,7 @@ rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
- raw_spin_unlock_irq(&rq->lock);
+ raw_spin_rq_unlock_irq(rq);
}
static inline void
@@ -1364,7 +1640,7 @@ rq_unlock(struct rq *rq, struct rq_flags *rf)
__releases(rq->lock)
{
rq_unpin_lock(rq, rf);
- raw_spin_unlock(&rq->lock);
+ raw_spin_rq_unlock(rq);
}
static inline struct rq *
@@ -1429,7 +1705,7 @@ queue_balance_callback(struct rq *rq,
struct callback_head *head,
void (*func)(struct rq *rq))
{
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
return;
@@ -1844,6 +2120,9 @@ struct sched_class {
#ifdef CONFIG_SMP
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
+
+ struct task_struct * (*pick_task)(struct rq *rq);
+
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
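
[Editor's note: the new pick_task() hook lets the core-scheduling pick loop ask each class for its best runnable task without the set_next_task() side effects of pick_next_task(), so candidates can be compared across SMT siblings before one is committed. The idle class makes for the simplest sketch — hedged; modeled on what pick_task_idle() in kernel/sched/idle.c would reduce to:

	static struct task_struct *pick_task_idle(struct rq *rq)
	{
		/* No bookkeeping: just report the candidate. */
		return rq->idle;
	}
]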
@@ -1893,7 +2172,6 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
static inline void set_next_task(struct rq *rq, struct task_struct *next)
{
- WARN_ON_ONCE(rq->curr != next);
next->sched_class->set_next_task(rq, next, false);
}
@@ -1969,7 +2247,7 @@ static inline struct task_struct *get_push_task(struct rq *rq)
{
struct task_struct *p = rq->curr;
- lockdep_assert_held(&rq->lock);
+ lockdep_assert_rq_held(rq);
if (rq->push_busy)
return NULL;
@@ -2181,10 +2459,38 @@ unsigned long arch_scale_freq_capacity(int cpu)
}
#endif
+
#ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPTION
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
+{
+#ifdef CONFIG_SCHED_CORE
+ /*
+ * In order to not have {0,2},{1,3} turn into into an AB-BA,
+ * order by core-id first and cpu-id second.
+ *
+ * Notably:
+ *
+ * double_rq_lock(0,3); will take core-0, core-1 lock
+ * double_rq_lock(1,2); will take core-1, core-0 lock
+ *
+ * when only cpu-id is considered.
+ */
+ if (rq1->core->cpu < rq2->core->cpu)
+ return true;
+ if (rq1->core->cpu > rq2->core->cpu)
+ return false;
+
+ /*
+ * __sched_core_flip() relies on SMT having cpu-id lock order.
+ */
+#endif
+ return rq1->cpu < rq2->cpu;
+}
+
+extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
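
[Editor's note: with rq_order_less() defining a total order over runqueue locks, double_rq_lock() (now out of line in kernel/sched/core.c) can simply sort the pair before locking. A hedged sketch of that expected shape:

	void double_rq_lock_sketch(struct rq *rq1, struct rq *rq2)
	{
		lockdep_assert_irqs_disabled();

		if (rq_order_less(rq2, rq1))
			swap(rq1, rq2);	/* lock the lower-ordered rq first */

		raw_spin_rq_lock(rq1);
		if (__rq_lockp(rq1) != __rq_lockp(rq2))
			raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
		/* else: same underlying lock (shared core), already held */
	}
]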
+
+#ifdef CONFIG_PREEMPTION
/*
* fair double_lock_balance: Safely acquires both rq->locks in a fair
@@ -2199,7 +2505,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
- raw_spin_unlock(&this_rq->lock);
+ raw_spin_rq_unlock(this_rq);
double_rq_lock(this_rq, busiest);
return 1;
@@ -2218,20 +2524,21 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
- int ret = 0;
-
- if (unlikely(!raw_spin_trylock(&busiest->lock))) {
- if (busiest < this_rq) {
- raw_spin_unlock(&this_rq->lock);
- raw_spin_lock(&busiest->lock);
- raw_spin_lock_nested(&this_rq->lock,
- SINGLE_DEPTH_NESTING);
- ret = 1;
- } else
- raw_spin_lock_nested(&busiest->lock,
- SINGLE_DEPTH_NESTING);
+ if (__rq_lockp(this_rq) == __rq_lockp(busiest))
+ return 0;
+
+ if (likely(raw_spin_rq_trylock(busiest)))
+ return 0;
+
+ if (rq_order_less(this_rq, busiest)) {
+ raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
+ return 0;
}
- return ret;
+
+ raw_spin_rq_unlock(this_rq);
+ double_rq_lock(this_rq, busiest);
+
+ return 1;
}
#endif /* CONFIG_PREEMPTION */
@@ -2241,11 +2548,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
*/
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{
- if (unlikely(!irqs_disabled())) {
- /* printk() doesn't work well under rq->lock */
- raw_spin_unlock(&this_rq->lock);
- BUG_ON(1);
- }
+ lockdep_assert_irqs_disabled();
return _double_lock_balance(this_rq, busiest);
}
@@ -2253,8 +2556,9 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
__releases(busiest->lock)
{
- raw_spin_unlock(&busiest->lock);
- lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+ if (__rq_lockp(this_rq) != __rq_lockp(busiest))
+ raw_spin_rq_unlock(busiest);
+ lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
}
static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
@@ -2285,31 +2589,6 @@ static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
}
/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
- __acquires(rq1->lock)
- __acquires(rq2->lock)
-{
- BUG_ON(!irqs_disabled());
- if (rq1 == rq2) {
- raw_spin_lock(&rq1->lock);
- __acquire(rq2->lock); /* Fake it out ;) */
- } else {
- if (rq1 < rq2) {
- raw_spin_lock(&rq1->lock);
- raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
- } else {
- raw_spin_lock(&rq2->lock);
- raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
- }
- }
-}
-
-/*
* double_rq_unlock - safely unlock two runqueues
*
* Note this does not restore interrupts like task_rq_unlock,
@@ -2319,11 +2598,11 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
- raw_spin_unlock(&rq1->lock);
- if (rq1 != rq2)
- raw_spin_unlock(&rq2->lock);
+ if (__rq_lockp(rq1) != __rq_lockp(rq2))
+ raw_spin_rq_unlock(rq2);
else
__release(rq2->lock);
+ raw_spin_rq_unlock(rq1);
}
extern void set_rq_online (struct rq *rq);
@@ -2344,7 +2623,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
{
BUG_ON(!irqs_disabled());
BUG_ON(rq1 != rq2);
- raw_spin_lock(&rq1->lock);
+ raw_spin_rq_lock(rq1);
__acquire(rq2->lock); /* Fake it out ;) */
}
@@ -2359,7 +2638,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq2->lock)
{
BUG_ON(rq1 != rq2);
- raw_spin_unlock(&rq1->lock);
+ raw_spin_rq_unlock(rq1);
__release(rq2->lock);
}