summaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/Kconfig1
-rw-r--r--drivers/cpuidle/Kconfig.arm10
-rw-r--r--drivers/cpuidle/cpuidle-arm.c4
-rw-r--r--drivers/cpuidle/cpuidle-big_little.c12
-rw-r--r--drivers/cpuidle/cpuidle-haltpoll.c2
-rw-r--r--drivers/cpuidle/cpuidle-mvebu-v7.c15
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c7
-rw-r--r--drivers/cpuidle/cpuidle-psci.c25
-rw-r--r--drivers/cpuidle/cpuidle-qcom-spm.c4
-rw-r--r--drivers/cpuidle/cpuidle-riscv-sbi.c19
-rw-r--r--drivers/cpuidle/cpuidle-tegra.c31
-rw-r--r--drivers/cpuidle/cpuidle.c72
-rw-r--r--drivers/cpuidle/driver.c4
-rw-r--r--drivers/cpuidle/dt_idle_states.c2
-rw-r--r--drivers/cpuidle/governors/teo.c102
-rw-r--r--drivers/cpuidle/poll_state.c8
-rw-r--r--drivers/cpuidle/sysfs.c6
17 files changed, 244 insertions, 80 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index ff71dd662880..cac5997dca50 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -74,6 +74,7 @@ endmenu
config HALTPOLL_CPUIDLE
tristate "Halt poll cpuidle driver"
depends on X86 && KVM_GUEST
+ select CPU_IDLE_GOV_HALTPOLL
default y
help
This option enables halt poll cpuidle driver, which allows to poll
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 747aa537389b..a1ee475d180d 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -24,6 +24,14 @@ config ARM_PSCI_CPUIDLE
It provides an idle driver that is capable of detecting and
managing idle states through the PSCI firmware interface.
+ The driver has limitations when used with PREEMPT_RT:
+ - If the idle states are described with the non-hierarchical layout,
+ all idle states are still available.
+
+ - If the idle states are described with the hierarchical layout,
+ only the idle states defined per CPU are available, but not the ones
+ being shared among a group of CPUs (aka cluster idle states).
+
config ARM_PSCI_CPUIDLE_DOMAIN
bool "PSCI CPU idle Domain"
depends on ARM_PSCI_CPUIDLE
@@ -102,6 +110,7 @@ config ARM_MVEBU_V7_CPUIDLE
config ARM_TEGRA_CPUIDLE
bool "CPU Idle Driver for NVIDIA Tegra SoCs"
depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
select ARM_CPU_SUSPEND
help
@@ -110,6 +119,7 @@ config ARM_TEGRA_CPUIDLE
config ARM_QCOM_SPM_CPUIDLE
bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARM_CPU_SUSPEND
select CPU_IDLE_MULTIPLE_DRIVERS
select DT_IDLE_STATES
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 8c758920d699..7cfb980a357d 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -31,8 +31,8 @@
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int arm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int arm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
/*
* Pass idle state index to arm_cpuidle_suspend which in turn
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
index abe51185f243..74972deda0ea 100644
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -64,7 +64,8 @@ static struct cpuidle_driver bl_idle_little_driver = {
.enter = bl_enter_powerdown,
.exit_latency = 700,
.target_residency = 2500,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM little-cluster power down",
},
@@ -85,7 +86,8 @@ static struct cpuidle_driver bl_idle_big_driver = {
.enter = bl_enter_powerdown,
.exit_latency = 500,
.target_residency = 2000,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C1",
.desc = "ARM big-cluster power down",
},
@@ -120,15 +122,17 @@ static int notrace bl_powerdown_finisher(unsigned long arg)
* Called from the CPUidle framework to program the device to the
* specified target state selected by the governor.
*/
-static int bl_enter_powerdown(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
+ ct_cpuidle_exit();
cpu_pm_exit();
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 3a39a7f48b77..e66df22f9695 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
local_irq_enable();
return index;
}
- default_idle();
+ arch_cpu_idle();
return index;
}
diff --git a/drivers/cpuidle/cpuidle-mvebu-v7.c b/drivers/cpuidle/cpuidle-mvebu-v7.c
index 01a856971f05..563dba609b98 100644
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -25,9 +25,9 @@
static int (*mvebu_v7_cpu_suspend)(int);
-static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int mvebu_v7_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int ret;
bool deepidle = false;
@@ -36,7 +36,10 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;
+ ct_cpuidle_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
+ ct_cpuidle_exit();
+
cpu_pm_exit();
if (ret)
@@ -53,6 +56,7 @@ static struct cpuidle_driver armadaxp_idle_driver = {
.exit_latency = 100,
.power_usage = 50,
.target_residency = 1000,
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.name = "MV CPU IDLE",
.desc = "CPU power down",
},
@@ -61,7 +65,7 @@ static struct cpuidle_driver armadaxp_idle_driver = {
.exit_latency = 1000,
.power_usage = 5,
.target_residency = 10000,
- .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE | CPUIDLE_FLAG_RCU_IDLE,
.name = "MV CPU DEEP IDLE",
.desc = "CPU and L2 Fabric power down",
},
@@ -76,7 +80,7 @@ static struct cpuidle_driver armada370_idle_driver = {
.exit_latency = 100,
.power_usage = 5,
.target_residency = 1000,
- .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE | CPUIDLE_FLAG_RCU_IDLE,
.name = "Deep Idle",
.desc = "CPU and L2 Fabric power down",
},
@@ -91,6 +95,7 @@ static struct cpuidle_driver armada38x_idle_driver = {
.exit_latency = 10,
.power_usage = 5,
.target_residency = 100,
+ .flags = CPUIDLE_FLAG_RCU_IDLE,
.name = "Idle",
.desc = "CPU and SCU power down",
},
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index c80cf9ddabd8..6ad2954948a5 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -64,8 +64,11 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
- /* Allow power off when OSI has been successfully enabled. */
- if (use_osi)
+ /*
+ * Allow power off when OSI has been successfully enabled.
+ * PREEMPT_RT is not yet ready to enter domain idle states.
+ */
+ if (use_osi && !IS_ENABLED(CONFIG_PREEMPT_RT))
pd->power_off = psci_pd_power_off;
else
pd->flags |= GENPD_FLAG_ALWAYS_ON;
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 57bc3e3ae391..6de027f9f6f5 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -49,14 +49,9 @@ static inline u32 psci_get_domain_state(void)
return __this_cpu_read(domain_state);
}
-static inline int psci_enter_state(int idx, u32 state)
-{
- return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
-}
-
-static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
u32 *states = data->psci_states;
@@ -69,12 +64,10 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
state = psci_get_domain_state();
if (!state)
@@ -82,12 +75,10 @@ static int __psci_enter_domain_idle_state(struct cpuidle_device *dev,
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -192,12 +183,12 @@ static void psci_idle_init_cpuhp(void)
pr_warn("Failed %d while setup cpuhp state\n", err);
}
-static int psci_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int psci_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *state = __this_cpu_read(psci_cpuidle_data.psci_states);
- return psci_enter_state(idx, state[idx]);
+ return CPU_PM_CPU_IDLE_ENTER_PARAM_RCU(psci_cpu_suspend_enter, idx, state[idx]);
}
static const struct of_device_id psci_idle_state_match[] = {
@@ -231,6 +222,9 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
if (!psci_has_osi_support())
return 0;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return 0;
+
data->dev = psci_dt_attach_cpu(cpu);
if (IS_ERR_OR_NULL(data->dev))
return PTR_ERR_OR_ZERO(data->dev);
@@ -240,6 +234,7 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = psci_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle = psci_enter_s2idle_domain_idle_state;
psci_cpuidle_use_cpuhp = true;
diff --git a/drivers/cpuidle/cpuidle-qcom-spm.c b/drivers/cpuidle/cpuidle-qcom-spm.c
index beedf22cbe78..326bca154ac7 100644
--- a/drivers/cpuidle/cpuidle-qcom-spm.c
+++ b/drivers/cpuidle/cpuidle-qcom-spm.c
@@ -58,8 +58,8 @@ static int qcom_cpu_spc(struct spm_driver_data *drv)
return ret;
}
-static int spm_enter_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int spm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
struct cpuidle_qcom_spm_data *data = container_of(drv, struct cpuidle_qcom_spm_data,
cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c
index 05fe2902df9a..be383f4b6855 100644
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -93,8 +93,8 @@ static int sbi_suspend(u32 state)
return sbi_suspend_finisher(state, 0, 0);
}
-static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx)
+static __cpuidle int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
{
u32 *states = __this_cpu_read(sbi_cpuidle_data.states);
u32 state = states[idx];
@@ -106,9 +106,9 @@ static int sbi_cpuidle_enter_state(struct cpuidle_device *dev,
idx, state);
}
-static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int idx,
- bool s2idle)
+static __cpuidle int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx,
+ bool s2idle)
{
struct sbi_cpuidle_data *data = this_cpu_ptr(&sbi_cpuidle_data);
u32 *states = data->states;
@@ -121,12 +121,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
return -1;
/* Do runtime PM to manage a hierarchical CPU toplogy. */
- ct_irq_enter_irqson();
if (s2idle)
dev_pm_genpd_suspend(pd_dev);
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_irq_exit_irqson();
+
+ ct_cpuidle_enter();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -135,12 +135,12 @@ static int __sbi_enter_domain_idle_state(struct cpuidle_device *dev,
ret = sbi_suspend(state) ? -1 : idx;
- ct_irq_enter_irqson();
+ ct_cpuidle_exit();
+
if (s2idle)
dev_pm_genpd_resume(pd_dev);
else
pm_runtime_get_sync(pd_dev);
- ct_irq_exit_irqson();
cpu_pm_exit();
@@ -251,6 +251,7 @@ static int sbi_dt_cpu_init_topology(struct cpuidle_driver *drv,
* of a shared state for the domain, assumes the domain states are all
* deeper states.
*/
+ drv->states[state_count - 1].flags |= CPUIDLE_FLAG_RCU_IDLE;
drv->states[state_count - 1].enter = sbi_enter_domain_idle_state;
drv->states[state_count - 1].enter_s2idle =
sbi_enter_s2idle_domain_idle_state;
diff --git a/drivers/cpuidle/cpuidle-tegra.c b/drivers/cpuidle/cpuidle-tegra.c
index 9845629aeb6d..b203a93deac5 100644
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -160,8 +160,8 @@ static int tegra_cpuidle_coupled_barrier(struct cpuidle_device *dev)
return 0;
}
-static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
- int index, unsigned int cpu)
+static __cpuidle int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
+ int index, unsigned int cpu)
{
int err;
@@ -180,9 +180,11 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
}
local_fiq_disable();
- RCU_NONIDLE(tegra_pm_set_cpu_in_lp2());
+ tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();
+ ct_cpuidle_enter();
+
switch (index) {
case TEGRA_C7:
err = tegra_cpuidle_c7_enter();
@@ -197,8 +199,10 @@ static int tegra_cpuidle_state_enter(struct cpuidle_device *dev,
break;
}
+ ct_cpuidle_exit();
+
cpu_pm_exit();
- RCU_NONIDLE(tegra_pm_clear_cpu_in_lp2());
+ tegra_pm_clear_cpu_in_lp2();
local_fiq_enable();
return err ?: index;
@@ -222,10 +226,11 @@ static int tegra_cpuidle_adjust_state_index(int index, unsigned int cpu)
return index;
}
-static int tegra_cpuidle_enter(struct cpuidle_device *dev,
- struct cpuidle_driver *drv,
- int index)
+static __cpuidle int tegra_cpuidle_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
+ bool do_rcu = drv->states[index].flags & CPUIDLE_FLAG_RCU_IDLE;
unsigned int cpu = cpu_logical_map(dev->cpu);
int ret;
@@ -233,9 +238,13 @@ static int tegra_cpuidle_enter(struct cpuidle_device *dev,
if (dev->states_usage[index].disable)
return -1;
- if (index == TEGRA_C1)
+ if (index == TEGRA_C1) {
+ if (do_rcu)
+ ct_cpuidle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
- else
+ if (do_rcu)
+ ct_cpuidle_exit();
+ } else
ret = tegra_cpuidle_state_enter(dev, index, cpu);
if (ret < 0) {
@@ -285,7 +294,8 @@ static struct cpuidle_driver tegra_idle_driver = {
.exit_latency = 2000,
.target_residency = 2200,
.power_usage = 100,
- .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE,
.name = "C7",
.desc = "CPU core powered off",
},
@@ -295,6 +305,7 @@ static struct cpuidle_driver tegra_idle_driver = {
.target_residency = 10000,
.power_usage = 0,
.flags = CPUIDLE_FLAG_TIMER_STOP |
+ CPUIDLE_FLAG_RCU_IDLE |
CPUIDLE_FLAG_COUPLED,
.name = "CC6",
.desc = "CPU cluster powered off",
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 6eceb1988243..0b00f21cefe3 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -136,11 +137,13 @@ int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
}
#ifdef CONFIG_SUSPEND
-static void enter_s2idle_proper(struct cpuidle_driver *drv,
- struct cpuidle_device *dev, int index)
+static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
{
- ktime_t time_start, time_end;
struct cpuidle_state *target_state = &drv->states[index];
+ ktime_t time_start, time_end;
+
+ instrumentation_begin();
time_start = ns_to_ktime(local_clock());
@@ -151,13 +154,18 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
* suspended is generally unsafe.
*/
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ raw_local_irq_disable();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
tick_unfreeze();
start_critical_timings();
@@ -165,6 +173,7 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
dev->states_usage[index].s2idle_usage++;
+ instrumentation_end();
}
/**
@@ -199,8 +208,9 @@ int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* @drv: cpuidle driver for this cpu
* @index: index into the states table in @drv of the state to enter
*/
-int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
- int index)
+noinstr int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
{
int entered_state;
@@ -208,6 +218,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
ktime_t time_start, time_end;
+ instrumentation_begin();
+
/*
* Tell the time framework to switch to a broadcast timer because our
* local timer will be shut down. If a local timer is used from another
@@ -234,11 +246,33 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
time_start = ns_to_ktime(local_clock());
stop_critical_timings();
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ ct_cpuidle_enter();
+ /* Annotate away the indirect call */
+ instrumentation_begin();
+ }
+
+ /*
+ * NOTE!!
+ *
+ * For cpuidle_state::enter() methods that do *NOT* set
+ * CPUIDLE_FLAG_RCU_IDLE RCU will be disabled here and these functions
+ * must be marked either noinstr or __cpuidle.
+ *
+ * For cpuidle_state::enter() methods that *DO* set
+ * CPUIDLE_FLAG_RCU_IDLE this isn't required, but they must mark the
+ * function calling ct_cpuidle_enter() as noinstr/__cpuidle and all
+ * functions called within the RCU-idle region.
+ */
entered_state = target_state->enter(dev, drv, index);
- if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+
+ if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
+ raw_local_irq_disable();
+
+ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) {
+ instrumentation_end();
+ ct_cpuidle_exit();
+ }
start_critical_timings();
sched_clock_idle_wakeup_event();
@@ -248,12 +282,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
/* The cpu is no longer idle or about to enter idle. */
sched_idle_set_state(NULL);
- if (broadcast) {
- if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
-
+ if (broadcast)
tick_broadcast_exit();
- }
if (!cpuidle_state_is_coupled(drv, index))
local_irq_enable();
@@ -305,6 +335,8 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
dev->states_usage[index].rejected++;
}
+ instrumentation_end();
+
return entered_state;
}
@@ -394,7 +426,7 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
* @dev: the cpuidle device
*
*/
-u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+__cpuidle u64 cpuidle_poll_time(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
int i;
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index f70aa17e2a8e..d9cda7f6ccb9 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -183,11 +183,15 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
else if (s->target_residency_ns < 0)
s->target_residency_ns = 0;
+ else
+ s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC);
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
else if (s->exit_latency_ns < 0)
s->exit_latency_ns = 0;
+ else
+ s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
}
}
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
index 7ca3d7d9b5ea..02aa0b39af9d 100644
--- a/drivers/cpuidle/dt_idle_states.c
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -77,7 +77,7 @@ static int init_state_node(struct cpuidle_state *idle_state,
if (err)
desc = state_node->name;
- idle_state->flags = 0;
+ idle_state->flags = CPUIDLE_FLAG_RCU_IDLE;
if (of_property_read_bool(state_node, "local-timer-stop"))
idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
/*
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index d9262db79cae..987fc5f3997d 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,8 +2,13 @@
/*
* Timer events oriented CPU idle governor
*
+ * TEO governor:
* Copyright (C) 2018 - 2021 Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * Util-awareness mechanism:
+ * Copyright (C) 2022 Arm Ltd.
+ * Author: Kajetan Puchalski <kajetan.puchalski@arm.com>
*/
/**
@@ -99,15 +104,56 @@
* select the given idle state instead of the candidate one.
*
* 3. By default, select the candidate state.
+ *
+ * Util-awareness mechanism:
+ *
+ * The idea behind the util-awareness extension is that there are two distinct
+ * scenarios for the CPU which should result in two different approaches to idle
+ * state selection - utilized and not utilized.
+ *
+ * In this case, 'utilized' means that the average runqueue util of the CPU is
+ * above a certain threshold.
+ *
+ * When the CPU is utilized while going into idle, more likely than not it will
+ * be woken up to do more work soon and so a shallower idle state should be
+ * selected to minimise latency and maximise performance. When the CPU is not
+ * being utilized, the usual metrics-based approach to selecting the deepest
+ * available idle state should be preferred to take advantage of the power
+ * saving.
+ *
+ * In order to achieve this, the governor uses a utilization threshold.
+ * The threshold is computed per-CPU as a percentage of the CPU's capacity
+ * by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%)
+ * seems to be getting the best results.
+ *
+ * Before selecting the next idle state, the governor compares the current CPU
+ * util to the precomputed util threshold. If it's below, it defaults to the
+ * TEO metrics mechanism. If it's above, the closest shallower idle state will
+ * be selected instead, as long as is not a polling state.
*/
#include <linux/cpuidle.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/topology.h>
#include <linux/tick.h>
/*
+ * The number of bits to shift the CPU's capacity by in order to determine
+ * the utilized threshold.
+ *
+ * 6 was chosen based on testing as the number that achieved the best balance
+ * of power and performance on average.
+ *
+ * The resulting threshold is high enough to not be triggered by background
+ * noise and low enough to react quickly when activity starts to ramp up.
+ */
+#define UTIL_THRESHOLD_SHIFT 6
+
+
+/*
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
* is used for decreasing metrics on a regular basis.
*/
@@ -137,9 +183,11 @@ struct teo_bin {
* @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
- * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
+ * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @next_recent_idx: Index of the next @recent_idx entry to update.
* @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @util_threshold: Threshold above which the CPU is considered utilized
+ * @utilized: Whether the last sleep on the CPU happened while utilized
*/
struct teo_cpu {
s64 time_span_ns;
@@ -148,11 +196,30 @@ struct teo_cpu {
unsigned int total;
int next_recent_idx;
int recent_idx[NR_RECENT];
+ unsigned long util_threshold;
+ bool utilized;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
/**
+ * teo_cpu_is_utilized - Check if the CPU's util is above the threshold
+ * @cpu: Target CPU
+ * @cpu_data: Governor CPU data for the target CPU
+ */
+#ifdef CONFIG_SMP
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return sched_cpu_util(cpu) > cpu_data->util_threshold;
+}
+#else
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return false;
+}
+#endif
+
+/**
* teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
@@ -258,15 +325,17 @@ static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_ns: Idle duration value to match.
+ * @no_poll: Don't consider polling states.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- s64 duration_ns)
+ s64 duration_ns, bool no_poll)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
- if (dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable ||
+ (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
continue;
state_idx = i;
@@ -321,6 +390,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
goto end;
}
+ cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+ /*
+ * If the CPU is being utilized over the threshold and there are only 2
+ * states to choose from, the metrics need not be considered, so choose
+ * the shallowest non-polling state and exit.
+ */
+ if (drv->state_count < 3 && cpu_data->utilized) {
+ for (i = 0; i < drv->state_count; ++i) {
+ if (!dev->states_usage[i].disable &&
+ !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
+ idx = i;
+ goto end;
+ }
+ }
+ }
+
/*
* Find the deepest idle state whose target residency does not exceed
* the current sleep length and the deepest idle state not deeper than
@@ -452,6 +537,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx > constraint_idx)
idx = constraint_idx;
+ /*
+ * If the CPU is being utilized over the threshold, choose a shallower
+ * non-polling state to improve latency
+ */
+ if (cpu_data->utilized)
+ idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+
end:
/*
* Don't stop the tick if the selected state is a polling one or if the
@@ -469,7 +561,7 @@ end:
*/
if (idx > idx0 &&
drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
}
return idx;
@@ -508,9 +600,11 @@ static int teo_enable_device(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu);
int i;
memset(cpu_data, 0, sizeof(*cpu_data));
+ cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT;
for (i = 0; i < NR_RECENT; i++)
cpu_data->recent_idx[i] = -1;
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index f7e83613ae94..bdcfeaecd228 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -13,11 +13,13 @@
static int __cpuidle poll_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- u64 time_start = local_clock();
+ u64 time_start;
+
+ time_start = local_clock();
dev->poll_time_limit = false;
- local_irq_enable();
+ raw_local_irq_enable();
if (!current_set_polling_and_test()) {
unsigned int loop_count = 0;
u64 limit;
@@ -36,6 +38,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
}
}
}
+ raw_local_irq_disable();
+
current_clr_polling();
return index;
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 2b496a53cbca..48948b171749 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -200,7 +200,7 @@ static void cpuidle_sysfs_release(struct kobject *kobj)
complete(&kdev->kobj_unregister);
}
-static struct kobj_type ktype_cpuidle = {
+static const struct kobj_type ktype_cpuidle = {
.sysfs_ops = &cpuidle_sysfs_ops,
.release = cpuidle_sysfs_release,
};
@@ -447,7 +447,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
complete(&state_obj->kobj_unregister);
}
-static struct kobj_type ktype_state_cpuidle = {
+static const struct kobj_type ktype_state_cpuidle = {
.sysfs_ops = &cpuidle_state_sysfs_ops,
.default_groups = cpuidle_state_default_groups,
.release = cpuidle_state_sysfs_release,
@@ -594,7 +594,7 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
};
ATTRIBUTE_GROUPS(cpuidle_driver_default);
-static struct kobj_type ktype_driver_cpuidle = {
+static const struct kobj_type ktype_driver_cpuidle = {
.sysfs_ops = &cpuidle_driver_sysfs_ops,
.default_groups = cpuidle_driver_default_groups,
.release = cpuidle_driver_sysfs_release,