summary | refs | log | tree | commit | diff | stats
path: root/drivers/cpuidle/cpuidle-pseries.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle/cpuidle-pseries.c')
-rw-r--r-- drivers/cpuidle/cpuidle-pseries.c | 77
1 files changed, 46 insertions, 31 deletions
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a2b5c6f60cf0..7e7ab5597d7a 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -346,11 +346,9 @@ static int pseries_cpuidle_driver_init(void)
static void __init fixup_cede0_latency(void)
{
struct xcede_latency_payload *payload;
- u64 min_latency_us;
+ u64 min_xcede_latency_us = UINT_MAX;
int i;
- min_latency_us = dedicated_states[1].exit_latency; // CEDE latency
-
if (parse_cede_parameters())
return;
@@ -358,42 +356,45 @@ static void __init fixup_cede0_latency(void)
nr_xcede_records);
payload = &xcede_latency_parameter.payload;
+
+ /*
+ * The CEDE idle state maps to CEDE(0). While the hypervisor
+ * does not advertise CEDE(0) exit latency values, it does
+ * advertise the latency values of the extended CEDE states.
+ * We use the lowest advertised exit latency value as a proxy
+ * for the exit latency of CEDE(0).
+ */
for (i = 0; i < nr_xcede_records; i++) {
struct xcede_latency_record *record = &payload->records[i];
+ u8 hint = record->hint;
u64 latency_tb = be64_to_cpu(record->latency_ticks);
u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
- if (latency_us == 0)
- pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i);
-
- if (latency_us < min_latency_us)
- min_latency_us = latency_us;
- }
-
- /*
- * By default, we assume that CEDE(0) has exit latency 10us,
- * since there is no way for us to query from the platform.
- *
- * However, if the wakeup latency of an Extended CEDE state is
- * smaller than 10us, then we can be sure that CEDE(0)
- * requires no more than that.
- *
- * Perform the fix-up.
- */
- if (min_latency_us < dedicated_states[1].exit_latency) {
/*
- * We set a minimum of 1us wakeup latency for cede0 to
- * distinguish it from snooze
+ * We expect the exit latency of an extended CEDE
+ * state to be non-zero, since it takes at least
+ * a few nanoseconds to wake up the idle CPU and
+ * dispatch the virtual processor into the Linux
+ * Guest.
+ *
+ * So we consider only non-zero values for performing
+ * the fixup of CEDE(0) latency.
*/
- u64 cede0_latency = 1;
+ if (latency_us == 0) {
+ pr_warn("cpuidle: Skipping xcede record %d [hint=%d]. Exit latency = 0us\n",
+ i, hint);
+ continue;
+ }
- if (min_latency_us > cede0_latency)
- cede0_latency = min_latency_us - 1;
+ if (latency_us < min_xcede_latency_us)
+ min_xcede_latency_us = latency_us;
+ }
- dedicated_states[1].exit_latency = cede0_latency;
- dedicated_states[1].target_residency = 10 * (cede0_latency);
+ if (min_xcede_latency_us != UINT_MAX) {
+ dedicated_states[1].exit_latency = min_xcede_latency_us;
+ dedicated_states[1].target_residency = 10 * (min_xcede_latency_us);
pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
- cede0_latency);
+ min_xcede_latency_us);
}
}
@@ -402,7 +403,7 @@ static void __init fixup_cede0_latency(void)
* pseries_idle_probe()
* Choose state table for shared versus dedicated partition
*/
-static int pseries_idle_probe(void)
+static int __init pseries_idle_probe(void)
{
if (cpuidle_disable != IDLE_NO_OVERRIDE)
@@ -419,7 +420,21 @@ static int pseries_idle_probe(void)
cpuidle_state_table = shared_states;
max_idle_state = ARRAY_SIZE(shared_states);
} else {
- fixup_cede0_latency();
+ /*
+ * Use firmware provided latency values
+ * starting with POWER10 platforms. In the
+ * case that we are running on a POWER10
+ * platform but in an earlier compat mode, we
+ * can still use the firmware provided values.
+ *
+ * However, on platforms prior to POWER10, we
+ * cannot rely on the accuracy of the firmware
+ * provided latency values. On such platforms,
+ * go with the conservative default estimate
+ * of 10us.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
+ fixup_cede0_latency();
cpuidle_state_table = dedicated_states;
max_idle_state = NR_DEDICATED_STATES;
}