summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 21:13:58 +0100
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 21:13:58 +0100
commit2504ba8b01634319a6f95b7fa9bf9c101437e158 (patch)
treed479a6cc799f958c3961d10d0149d63649901f40 /drivers
parentMerge tag 'hardening-v6.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parentMerge branches 'pm-tools' and 'pm-docs' (diff)
downloadlinux-2504ba8b01634319a6f95b7fa9bf9c101437e158.tar.xz
linux-2504ba8b01634319a6f95b7fa9bf9c101437e158.zip
Merge tag 'pm-6.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki: "These add EPP support to the AMD P-state cpufreq driver, add support for new platforms to the Intel RAPL power capping driver, intel_idle and the Qualcomm cpufreq driver, enable thermal cooling for Tegra194, drop the custom cpufreq driver for loongson1 that is not necessary any more (and the corresponding cpufreq platform device), fix assorted issues and clean up code. Specifics: - Add EPP support to the AMD P-state cpufreq driver (Perry Yuan, Wyes Karny, Arnd Bergmann, Bagas Sanjaya) - Drop the custom cpufreq driver for loongson1 that is not necessary any more and the corresponding cpufreq platform device (Keguang Zhang) - Remove "select SRCU" from system sleep, cpufreq and OPP Kconfig entries (Paul E. McKenney) - Enable thermal cooling for Tegra194 (Yi-Wei Wang) - Register module device table and add missing compatibles for cpufreq-qcom-hw (Nícolas F. R. A. Prado, Abel Vesa and Luca Weiss) - Various dt binding updates for qcom-cpufreq-nvmem and opp-v2-kryo-cpu (Christian Marangi) - Make kobj_type structure in the cpufreq core constant (Thomas Weißschuh) - Make cpufreq_unregister_driver() return void (Uwe Kleine-König) - Make the TEO cpuidle governor check CPU utilization in order to refine idle state selection (Kajetan Puchalski) - Make Kconfig select the haltpoll cpuidle governor when the haltpoll cpuidle driver is selected and replace a default_idle() call in that driver with arch_cpu_idle() to allow MWAIT to be used (Li RongQing) - Add Emerald Rapids Xeon support to the intel_idle driver (Artem Bityutskiy) - Add ARCH_SUSPEND_POSSIBLE dependencies for ARMv4 cpuidle drivers to avoid randconfig build failures (Arnd Bergmann) - Make kobj_type structures used in the cpuidle sysfs interface constant (Thomas Weißschuh) - Make the cpuidle driver registration code update microsecond values of idle state parameters in accordance with their nanosecond values if they are provided (Rafael Wysocki) - Make the PSCI cpuidle driver prevent topology CPUs from being suspended on PREEMPT_RT (Krzysztof Kozlowski) - Document that pm_runtime_force_suspend() cannot be used with DPM_FLAG_SMART_SUSPEND (Richard Fitzgerald) - Add EXPORT macros for exporting PM functions from drivers (Richard Fitzgerald) - Remove /** from non-kernel-doc comments in hibernation code (Randy Dunlap) - Fix possible name leak in powercap_register_zone() (Yang Yingliang) - Add Meteor Lake and Emerald Rapids support to the intel_rapl power capping driver (Zhang Rui) - Modify the idle_inject power capping facility to support 100% idle injection (Srinivas Pandruvada) - Fix large time windows handling in the intel_rapl power capping driver (Zhang Rui) - Fix memory leaks with using debugfs_lookup() in the generic PM domains and Energy Model code (Greg Kroah-Hartman) - Add missing 'cache-unified' property in the example for kryo OPP bindings (Rob Herring) - Fix error checking in opp_migrate_dentry() (Qi Zheng) - Let qcom,opp-fuse-level be a 2-long array for qcom SoCs (Konrad Dybcio) - Modify some power management utilities to use the canonical ftrace path (Ross Zwisler) - Correct spelling problems for Documentation/power/ as reported by codespell (Randy Dunlap)" * tag 'pm-6.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (53 commits) Documentation: amd-pstate: disambiguate user space sections cpufreq: amd-pstate: Fix invalid write to MSR_AMD_CPPC_REQ dt-bindings: opp: opp-v2-kryo-cpu: enlarge opp-supported-hw maximum dt-bindings: cpufreq: qcom-cpufreq-nvmem: make cpr bindings optional dt-bindings: cpufreq: qcom-cpufreq-nvmem: specify supported opp tables PM: Add EXPORT macros for exporting PM functions cpuidle: psci: Do not suspend topology CPUs on PREEMPT_RT MIPS: loongson32: Drop obsolete cpufreq platform device powercap: intel_rapl: Fix handling for large time window cpuidle: driver: Update microsecond values of state parameters as needed cpuidle: sysfs: make kobj_type structures constant cpuidle: add ARCH_SUSPEND_POSSIBLE dependencies PM: EM: fix memory leak with using debugfs_lookup() PM: domains: fix memory leak with using debugfs_lookup() cpufreq: Make kobj_type structure constant cpufreq: davinci: Fix clk use after free cpufreq: amd-pstate: avoid uninitialized variable use cpufreq: Make cpufreq_unregister_driver() return void OPP: fix error checking in opp_migrate_dentry() dt-bindings: cpufreq: cpufreq-qcom-hw: Add SM8550 compatible ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/cppc_acpi.c67
-rw-r--r--drivers/base/power/domain.c5
-rw-r--r--drivers/base/power/runtime.c4
-rw-r--r--drivers/cpufreq/Kconfig10
-rw-r--r--drivers/cpufreq/Makefile1
-rw-r--r--drivers/cpufreq/amd-pstate.c705
-rw-r--r--drivers/cpufreq/brcmstb-avs-cpufreq.c5
-rw-r--r--drivers/cpufreq/cpufreq.c10
-rw-r--r--drivers/cpufreq/davinci-cpufreq.c4
-rw-r--r--drivers/cpufreq/loongson1-cpufreq.c222
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c5
-rw-r--r--drivers/cpufreq/omap-cpufreq.c4
-rw-r--r--drivers/cpufreq/qcom-cpufreq-hw.c4
-rw-r--r--drivers/cpufreq/tegra194-cpufreq.c3
-rw-r--r--drivers/cpuidle/Kconfig1
-rw-r--r--drivers/cpuidle/Kconfig.arm10
-rw-r--r--drivers/cpuidle/cpuidle-haltpoll.c2
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c7
-rw-r--r--drivers/cpuidle/cpuidle-psci.c3
-rw-r--r--drivers/cpuidle/driver.c4
-rw-r--r--drivers/cpuidle/governors/teo.c102
-rw-r--r--drivers/cpuidle/sysfs.c6
-rw-r--r--drivers/idle/intel_idle.c2
-rw-r--r--drivers/opp/Kconfig1
-rw-r--r--drivers/opp/debugfs.c2
-rw-r--r--drivers/powercap/idle_inject.c6
-rw-r--r--drivers/powercap/intel_rapl_common.c13
-rw-r--r--drivers/powercap/powercap_sys.c14
28 files changed, 931 insertions, 291 deletions
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 0f17b1c32718..02d83c807271 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1154,6 +1154,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
}
/**
+ * cppc_get_epp_perf - Get the epp register value.
+ * @cpunum: CPU from which to get epp preference value.
+ * @epp_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_epp_perf(int cpunum, u64 *epp_perf)
+{
+ return cppc_get_perf(cpunum, ENERGY_PERF, epp_perf);
+}
+EXPORT_SYMBOL_GPL(cppc_get_epp_perf);
+
+/**
* cppc_get_perf_caps - Get a CPU's performance capabilities.
* @cpunum: CPU from which to get capabilities info.
* @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h
@@ -1365,6 +1378,60 @@ out_err:
}
EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
+/*
+ * Set Energy Performance Preference Register value through
+ * Performance Controls Interface
+ */
+int cppc_set_epp_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls, bool enable)
+{
+ int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
+ struct cpc_register_resource *epp_set_reg;
+ struct cpc_register_resource *auto_sel_reg;
+ struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
+ struct cppc_pcc_data *pcc_ss_data = NULL;
+ int ret;
+
+ if (!cpc_desc) {
+ pr_debug("No CPC descriptor for CPU:%d\n", cpu);
+ return -ENODEV;
+ }
+
+ auto_sel_reg = &cpc_desc->cpc_regs[AUTO_SEL_ENABLE];
+ epp_set_reg = &cpc_desc->cpc_regs[ENERGY_PERF];
+
+ if (CPC_IN_PCC(epp_set_reg) || CPC_IN_PCC(auto_sel_reg)) {
+ if (pcc_ss_id < 0) {
+ pr_debug("Invalid pcc_ss_id for CPU:%d\n", cpu);
+ return -ENODEV;
+ }
+
+ if (CPC_SUPPORTED(auto_sel_reg)) {
+ ret = cpc_write(cpu, auto_sel_reg, enable);
+ if (ret)
+ return ret;
+ }
+
+ if (CPC_SUPPORTED(epp_set_reg)) {
+ ret = cpc_write(cpu, epp_set_reg, perf_ctrls->energy_perf);
+ if (ret)
+ return ret;
+ }
+
+ pcc_ss_data = pcc_data[pcc_ss_id];
+
+ down_write(&pcc_ss_data->pcc_lock);
+ /* after writing CPC, transfer the ownership of PCC to platform */
+ ret = send_pcc_cmd(pcc_ss_id, CMD_WRITE);
+ up_write(&pcc_ss_data->pcc_lock);
+ } else {
+ ret = -ENOTSUPP;
+ pr_debug("_CPC in PCC is not supported\n");
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cppc_set_epp_perf);
+
/**
* cppc_set_enable - Set to enable CPPC on the processor by writing the
* Continuous Performance Control package EnableRegister field.
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 967bcf9d415e..6097644ebdc5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -220,13 +220,10 @@ static void genpd_debug_add(struct generic_pm_domain *genpd);
static void genpd_debug_remove(struct generic_pm_domain *genpd)
{
- struct dentry *d;
-
if (!genpd_debugfs_dir)
return;
- d = debugfs_lookup(genpd->name, genpd_debugfs_dir);
- debugfs_remove(d);
+ debugfs_lookup_and_remove(genpd->name, genpd_debugfs_dir);
}
static void genpd_update_accounting(struct generic_pm_domain *genpd)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 98f7b3d7d669..4545669cb973 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1864,6 +1864,10 @@ static bool pm_runtime_need_not_resume(struct device *dev)
* sure the device is put into low power state and it should only be used during
* system-wide PM transitions to sleep states. It assumes that the analogous
* pm_runtime_force_resume() will be used to resume the device.
+ *
+ * Do not use with DPM_FLAG_SMART_SUSPEND as this can lead to an inconsistent
+ * state where this function has called the ->runtime_suspend callback but the
+ * PM core marks the driver as runtime active.
*/
int pm_runtime_force_suspend(struct device *dev)
{
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 8466f78651fc..2c839bd2b051 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -3,7 +3,6 @@ menu "CPU Frequency scaling"
config CPU_FREQ
bool "CPU Frequency scaling"
- select SRCU
help
CPU Frequency scaling allows you to change the clock speed of
CPUs on the fly. This is a nice method to save power, because
@@ -271,15 +270,6 @@ config LOONGSON2_CPUFREQ
Loongson2F and its successors support this feature.
If in doubt, say N.
-
-config LOONGSON1_CPUFREQ
- tristate "Loongson1 CPUFreq Driver"
- depends on LOONGSON1_LS1B
- help
- This option adds a CPUFreq driver for loongson1 processors which
- support software configurable cpu frequency.
-
- If in doubt, say N.
endif
if SPARC64
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index a19842fbd521..ef8510774913 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -104,7 +104,6 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o
obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o
obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o
obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o
-obj-$(CONFIG_LOONGSON1_CPUFREQ) += loongson1-cpufreq.o
obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o
obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o
obj-$(CONFIG_SPARC_US3_CPUFREQ) += sparc-us3-cpufreq.o
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index c17bd845f5fc..45c88894fd8e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -59,8 +59,171 @@
* we disable it by default to go acpi-cpufreq on these processors and add a
* module parameter to be able to enable it manually for debugging.
*/
+static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
-static int cppc_load __initdata;
+static struct cpufreq_driver amd_pstate_epp_driver;
+static int cppc_state = AMD_PSTATE_DISABLE;
+struct kobject *amd_pstate_kobj;
+
+/*
+ * AMD Energy Preference Performance (EPP)
+ * The EPP is used in the CCLK DPM controller to drive
+ * the frequency that a core is going to operate during
+ * short periods of activity. EPP values will be utilized for
+ * different OS profiles (balanced, performance, power savings)
+ * display strings corresponding to EPP index in the
+ * energy_perf_strings[]
+ * index String
+ *-------------------------------------
+ * 0 default
+ * 1 performance
+ * 2 balance_performance
+ * 3 balance_power
+ * 4 power
+ */
+enum energy_perf_value_index {
+ EPP_INDEX_DEFAULT = 0,
+ EPP_INDEX_PERFORMANCE,
+ EPP_INDEX_BALANCE_PERFORMANCE,
+ EPP_INDEX_BALANCE_POWERSAVE,
+ EPP_INDEX_POWERSAVE,
+};
+
+static const char * const energy_perf_strings[] = {
+ [EPP_INDEX_DEFAULT] = "default",
+ [EPP_INDEX_PERFORMANCE] = "performance",
+ [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
+ [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
+ [EPP_INDEX_POWERSAVE] = "power",
+ NULL
+};
+
+static unsigned int epp_values[] = {
+ [EPP_INDEX_DEFAULT] = 0,
+ [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
+ [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
+ [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
+ [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
+ };
+
+static inline int get_mode_idx_from_str(const char *str, size_t size)
+{
+ int i;
+
+ for (i=0; i < AMD_PSTATE_MAX; i++) {
+ if (!strncmp(str, amd_pstate_mode_string[i], size))
+ return i;
+ }
+ return -EINVAL;
+}
+
+static DEFINE_MUTEX(amd_pstate_limits_lock);
+static DEFINE_MUTEX(amd_pstate_driver_lock);
+
+static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
+{
+ u64 epp;
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ if (!cppc_req_cached) {
+ epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
+ &cppc_req_cached);
+ if (epp)
+ return epp;
+ }
+ epp = (cppc_req_cached >> 24) & 0xFF;
+ } else {
+ ret = cppc_get_epp_perf(cpudata->cpu, &epp);
+ if (ret < 0) {
+ pr_debug("Could not retrieve energy perf value (%d)\n", ret);
+ return -EIO;
+ }
+ }
+
+ return (s16)(epp & 0xff);
+}
+
+static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
+{
+ s16 epp;
+ int index = -EINVAL;
+
+ epp = amd_pstate_get_epp(cpudata, 0);
+ if (epp < 0)
+ return epp;
+
+ switch (epp) {
+ case AMD_CPPC_EPP_PERFORMANCE:
+ index = EPP_INDEX_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
+ index = EPP_INDEX_BALANCE_PERFORMANCE;
+ break;
+ case AMD_CPPC_EPP_BALANCE_POWERSAVE:
+ index = EPP_INDEX_BALANCE_POWERSAVE;
+ break;
+ case AMD_CPPC_EPP_POWERSAVE:
+ index = EPP_INDEX_POWERSAVE;
+ break;
+ default:
+ break;
+ }
+
+ return index;
+}
+
+static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
+{
+ int ret;
+ struct cppc_perf_ctrls perf_ctrls;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 value = READ_ONCE(cpudata->cppc_req_cached);
+
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ if (!ret)
+ cpudata->epp_cached = epp;
+ } else {
+ perf_ctrls.energy_perf = epp;
+ ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
+ if (ret) {
+ pr_debug("failed to set energy perf value (%d)\n", ret);
+ return ret;
+ }
+ cpudata->epp_cached = epp;
+ }
+
+ return ret;
+}
+
+static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
+ int pref_index)
+{
+ int epp = -EINVAL;
+ int ret;
+
+ if (!pref_index) {
+ pr_debug("EPP pref_index is invalid\n");
+ return -EINVAL;
+ }
+
+ if (epp == -EINVAL)
+ epp = epp_values[pref_index];
+
+ if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+ pr_debug("EPP cannot be set under performance policy\n");
+ return -EBUSY;
+ }
+
+ ret = amd_pstate_set_epp(cpudata, epp);
+
+ return ret;
+}
static inline int pstate_enable(bool enable)
{
@@ -70,11 +233,21 @@ static inline int pstate_enable(bool enable)
static int cppc_enable(bool enable)
{
int cpu, ret = 0;
+ struct cppc_perf_ctrls perf_ctrls;
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
return ret;
+
+ /* Enable autonomous mode for EPP */
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ /* Set desired perf as zero to allow EPP firmware control */
+ perf_ctrls.desired_perf = 0;
+ ret = cppc_set_perf(cpu, &perf_ctrls);
+ if (ret)
+ return ret;
+ }
}
return ret;
@@ -418,7 +591,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
return;
cpudata->boost_supported = true;
- amd_pstate_driver.boost_enabled = true;
+ current_pstate_driver->boost_enabled = true;
}
static void amd_perf_ctl_reset(unsigned int cpu)
@@ -501,6 +674,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
policy->driver_data = cpudata;
amd_pstate_boost_init(cpudata);
+ if (!current_pstate_driver->adjust_perf)
+ current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
return 0;
@@ -561,7 +736,7 @@ static ssize_t show_amd_pstate_max_freq(struct cpufreq_policy *policy,
if (max_freq < 0)
return max_freq;
- return sprintf(&buf[0], "%u\n", max_freq);
+ return sysfs_emit(buf, "%u\n", max_freq);
}
static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *policy,
@@ -574,7 +749,7 @@ static ssize_t show_amd_pstate_lowest_nonlinear_freq(struct cpufreq_policy *poli
if (freq < 0)
return freq;
- return sprintf(&buf[0], "%u\n", freq);
+ return sysfs_emit(buf, "%u\n", freq);
}
/*
@@ -589,13 +764,151 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
perf = READ_ONCE(cpudata->highest_perf);
- return sprintf(&buf[0], "%u\n", perf);
+ return sysfs_emit(buf, "%u\n", perf);
+}
+
+static ssize_t show_energy_performance_available_preferences(
+ struct cpufreq_policy *policy, char *buf)
+{
+ int i = 0;
+ int offset = 0;
+
+ while (energy_perf_strings[i] != NULL)
+ offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
+
+ sysfs_emit_at(buf, offset, "\n");
+
+ return offset;
+}
+
+static ssize_t store_energy_performance_preference(
+ struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ char str_preference[21];
+ ssize_t ret;
+
+ ret = sscanf(buf, "%20s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = match_string(energy_perf_strings, -1, str_preference);
+ if (ret < 0)
+ return -EINVAL;
+
+ mutex_lock(&amd_pstate_limits_lock);
+ ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+ mutex_unlock(&amd_pstate_limits_lock);
+
+ return ret ?: count;
+}
+
+static ssize_t show_energy_performance_preference(
+ struct cpufreq_policy *policy, char *buf)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int preference;
+
+ preference = amd_pstate_get_energy_pref_index(cpudata);
+ if (preference < 0)
+ return preference;
+
+ return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
+}
+
+static ssize_t amd_pstate_show_status(char *buf)
+{
+ if (!current_pstate_driver)
+ return sysfs_emit(buf, "disable\n");
+
+ return sysfs_emit(buf, "%s\n", amd_pstate_mode_string[cppc_state]);
+}
+
+static void amd_pstate_driver_cleanup(void)
+{
+ current_pstate_driver = NULL;
+}
+
+static int amd_pstate_update_status(const char *buf, size_t size)
+{
+ int ret = 0;
+ int mode_idx;
+
+ if (size > 7 || size < 6)
+ return -EINVAL;
+ mode_idx = get_mode_idx_from_str(buf, size);
+
+ switch(mode_idx) {
+ case AMD_PSTATE_DISABLE:
+ if (!current_pstate_driver)
+ return -EINVAL;
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ return -EBUSY;
+ cpufreq_unregister_driver(current_pstate_driver);
+ amd_pstate_driver_cleanup();
+ break;
+ case AMD_PSTATE_PASSIVE:
+ if (current_pstate_driver) {
+ if (current_pstate_driver == &amd_pstate_driver)
+ return 0;
+ cpufreq_unregister_driver(current_pstate_driver);
+ cppc_state = AMD_PSTATE_PASSIVE;
+ current_pstate_driver = &amd_pstate_driver;
+ }
+
+ ret = cpufreq_register_driver(current_pstate_driver);
+ break;
+ case AMD_PSTATE_ACTIVE:
+ if (current_pstate_driver) {
+ if (current_pstate_driver == &amd_pstate_epp_driver)
+ return 0;
+ cpufreq_unregister_driver(current_pstate_driver);
+ current_pstate_driver = &amd_pstate_epp_driver;
+ cppc_state = AMD_PSTATE_ACTIVE;
+ }
+
+ ret = cpufreq_register_driver(current_pstate_driver);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static ssize_t show_status(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ssize_t ret;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ ret = amd_pstate_show_status(buf);
+ mutex_unlock(&amd_pstate_driver_lock);
+
+ return ret;
+}
+
+static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+ const char *buf, size_t count)
+{
+ char *p = memchr(buf, '\n', count);
+ int ret;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ ret = amd_pstate_update_status(buf, p ? p - buf : count);
+ mutex_unlock(&amd_pstate_driver_lock);
+
+ return ret < 0 ? ret : count;
}
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
+define_one_global_rw(status);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
@@ -604,6 +917,313 @@ static struct freq_attr *amd_pstate_attr[] = {
NULL,
};
+static struct freq_attr *amd_pstate_epp_attr[] = {
+ &amd_pstate_max_freq,
+ &amd_pstate_lowest_nonlinear_freq,
+ &amd_pstate_highest_perf,
+ &energy_performance_preference,
+ &energy_performance_available_preferences,
+ NULL,
+};
+
+static struct attribute *pstate_global_attributes[] = {
+ &status.attr,
+ NULL
+};
+
+static const struct attribute_group amd_pstate_global_attr_group = {
+ .attrs = pstate_global_attributes,
+};
+
+static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+{
+ int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+ struct amd_cpudata *cpudata;
+ struct device *dev;
+ u64 value;
+
+ /*
+ * Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
+ * which is ideal for initialization process.
+ */
+ amd_perf_ctl_reset(policy->cpu);
+ dev = get_cpu_device(policy->cpu);
+ if (!dev)
+ return -ENODEV;
+
+ cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
+ if (!cpudata)
+ return -ENOMEM;
+
+ cpudata->cpu = policy->cpu;
+ cpudata->epp_policy = 0;
+
+ ret = amd_pstate_init_perf(cpudata);
+ if (ret)
+ goto free_cpudata1;
+
+ min_freq = amd_get_min_freq(cpudata);
+ max_freq = amd_get_max_freq(cpudata);
+ nominal_freq = amd_get_nominal_freq(cpudata);
+ lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+ if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+ dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+ min_freq, max_freq);
+ ret = -EINVAL;
+ goto free_cpudata1;
+ }
+
+ policy->cpuinfo.min_freq = min_freq;
+ policy->cpuinfo.max_freq = max_freq;
+ /* It will be updated by governor */
+ policy->cur = policy->cpuinfo.min_freq;
+
+ /* Initial processor data capability frequencies */
+ cpudata->max_freq = max_freq;
+ cpudata->min_freq = min_freq;
+ cpudata->nominal_freq = nominal_freq;
+ cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+ policy->driver_data = cpudata;
+
+ cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
+
+ policy->min = policy->cpuinfo.min_freq;
+ policy->max = policy->cpuinfo.max_freq;
+
+ /*
+ * Set the policy to powersave to provide a valid fallback value in case
+ * the default cpufreq governor is neither powersave nor performance.
+ */
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ policy->fast_switch_possible = true;
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+ if (ret)
+ return ret;
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+ ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
+ if (ret)
+ return ret;
+ WRITE_ONCE(cpudata->cppc_cap1_cached, value);
+ }
+ amd_pstate_boost_init(cpudata);
+
+ return 0;
+
+free_cpudata1:
+ kfree(cpudata);
+ return ret;
+}
+
+static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+{
+ pr_debug("CPU %d exiting\n", policy->cpu);
+ policy->fast_switch_possible = false;
+ return 0;
+}
+
+static void amd_pstate_epp_init(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u32 max_perf, min_perf;
+ u64 value;
+ s16 epp;
+
+ max_perf = READ_ONCE(cpudata->highest_perf);
+ min_perf = READ_ONCE(cpudata->lowest_perf);
+
+ value = READ_ONCE(cpudata->cppc_req_cached);
+
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ min_perf = max_perf;
+
+ /* Initial min/max values for CPPC Performance Controls Register */
+ value &= ~AMD_CPPC_MIN_PERF(~0L);
+ value |= AMD_CPPC_MIN_PERF(min_perf);
+
+ value &= ~AMD_CPPC_MAX_PERF(~0L);
+ value |= AMD_CPPC_MAX_PERF(max_perf);
+
+ /* CPPC EPP feature require to set zero to the desire perf bit */
+ value &= ~AMD_CPPC_DES_PERF(~0L);
+ value |= AMD_CPPC_DES_PERF(0);
+
+ if (cpudata->epp_policy == cpudata->policy)
+ goto skip_epp;
+
+ cpudata->epp_policy = cpudata->policy;
+
+ /* Get BIOS pre-defined epp value */
+ epp = amd_pstate_get_epp(cpudata, value);
+ if (epp < 0) {
+ /**
+ * This return value can only be negative for shared_memory
+ * systems where EPP register read/write not supported.
+ */
+ goto skip_epp;
+ }
+
+ if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+ epp = 0;
+
+ /* Set initial EPP value */
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ value &= ~GENMASK_ULL(31, 24);
+ value |= (u64)epp << 24;
+ }
+
+ WRITE_ONCE(cpudata->cppc_req_cached, value);
+ amd_pstate_set_epp(cpudata, epp);
+skip_epp:
+ cpufreq_cpu_put(policy);
+}
+
+static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (!policy->cpuinfo.max_freq)
+ return -ENODEV;
+
+ pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
+ policy->cpuinfo.max_freq, policy->max);
+
+ cpudata->policy = policy->policy;
+
+ amd_pstate_epp_init(policy->cpu);
+
+ return 0;
+}
+
+static void amd_pstate_epp_reenable(struct amd_cpudata *cpudata)
+{
+ struct cppc_perf_ctrls perf_ctrls;
+ u64 value, max_perf;
+ int ret;
+
+ ret = amd_pstate_enable(true);
+ if (ret)
+ pr_err("failed to enable amd pstate during resume, return %d\n", ret);
+
+ value = READ_ONCE(cpudata->cppc_req_cached);
+ max_perf = READ_ONCE(cpudata->highest_perf);
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ } else {
+ perf_ctrls.max_perf = max_perf;
+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(cpudata->epp_cached);
+ cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ }
+}
+
+static int amd_pstate_epp_cpu_online(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ pr_debug("AMD CPU Core %d going online\n", cpudata->cpu);
+
+ if (cppc_state == AMD_PSTATE_ACTIVE) {
+ amd_pstate_epp_reenable(cpudata);
+ cpudata->suspended = false;
+ }
+
+ return 0;
+}
+
+static void amd_pstate_epp_offline(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ struct cppc_perf_ctrls perf_ctrls;
+ int min_perf;
+ u64 value;
+
+ min_perf = READ_ONCE(cpudata->lowest_perf);
+ value = READ_ONCE(cpudata->cppc_req_cached);
+
+ mutex_lock(&amd_pstate_limits_lock);
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ cpudata->epp_policy = CPUFREQ_POLICY_UNKNOWN;
+
+ /* Set max perf same as min perf */
+ value &= ~AMD_CPPC_MAX_PERF(~0L);
+ value |= AMD_CPPC_MAX_PERF(min_perf);
+ value &= ~AMD_CPPC_MIN_PERF(~0L);
+ value |= AMD_CPPC_MIN_PERF(min_perf);
+ wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
+ } else {
+ perf_ctrls.desired_perf = 0;
+ perf_ctrls.max_perf = min_perf;
+ perf_ctrls.energy_perf = AMD_CPPC_ENERGY_PERF_PREF(HWP_EPP_BALANCE_POWERSAVE);
+ cppc_set_perf(cpudata->cpu, &perf_ctrls);
+ }
+ mutex_unlock(&amd_pstate_limits_lock);
+}
+
+static int amd_pstate_epp_cpu_offline(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ pr_debug("AMD CPU Core %d going offline\n", cpudata->cpu);
+
+ if (cpudata->suspended)
+ return 0;
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ amd_pstate_epp_offline(policy);
+
+ return 0;
+}
+
+static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+{
+ cpufreq_verify_within_cpu_limits(policy);
+ pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
+ return 0;
+}
+
+static int amd_pstate_epp_suspend(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+ int ret;
+
+ /* avoid suspending when EPP is not enabled */
+ if (cppc_state != AMD_PSTATE_ACTIVE)
+ return 0;
+
+ /* set this flag to avoid setting core offline*/
+ cpudata->suspended = true;
+
+ /* disable CPPC in lowlevel firmware */
+ ret = amd_pstate_enable(false);
+ if (ret)
+ pr_err("failed to suspend, return %d\n", ret);
+
+ return 0;
+}
+
+static int amd_pstate_epp_resume(struct cpufreq_policy *policy)
+{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (cpudata->suspended) {
+ mutex_lock(&amd_pstate_limits_lock);
+
+ /* enable amd pstate from suspend state*/
+ amd_pstate_epp_reenable(cpudata);
+
+ mutex_unlock(&amd_pstate_limits_lock);
+
+ cpudata->suspended = false;
+ }
+
+ return 0;
+}
+
static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
@@ -617,6 +1237,20 @@ static struct cpufreq_driver amd_pstate_driver = {
.attr = amd_pstate_attr,
};
+static struct cpufreq_driver amd_pstate_epp_driver = {
+ .flags = CPUFREQ_CONST_LOOPS,
+ .verify = amd_pstate_epp_verify_policy,
+ .setpolicy = amd_pstate_epp_set_policy,
+ .init = amd_pstate_epp_cpu_init,
+ .exit = amd_pstate_epp_cpu_exit,
+ .offline = amd_pstate_epp_cpu_offline,
+ .online = amd_pstate_epp_cpu_online,
+ .suspend = amd_pstate_epp_suspend,
+ .resume = amd_pstate_epp_resume,
+ .name = "amd_pstate_epp",
+ .attr = amd_pstate_epp_attr,
+};
+
static int __init amd_pstate_init(void)
{
int ret;
@@ -626,10 +1260,10 @@ static int __init amd_pstate_init(void)
/*
* by default the pstate driver is disabled to load
* enable the amd_pstate passive mode driver explicitly
- * with amd_pstate=passive in kernel command line
+ * with amd_pstate=passive or other modes in kernel command line
*/
- if (!cppc_load) {
- pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
+ if (cppc_state == AMD_PSTATE_DISABLE) {
+ pr_debug("driver load is disabled, boot with specific mode to enable this\n");
return -ENODEV;
}
@@ -645,7 +1279,8 @@ static int __init amd_pstate_init(void)
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
- amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
} else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, cppc_enable);
@@ -656,31 +1291,63 @@ static int __init amd_pstate_init(void)
/* enable amd pstate feature */
ret = amd_pstate_enable(true);
if (ret) {
- pr_err("failed to enable amd-pstate with return %d\n", ret);
+ pr_err("failed to enable with return %d\n", ret);
return ret;
}
- ret = cpufreq_register_driver(&amd_pstate_driver);
+ ret = cpufreq_register_driver(current_pstate_driver);
if (ret)
- pr_err("failed to register amd_pstate_driver with return %d\n",
- ret);
+ pr_err("failed to register with return %d\n", ret);
+
+ amd_pstate_kobj = kobject_create_and_add("amd_pstate", &cpu_subsys.dev_root->kobj);
+ if (!amd_pstate_kobj) {
+ ret = -EINVAL;
+ pr_err("global sysfs registration failed.\n");
+ goto kobject_free;
+ }
+
+ ret = sysfs_create_group(amd_pstate_kobj, &amd_pstate_global_attr_group);
+ if (ret) {
+ pr_err("sysfs attribute export failed with error %d.\n", ret);
+ goto global_attr_free;
+ }
return ret;
+
+global_attr_free:
+ kobject_put(amd_pstate_kobj);
+kobject_free:
+ cpufreq_unregister_driver(current_pstate_driver);
+ return ret;
}
device_initcall(amd_pstate_init);
static int __init amd_pstate_param(char *str)
{
+ size_t size;
+ int mode_idx;
+
if (!str)
return -EINVAL;
- if (!strcmp(str, "disable")) {
- cppc_load = 0;
- pr_info("driver is explicitly disabled\n");
- } else if (!strcmp(str, "passive"))
- cppc_load = 1;
+ size = strlen(str);
+ mode_idx = get_mode_idx_from_str(str, size);
- return 0;
+ if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
+ cppc_state = mode_idx;
+ if (cppc_state == AMD_PSTATE_DISABLE)
+ pr_info("driver is explicitly disabled\n");
+
+ if (cppc_state == AMD_PSTATE_ACTIVE)
+ current_pstate_driver = &amd_pstate_epp_driver;
+
+ if (cppc_state == AMD_PSTATE_PASSIVE)
+ current_pstate_driver = &amd_pstate_driver;
+
+ return 0;
+ }
+
+ return -EINVAL;
}
early_param("amd_pstate", amd_pstate_param);
diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c
index 4153150e20db..ffea6402189d 100644
--- a/drivers/cpufreq/brcmstb-avs-cpufreq.c
+++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c
@@ -751,10 +751,7 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
{
- int ret;
-
- ret = cpufreq_unregister_driver(&brcm_avs_driver);
- WARN_ON(ret);
+ cpufreq_unregister_driver(&brcm_avs_driver);
brcm_avs_prepare_uninit(pdev);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7e56a42750ea..6d8fd3b8dcb5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -993,7 +993,7 @@ static const struct sysfs_ops sysfs_ops = {
.store = store,
};
-static struct kobj_type ktype_cpufreq = {
+static const struct kobj_type ktype_cpufreq = {
.sysfs_ops = &sysfs_ops,
.default_groups = cpufreq_groups,
.release = cpufreq_sysfs_release,
@@ -2904,12 +2904,12 @@ EXPORT_SYMBOL_GPL(cpufreq_register_driver);
* Returns zero if successful, and -EINVAL if the cpufreq_driver is
* currently not initialised.
*/
-int cpufreq_unregister_driver(struct cpufreq_driver *driver)
+void cpufreq_unregister_driver(struct cpufreq_driver *driver)
{
unsigned long flags;
- if (!cpufreq_driver || (driver != cpufreq_driver))
- return -EINVAL;
+ if (WARN_ON(!cpufreq_driver || (driver != cpufreq_driver)))
+ return;
pr_debug("unregistering driver %s\n", driver->name);
@@ -2926,8 +2926,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpus_read_unlock();
-
- return 0;
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index 9e97f60f8199..ebb3a8102681 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -133,12 +133,14 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev)
static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
{
+ cpufreq_unregister_driver(&davinci_driver);
+
clk_put(cpufreq.armclk);
if (cpufreq.asyncclk)
clk_put(cpufreq.asyncclk);
- return cpufreq_unregister_driver(&davinci_driver);
+ return 0;
}
static struct platform_driver davinci_cpufreq_driver = {
diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c
deleted file mode 100644
index fb72d709db56..000000000000
--- a/drivers/cpufreq/loongson1-cpufreq.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * CPU Frequency Scaling for Loongson 1 SoC
- *
- * Copyright (C) 2014-2016 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/cpu.h>
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include <cpufreq.h>
-#include <loongson1.h>
-
-struct ls1x_cpufreq {
- struct device *dev;
- struct clk *clk; /* CPU clk */
- struct clk *mux_clk; /* MUX of CPU clk */
- struct clk *pll_clk; /* PLL clk */
- struct clk *osc_clk; /* OSC clk */
- unsigned int max_freq;
- unsigned int min_freq;
-};
-
-static struct ls1x_cpufreq *cpufreq;
-
-static int ls1x_cpufreq_notifier(struct notifier_block *nb,
- unsigned long val, void *data)
-{
- if (val == CPUFREQ_POSTCHANGE)
- current_cpu_data.udelay_val = loops_per_jiffy;
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block ls1x_cpufreq_notifier_block = {
- .notifier_call = ls1x_cpufreq_notifier
-};
-
-static int ls1x_cpufreq_target(struct cpufreq_policy *policy,
- unsigned int index)
-{
- struct device *cpu_dev = get_cpu_device(policy->cpu);
- unsigned int old_freq, new_freq;
-
- old_freq = policy->cur;
- new_freq = policy->freq_table[index].frequency;
-
- /*
- * The procedure of reconfiguring CPU clk is as below.
- *
- * - Reparent CPU clk to OSC clk
- * - Reset CPU clock (very important)
- * - Reconfigure CPU DIV
- * - Reparent CPU clk back to CPU DIV clk
- */
-
- clk_set_parent(policy->clk, cpufreq->osc_clk);
- __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) | RST_CPU_EN | RST_CPU,
- LS1X_CLK_PLL_DIV);
- __raw_writel(__raw_readl(LS1X_CLK_PLL_DIV) & ~(RST_CPU_EN | RST_CPU),
- LS1X_CLK_PLL_DIV);
- clk_set_rate(cpufreq->mux_clk, new_freq * 1000);
- clk_set_parent(policy->clk, cpufreq->mux_clk);
- dev_dbg(cpu_dev, "%u KHz --> %u KHz\n", old_freq, new_freq);
-
- return 0;
-}
-
-static int ls1x_cpufreq_init(struct cpufreq_policy *policy)
-{
- struct device *cpu_dev = get_cpu_device(policy->cpu);
- struct cpufreq_frequency_table *freq_tbl;
- unsigned int pll_freq, freq;
- int steps, i;
-
- pll_freq = clk_get_rate(cpufreq->pll_clk) / 1000;
-
- steps = 1 << DIV_CPU_WIDTH;
- freq_tbl = kcalloc(steps, sizeof(*freq_tbl), GFP_KERNEL);
- if (!freq_tbl)
- return -ENOMEM;
-
- for (i = 0; i < (steps - 1); i++) {
- freq = pll_freq / (i + 1);
- if ((freq < cpufreq->min_freq) || (freq > cpufreq->max_freq))
- freq_tbl[i].frequency = CPUFREQ_ENTRY_INVALID;
- else
- freq_tbl[i].frequency = freq;
- dev_dbg(cpu_dev,
- "cpufreq table: index %d: frequency %d\n", i,
- freq_tbl[i].frequency);
- }
- freq_tbl[i].frequency = CPUFREQ_TABLE_END;
-
- policy->clk = cpufreq->clk;
- cpufreq_generic_init(policy, freq_tbl, 0);
-
- return 0;
-}
-
-static int ls1x_cpufreq_exit(struct cpufreq_policy *policy)
-{
- kfree(policy->freq_table);
- return 0;
-}
-
-static struct cpufreq_driver ls1x_cpufreq_driver = {
- .name = "cpufreq-ls1x",
- .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
- .verify = cpufreq_generic_frequency_table_verify,
- .target_index = ls1x_cpufreq_target,
- .get = cpufreq_generic_get,
- .init = ls1x_cpufreq_init,
- .exit = ls1x_cpufreq_exit,
- .attr = cpufreq_generic_attr,
-};
-
-static int ls1x_cpufreq_remove(struct platform_device *pdev)
-{
- cpufreq_unregister_notifier(&ls1x_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- cpufreq_unregister_driver(&ls1x_cpufreq_driver);
-
- return 0;
-}
-
-static int ls1x_cpufreq_probe(struct platform_device *pdev)
-{
- struct plat_ls1x_cpufreq *pdata = dev_get_platdata(&pdev->dev);
- struct clk *clk;
- int ret;
-
- if (!pdata || !pdata->clk_name || !pdata->osc_clk_name) {
- dev_err(&pdev->dev, "platform data missing\n");
- return -EINVAL;
- }
-
- cpufreq =
- devm_kzalloc(&pdev->dev, sizeof(struct ls1x_cpufreq), GFP_KERNEL);
- if (!cpufreq)
- return -ENOMEM;
-
- cpufreq->dev = &pdev->dev;
-
- clk = devm_clk_get(&pdev->dev, pdata->clk_name);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get %s clock\n",
- pdata->clk_name);
- return PTR_ERR(clk);
- }
- cpufreq->clk = clk;
-
- clk = clk_get_parent(clk);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get parent of %s clock\n",
- __clk_get_name(cpufreq->clk));
- return PTR_ERR(clk);
- }
- cpufreq->mux_clk = clk;
-
- clk = clk_get_parent(clk);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get parent of %s clock\n",
- __clk_get_name(cpufreq->mux_clk));
- return PTR_ERR(clk);
- }
- cpufreq->pll_clk = clk;
-
- clk = devm_clk_get(&pdev->dev, pdata->osc_clk_name);
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "unable to get %s clock\n",
- pdata->osc_clk_name);
- return PTR_ERR(clk);
- }
- cpufreq->osc_clk = clk;
-
- cpufreq->max_freq = pdata->max_freq;
- cpufreq->min_freq = pdata->min_freq;
-
- ret = cpufreq_register_driver(&ls1x_cpufreq_driver);
- if (ret) {
- dev_err(&pdev->dev,
- "failed to register CPUFreq driver: %d\n", ret);
- return ret;
- }
-
- ret = cpufreq_register_notifier(&ls1x_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-
- if (ret) {
- dev_err(&pdev->dev,
- "failed to register CPUFreq notifier: %d\n",ret);
- cpufreq_unregister_driver(&ls1x_cpufreq_driver);
- }
-
- return ret;
-}
-
-static struct platform_driver ls1x_cpufreq_platdrv = {
- .probe = ls1x_cpufreq_probe,
- .remove = ls1x_cpufreq_remove,
- .driver = {
- .name = "ls1x-cpufreq",
- },
-};
-
-module_platform_driver(ls1x_cpufreq_platdrv);
-
-MODULE_ALIAS("platform:ls1x-cpufreq");
-MODULE_AUTHOR("Kelvin Cheung <keguang.zhang@gmail.com>");
-MODULE_DESCRIPTION("Loongson1 CPUFreq driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index f80339779084..b22f5cc8a463 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -317,13 +317,16 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
+ cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
+
+ return 0;
}
static const struct of_device_id mtk_cpufreq_hw_match[] = {
{ .compatible = "mediatek,cpufreq-hw", .data = &cpufreq_mtk_offsets },
{}
};
+MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match);
static struct platform_driver mtk_cpufreq_hw_driver = {
.probe = mtk_cpufreq_hw_driver_probe,
diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c
index 1b50df06c6bc..81649a1969b6 100644
--- a/drivers/cpufreq/omap-cpufreq.c
+++ b/drivers/cpufreq/omap-cpufreq.c
@@ -184,7 +184,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
static int omap_cpufreq_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&omap_driver);
+ cpufreq_unregister_driver(&omap_driver);
+
+ return 0;
}
static struct platform_driver omap_cpufreq_platdrv = {
diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index d3f55ca06ed3..2f581d2d617d 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -770,7 +770,9 @@ of_exit:
static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
{
- return cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
+ cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
+
+ return 0;
}
static struct platform_driver qcom_cpufreq_hw_driver = {
diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 4596c3e323aa..5890e25d7f77 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -411,7 +411,8 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
static struct cpufreq_driver tegra194_cpufreq_driver = {
.name = "tegra194",
- .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
+ .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
+ CPUFREQ_IS_COOLING_DEV,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = tegra194_cpufreq_set_target,
.get = tegra194_get_speed,
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index ff71dd662880..cac5997dca50 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -74,6 +74,7 @@ endmenu
config HALTPOLL_CPUIDLE
tristate "Halt poll cpuidle driver"
depends on X86 && KVM_GUEST
+ select CPU_IDLE_GOV_HALTPOLL
default y
help
This option enables halt poll cpuidle driver, which allows to poll
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 747aa537389b..a1ee475d180d 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -24,6 +24,14 @@ config ARM_PSCI_CPUIDLE
It provides an idle driver that is capable of detecting and
managing idle states through the PSCI firmware interface.
+ The driver has limitations when used with PREEMPT_RT:
+ - If the idle states are described with the non-hierarchical layout,
+ all idle states are still available.
+
+ - If the idle states are described with the hierarchical layout,
+ only the idle states defined per CPU are available, but not the ones
+ being shared among a group of CPUs (aka cluster idle states).
+
config ARM_PSCI_CPUIDLE_DOMAIN
bool "PSCI CPU idle Domain"
depends on ARM_PSCI_CPUIDLE
@@ -102,6 +110,7 @@ config ARM_MVEBU_V7_CPUIDLE
config ARM_TEGRA_CPUIDLE
bool "CPU Idle Driver for NVIDIA Tegra SoCs"
depends on (ARCH_TEGRA || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
select ARM_CPU_SUSPEND
help
@@ -110,6 +119,7 @@ config ARM_TEGRA_CPUIDLE
config ARM_QCOM_SPM_CPUIDLE
bool "CPU Idle Driver for Qualcomm Subsystem Power Manager (SPM)"
depends on (ARCH_QCOM || COMPILE_TEST) && !ARM64 && MMU
+ depends on ARCH_SUSPEND_POSSIBLE
select ARM_CPU_SUSPEND
select CPU_IDLE_MULTIPLE_DRIVERS
select DT_IDLE_STATES
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 3a39a7f48b77..e66df22f9695 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
local_irq_enable();
return index;
}
- default_idle();
+ arch_cpu_idle();
return index;
}
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index c80cf9ddabd8..6ad2954948a5 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -64,8 +64,11 @@ static int psci_pd_init(struct device_node *np, bool use_osi)
pd->flags |= GENPD_FLAG_IRQ_SAFE | GENPD_FLAG_CPU_DOMAIN;
- /* Allow power off when OSI has been successfully enabled. */
- if (use_osi)
+ /*
+ * Allow power off when OSI has been successfully enabled.
+ * PREEMPT_RT is not yet ready to enter domain idle states.
+ */
+ if (use_osi && !IS_ENABLED(CONFIG_PREEMPT_RT))
pd->power_off = psci_pd_power_off;
else
pd->flags |= GENPD_FLAG_ALWAYS_ON;
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 312a34ef28dc..6de027f9f6f5 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -222,6 +222,9 @@ static int psci_dt_cpu_init_topology(struct cpuidle_driver *drv,
if (!psci_has_osi_support())
return 0;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ return 0;
+
data->dev = psci_dt_attach_cpu(cpu);
if (IS_ERR_OR_NULL(data->dev))
return PTR_ERR_OR_ZERO(data->dev);
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index f70aa17e2a8e..d9cda7f6ccb9 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -183,11 +183,15 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
s->target_residency_ns = s->target_residency * NSEC_PER_USEC;
else if (s->target_residency_ns < 0)
s->target_residency_ns = 0;
+ else
+ s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC);
if (s->exit_latency > 0)
s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC;
else if (s->exit_latency_ns < 0)
s->exit_latency_ns = 0;
+ else
+ s->exit_latency = div_u64(s->exit_latency_ns, NSEC_PER_USEC);
}
}
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index d9262db79cae..987fc5f3997d 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,8 +2,13 @@
/*
* Timer events oriented CPU idle governor
*
+ * TEO governor:
* Copyright (C) 2018 - 2021 Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * Util-awareness mechanism:
+ * Copyright (C) 2022 Arm Ltd.
+ * Author: Kajetan Puchalski <kajetan.puchalski@arm.com>
*/
/**
@@ -99,15 +104,56 @@
* select the given idle state instead of the candidate one.
*
* 3. By default, select the candidate state.
+ *
+ * Util-awareness mechanism:
+ *
+ * The idea behind the util-awareness extension is that there are two distinct
+ * scenarios for the CPU which should result in two different approaches to idle
+ * state selection - utilized and not utilized.
+ *
+ * In this case, 'utilized' means that the average runqueue util of the CPU is
+ * above a certain threshold.
+ *
+ * When the CPU is utilized while going into idle, more likely than not it will
+ * be woken up to do more work soon and so a shallower idle state should be
+ * selected to minimise latency and maximise performance. When the CPU is not
+ * being utilized, the usual metrics-based approach to selecting the deepest
+ * available idle state should be preferred to take advantage of the power
+ * saving.
+ *
+ * In order to achieve this, the governor uses a utilization threshold.
+ * The threshold is computed per-CPU as a percentage of the CPU's capacity
+ * by bit shifting the capacity value. Based on testing, the shift of 6 (~1.56%)
+ * seems to be getting the best results.
+ *
+ * Before selecting the next idle state, the governor compares the current CPU
+ * util to the precomputed util threshold. If it's below, it defaults to the
+ * TEO metrics mechanism. If it's above, the closest shallower idle state will
+ * be selected instead, as long as is not a polling state.
*/
#include <linux/cpuidle.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
+#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/topology.h>
#include <linux/tick.h>
/*
+ * The number of bits to shift the CPU's capacity by in order to determine
+ * the utilized threshold.
+ *
+ * 6 was chosen based on testing as the number that achieved the best balance
+ * of power and performance on average.
+ *
+ * The resulting threshold is high enough to not be triggered by background
+ * noise and low enough to react quickly when activity starts to ramp up.
+ */
+#define UTIL_THRESHOLD_SHIFT 6
+
+
+/*
* The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
* is used for decreasing metrics on a regular basis.
*/
@@ -137,9 +183,11 @@ struct teo_bin {
* @time_span_ns: Time between idle state selection and post-wakeup update.
* @sleep_length_ns: Time till the closest timer event (at the selection time).
* @state_bins: Idle state data bins for this CPU.
- * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
+ * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
* @next_recent_idx: Index of the next @recent_idx entry to update.
* @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @util_threshold: Threshold above which the CPU is considered utilized
+ * @utilized: Whether the last sleep on the CPU happened while utilized
*/
struct teo_cpu {
s64 time_span_ns;
@@ -148,11 +196,30 @@ struct teo_cpu {
unsigned int total;
int next_recent_idx;
int recent_idx[NR_RECENT];
+ unsigned long util_threshold;
+ bool utilized;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
/**
+ * teo_cpu_is_utilized - Check if the CPU's util is above the threshold
+ * @cpu: Target CPU
+ * @cpu_data: Governor CPU data for the target CPU
+ */
+#ifdef CONFIG_SMP
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return sched_cpu_util(cpu) > cpu_data->util_threshold;
+}
+#else
+static bool teo_cpu_is_utilized(int cpu, struct teo_cpu *cpu_data)
+{
+ return false;
+}
+#endif
+
+/**
* teo_update - Update CPU metrics after wakeup.
* @drv: cpuidle driver containing state data.
* @dev: Target CPU.
@@ -258,15 +325,17 @@ static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
* @dev: Target CPU.
* @state_idx: Index of the capping idle state.
* @duration_ns: Idle duration value to match.
+ * @no_poll: Don't consider polling states.
*/
static int teo_find_shallower_state(struct cpuidle_driver *drv,
struct cpuidle_device *dev, int state_idx,
- s64 duration_ns)
+ s64 duration_ns, bool no_poll)
{
int i;
for (i = state_idx - 1; i >= 0; i--) {
- if (dev->states_usage[i].disable)
+ if (dev->states_usage[i].disable ||
+ (no_poll && drv->states[i].flags & CPUIDLE_FLAG_POLLING))
continue;
state_idx = i;
@@ -321,6 +390,22 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
goto end;
}
+ cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+ /*
+ * If the CPU is being utilized over the threshold and there are only 2
+ * states to choose from, the metrics need not be considered, so choose
+ * the shallowest non-polling state and exit.
+ */
+ if (drv->state_count < 3 && cpu_data->utilized) {
+ for (i = 0; i < drv->state_count; ++i) {
+ if (!dev->states_usage[i].disable &&
+ !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
+ idx = i;
+ goto end;
+ }
+ }
+ }
+
/*
* Find the deepest idle state whose target residency does not exceed
* the current sleep length and the deepest idle state not deeper than
@@ -452,6 +537,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
if (idx > constraint_idx)
idx = constraint_idx;
+ /*
+ * If the CPU is being utilized over the threshold, choose a shallower
+ * non-polling state to improve latency
+ */
+ if (cpu_data->utilized)
+ idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+
end:
/*
* Don't stop the tick if the selected state is a polling one or if the
@@ -469,7 +561,7 @@ end:
*/
if (idx > idx0 &&
drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick);
+ idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
}
return idx;
@@ -508,9 +600,11 @@ static int teo_enable_device(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
+ unsigned long max_capacity = arch_scale_cpu_capacity(dev->cpu);
int i;
memset(cpu_data, 0, sizeof(*cpu_data));
+ cpu_data->util_threshold = max_capacity >> UTIL_THRESHOLD_SHIFT;
for (i = 0; i < NR_RECENT; i++)
cpu_data->recent_idx[i] = -1;
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 2b496a53cbca..48948b171749 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -200,7 +200,7 @@ static void cpuidle_sysfs_release(struct kobject *kobj)
complete(&kdev->kobj_unregister);
}
-static struct kobj_type ktype_cpuidle = {
+static const struct kobj_type ktype_cpuidle = {
.sysfs_ops = &cpuidle_sysfs_ops,
.release = cpuidle_sysfs_release,
};
@@ -447,7 +447,7 @@ static void cpuidle_state_sysfs_release(struct kobject *kobj)
complete(&state_obj->kobj_unregister);
}
-static struct kobj_type ktype_state_cpuidle = {
+static const struct kobj_type ktype_state_cpuidle = {
.sysfs_ops = &cpuidle_state_sysfs_ops,
.default_groups = cpuidle_state_default_groups,
.release = cpuidle_state_sysfs_release,
@@ -594,7 +594,7 @@ static struct attribute *cpuidle_driver_default_attrs[] = {
};
ATTRIBUTE_GROUPS(cpuidle_driver_default);
-static struct kobj_type ktype_driver_cpuidle = {
+static const struct kobj_type ktype_driver_cpuidle = {
.sysfs_ops = &cpuidle_driver_sysfs_ops,
.default_groups = cpuidle_driver_default_groups,
.release = cpuidle_driver_sysfs_release,
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index e2d64a8f9422..938c17f25d94 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1424,6 +1424,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
@@ -1859,6 +1860,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
skx_idle_state_table_update();
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
spr_idle_state_table_update();
break;
case INTEL_FAM6_ALDERLAKE:
diff --git a/drivers/opp/Kconfig b/drivers/opp/Kconfig
index e8ce47b32735..d7c649a1a981 100644
--- a/drivers/opp/Kconfig
+++ b/drivers/opp/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config PM_OPP
bool
- select SRCU
help
SOCs have a standard set of tuples consisting of frequency and
voltage pairs that the device will support per voltage domain. This
diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c
index 96a30a032c5f..2c7fb683441e 100644
--- a/drivers/opp/debugfs.c
+++ b/drivers/opp/debugfs.c
@@ -235,7 +235,7 @@ static void opp_migrate_dentry(struct opp_device *opp_dev,
dentry = debugfs_rename(rootdir, opp_dev->dentry, rootdir,
opp_table->dentry_name);
- if (!dentry) {
+ if (IS_ERR(dentry)) {
dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
__func__, dev_name(opp_dev->dev), dev_name(dev));
return;
diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index fe86a09e3b67..c03b5402c03b 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -155,10 +155,12 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev,
unsigned int run_duration_us,
unsigned int idle_duration_us)
{
- if (run_duration_us && idle_duration_us) {
+ if (run_duration_us + idle_duration_us) {
WRITE_ONCE(ii_dev->run_duration_us, run_duration_us);
WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us);
}
+ if (!run_duration_us)
+ pr_debug("CPU is forced to 100 percent idle\n");
}
/**
@@ -201,7 +203,7 @@ int idle_inject_start(struct idle_inject_device *ii_dev)
unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us);
- if (!idle_duration_us || !run_duration_us)
+ if (!(idle_duration_us + run_duration_us))
return -EINVAL;
pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 26d00b1853b4..8970c7b80884 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -999,7 +999,15 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
do_div(value, rp->time_unit);
y = ilog2(value);
- f = div64_u64(4 * (value - (1 << y)), 1 << y);
+
+ /*
+ * The target hardware field is 7 bits wide, so return all ones
+ * if the exponent is too large.
+ */
+ if (y > 0x1f)
+ return 0x7f;
+
+ f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
value = (y & 0x1f) | ((f & 0x3) << 5);
}
return value;
@@ -1113,7 +1121,10 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &rapl_defaults_core),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server),
X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt),
diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c
index 1f968353d479..e180dee0f83d 100644
--- a/drivers/powercap/powercap_sys.c
+++ b/drivers/powercap/powercap_sys.c
@@ -530,9 +530,6 @@ struct powercap_zone *powercap_register_zone(
power_zone->name = kstrdup(name, GFP_KERNEL);
if (!power_zone->name)
goto err_name_alloc;
- dev_set_name(&power_zone->dev, "%s:%x",
- dev_name(power_zone->dev.parent),
- power_zone->id);
power_zone->constraints = kcalloc(nr_constraints,
sizeof(*power_zone->constraints),
GFP_KERNEL);
@@ -555,9 +552,16 @@ struct powercap_zone *powercap_register_zone(
power_zone->dev_attr_groups[0] = &power_zone->dev_zone_attr_group;
power_zone->dev_attr_groups[1] = NULL;
power_zone->dev.groups = power_zone->dev_attr_groups;
+ dev_set_name(&power_zone->dev, "%s:%x",
+ dev_name(power_zone->dev.parent),
+ power_zone->id);
result = device_register(&power_zone->dev);
- if (result)
- goto err_dev_ret;
+ if (result) {
+ put_device(&power_zone->dev);
+ mutex_unlock(&control_type->lock);
+
+ return ERR_PTR(result);
+ }
control_type->nr_zones++;
mutex_unlock(&control_type->lock);