summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2022-05-06 21:00:44 +0200
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2022-05-06 21:00:44 +0200
commit46acb9d9b6bb24448936365ec4e49198aa712090 (patch)
tree39ef5f9c16564721025ee376ecc1dba3e0907c74
parentcpufreq: Avoid unnecessary frequency updates due to mismatch (diff)
parentpowercap: DTPM: Check for Energy Model type (diff)
downloadlinux-46acb9d9b6bb24448936365ec4e49198aa712090.tar.xz
linux-46acb9d9b6bb24448936365ec4e49198aa712090.zip
Merge Energy Model material for 5.19 to satisfy dependencies.
-rw-r--r--Documentation/power/energy-model.rst24
-rw-r--r--drivers/cpufreq/mediatek-cpufreq-hw.c4
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c4
-rw-r--r--drivers/opp/of.c6
-rw-r--r--drivers/powercap/dtpm_cpu.c2
-rw-r--r--drivers/thermal/cpufreq_cooling.c2
-rw-r--r--drivers/thermal/devfreq_cooling.c8
-rw-r--r--include/linux/energy_model.h35
-rw-r--r--kernel/power/energy_model.c63
9 files changed, 101 insertions, 47 deletions
diff --git a/Documentation/power/energy-model.rst b/Documentation/power/energy-model.rst
index 49549aab41b4..feb257b7f350 100644
--- a/Documentation/power/energy-model.rst
+++ b/Documentation/power/energy-model.rst
@@ -123,6 +123,26 @@ allows a platform to register EM power values which are reflecting total power
(static + dynamic). These power values might be coming directly from
experiments and measurements.
+Registration of 'artificial' EM
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There is an option to provide a custom callback for drivers missing detailed
+knowledge about power value for each performance state. The callback
+.get_cost() is optional and provides the 'cost' values used by the EAS.
+This is useful for platforms that only provide information on relative
+efficiency between CPU types, where one could use the information to
+create an abstract power model. But even an abstract power model can
+sometimes be hard to fit in, given the input power value size restrictions.
+The .get_cost() allows to provide the 'cost' values which reflect the
+efficiency of the CPUs. This would allow to provide EAS information which
+has different relation than what would be forced by the EM internal
+formulas calculating 'cost' values. To register an EM for such platform, the
+driver must set the flag 'milliwatts' to 0, provide .get_power() callback
+and provide .get_cost() callback. The EM framework would handle such platform
+properly during registration. A flag EM_PERF_DOMAIN_ARTIFICIAL is set for such
+platform. Special care should be taken by other frameworks which are using EM
+to test and treat this flag properly.
+
Registration of 'simple' EM
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -181,8 +201,8 @@ EM framework::
-> drivers/cpufreq/foo_cpufreq.c
- 01 static int est_power(unsigned long *mW, unsigned long *KHz,
- 02 struct device *dev)
+ 01 static int est_power(struct device *dev, unsigned long *mW,
+ 02 unsigned long *KHz)
03 {
04 long freq, power;
05
diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
index 0a94c56ddad2..813cccbfe934 100644
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -51,8 +51,8 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = {
};
static int __maybe_unused
-mtk_cpufreq_get_cpu_power(unsigned long *mW,
- unsigned long *KHz, struct device *cpu_dev)
+mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW,
+ unsigned long *KHz)
{
struct mtk_cpufreq_data *data;
struct cpufreq_policy *policy;
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 919fa6e3f462..6d2a4cf46db7 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -96,8 +96,8 @@ scmi_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
}
static int __maybe_unused
-scmi_get_cpu_power(unsigned long *power, unsigned long *KHz,
- struct device *cpu_dev)
+scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
+ unsigned long *KHz)
{
unsigned long Hz;
int ret, domain;
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 440ab5a03df9..485ea980bde7 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1448,7 +1448,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
* Returns 0 on success or a proper -EINVAL value in case of error.
*/
static int __maybe_unused
-_get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
+_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz)
{
struct dev_pm_opp *opp;
unsigned long opp_freq, opp_power;
@@ -1482,8 +1482,8 @@ _get_dt_power(unsigned long *mW, unsigned long *kHz, struct device *dev)
* Returns -EINVAL if the power calculation failed because of missing
* parameters, 0 otherwise.
*/
-static int __maybe_unused _get_power(unsigned long *mW, unsigned long *kHz,
- struct device *dev)
+static int __maybe_unused _get_power(struct device *dev, unsigned long *mW,
+ unsigned long *kHz)
{
struct dev_pm_opp *opp;
struct device_node *np;
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index bca2f912d349..f5eced0842b3 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -211,7 +211,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent)
return 0;
pd = em_cpu_get(cpu);
- if (!pd)
+ if (!pd || em_is_artificial(pd))
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index 0bfb8eebd126..b8151d95a806 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -328,7 +328,7 @@ static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
struct cpufreq_policy *policy;
unsigned int nr_levels;
- if (!em)
+ if (!em || em_is_artificial(em))
return false;
policy = cpufreq_cdev->policy;
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4310cb342a9f..b04dcbbf721a 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -358,6 +358,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
+ struct em_perf_domain *em;
char *name;
int err, num_opps;
@@ -367,8 +368,9 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
dfc->devfreq = df;
- dfc->em_pd = em_pd_get(dev);
- if (dfc->em_pd) {
+ em = em_pd_get(dev);
+ if (em && !em_is_artificial(em)) {
+ dfc->em_pd = em;
devfreq_cooling_ops.get_requested_power =
devfreq_cooling_get_requested_power;
devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
@@ -379,7 +381,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
num_opps = em_pd_nr_perf_states(dfc->em_pd);
} else {
/* Backward compatibility for drivers which do not use IPA */
- dev_dbg(dev, "missing EM for cooling device\n");
+ dev_dbg(dev, "missing proper EM for cooling device\n");
num_opps = dev_pm_opp_get_opp_count(dev);
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 9f3c400bc52d..8419bffb4398 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -67,11 +67,16 @@ struct em_perf_domain {
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
+ *
+ * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be
+ * created by platform missing real power information
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
+#define EM_PERF_DOMAIN_ARTIFICIAL BIT(2)
#define em_span_cpus(em) (to_cpumask((em)->cpus))
+#define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL)
#ifdef CONFIG_ENERGY_MODEL
#define EM_MAX_POWER 0xFFFF
@@ -96,11 +101,11 @@ struct em_data_callback {
/**
* active_power() - Provide power at the next performance state of
* a device
+ * @dev : Device for which we do this operation (can be a CPU)
* @power : Active power at the performance state
* (modified)
* @freq : Frequency at the performance state in kHz
* (modified)
- * @dev : Device for which we do this operation (can be a CPU)
*
* active_power() must find the lowest performance state of 'dev' above
* 'freq' and update 'power' and 'freq' to the matching active power
@@ -112,11 +117,32 @@ struct em_data_callback {
*
* Return 0 on success.
*/
- int (*active_power)(unsigned long *power, unsigned long *freq,
- struct device *dev);
+ int (*active_power)(struct device *dev, unsigned long *power,
+ unsigned long *freq);
+
+ /**
+ * get_cost() - Provide the cost at the given performance state of
+ * a device
+ * @dev : Device for which we do this operation (can be a CPU)
+ * @freq : Frequency at the performance state in kHz
+ * @cost : The cost value for the performance state
+ * (modified)
+ *
+ * In case of CPUs, the cost is the one of a single CPU in the domain.
+ * It is expected to fit in the [0, EM_MAX_POWER] range due to internal
+ * usage in EAS calculation.
+ *
+ * Return 0 on success, or appropriate error value in case of failure.
+ */
+ int (*get_cost)(struct device *dev, unsigned long freq,
+ unsigned long *cost);
};
-#define EM_DATA_CB(_active_power_cb) { .active_power = &_active_power_cb }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) ((em_cb).active_power = cb)
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) \
+ { .active_power = _active_power_cb, \
+ .get_cost = _cost_cb }
+#define EM_DATA_CB(_active_power_cb) \
+ EM_ADV_DATA_CB(_active_power_cb, NULL)
struct em_perf_domain *em_cpu_get(int cpu);
struct em_perf_domain *em_pd_get(struct device *dev);
@@ -264,6 +290,7 @@ static inline int em_pd_nr_perf_states(struct em_perf_domain *pd)
#else
struct em_data_callback {};
+#define EM_ADV_DATA_CB(_active_power_cb, _cost_cb) { }
#define EM_DATA_CB(_active_power_cb) { }
#define EM_SET_ACTIVE_POWER_CB(em_cb, cb) do { } while (0)
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 0153b0ca7b23..092513575e4e 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -54,28 +54,15 @@ static int em_debug_cpus_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
-static int em_debug_units_show(struct seq_file *s, void *unused)
+static int em_debug_flags_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
- char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
- "milliWatts" : "bogoWatts";
- seq_printf(s, "%s\n", units);
+ seq_printf(s, "%#lx\n", pd->flags);
return 0;
}
-DEFINE_SHOW_ATTRIBUTE(em_debug_units);
-
-static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
-{
- struct em_perf_domain *pd = s->private;
- int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
-
- seq_printf(s, "%d\n", enabled);
-
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+DEFINE_SHOW_ATTRIBUTE(em_debug_flags);
static void em_debug_create_pd(struct device *dev)
{
@@ -89,9 +76,8 @@ static void em_debug_create_pd(struct device *dev)
debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
&em_debug_cpus_fops);
- debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
- debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
- &em_debug_skip_inefficiencies_fops);
+ debugfs_create_file("flags", 0444, d, dev->em_pd,
+ &em_debug_flags_fops);
/* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++)
@@ -121,7 +107,8 @@ static void em_debug_remove_pd(struct device *dev) {}
#endif
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
- int nr_states, struct em_data_callback *cb)
+ int nr_states, struct em_data_callback *cb,
+ unsigned long flags)
{
unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
struct em_perf_state *table;
@@ -139,7 +126,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
* lowest performance state of 'dev' above 'freq' and updates
* 'power' and 'freq' accordingly.
*/
- ret = cb->active_power(&power, &freq, dev);
+ ret = cb->active_power(dev, &power, &freq);
if (ret) {
dev_err(dev, "EM: invalid perf. state: %d\n",
ret);
@@ -173,10 +160,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
/* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency;
for (i = nr_states - 1; i >= 0; i--) {
- unsigned long power_res = em_scale_power(table[i].power);
+ unsigned long power_res, cost;
+
+ if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+ ret = cb->get_cost(dev, table[i].frequency, &cost);
+ if (ret || !cost || cost > EM_MAX_POWER) {
+ dev_err(dev, "EM: invalid cost %lu %d\n",
+ cost, ret);
+ goto free_ps_table;
+ }
+ } else {
+ power_res = em_scale_power(table[i].power);
+ cost = div64_u64(fmax * power_res, table[i].frequency);
+ }
+
+ table[i].cost = cost;
- table[i].cost = div64_u64(fmax * power_res,
- table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
@@ -197,7 +196,8 @@ free_ps_table:
}
static int em_create_pd(struct device *dev, int nr_states,
- struct em_data_callback *cb, cpumask_t *cpus)
+ struct em_data_callback *cb, cpumask_t *cpus,
+ unsigned long flags)
{
struct em_perf_domain *pd;
struct device *cpu_dev;
@@ -215,7 +215,7 @@ static int em_create_pd(struct device *dev, int nr_states,
return -ENOMEM;
}
- ret = em_create_perf_table(dev, pd, nr_states, cb);
+ ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
if (ret) {
kfree(pd);
return ret;
@@ -332,6 +332,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts)
{
unsigned long cap, prev_cap = 0;
+ unsigned long flags = 0;
int cpu, ret;
if (!dev || !nr_states || !cb)
@@ -378,12 +379,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
}
}
- ret = em_create_pd(dev, nr_states, cb, cpus);
+ if (milliwatts)
+ flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ else if (cb->get_cost)
+ flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+
+ ret = em_create_pd(dev, nr_states, cb, cpus, flags);
if (ret)
goto unlock;
- if (milliwatts)
- dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
+ dev->em_pd->flags |= flags;
em_cpufreq_update_efficiencies(dev);