diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-10-16 22:12:02 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-10-16 22:12:02 +0200 |
commit | 7855e10294efd4925b351c56d2834dc9f7cff7a2 (patch) | |
tree | 6a23920b576108029fa79ce048af6d0a36dd897f | |
parent | cpufreq: intel_pstate: Fix divide by zero on Knights Landing (KNL) (diff) | |
parent | cpufreq: intel_pstate: Avoid calculation for max/min (diff) | |
download | linux-7855e10294efd4925b351c56d2834dc9f7cff7a2.tar.xz linux-7855e10294efd4925b351c56d2834dc9f7cff7a2.zip |
Merge back earlier cpufreq material for v4.4.
-rw-r--r-- | Documentation/kernel-parameters.txt | 3 | ||||
-rw-r--r-- | arch/x86/include/asm/msr-index.h | 7 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.x86 | 1 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 7 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 31 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 12 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.h | 1 | ||||
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c | 50 | ||||
-rw-r--r-- | drivers/cpufreq/integrator-cpufreq.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 276 | ||||
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 10 | ||||
-rw-r--r-- | drivers/cpufreq/tegra20-cpufreq.c | 2 |
12 files changed, 351 insertions, 51 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 22a4b687ea5b..9b75e2a760de 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1546,6 +1546,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. hwp_only Only load intel_pstate on systems which support hardware P state control (HWP) if available. + no_acpi + Don't use ACPI processor performance control objects + _PSS and _PPC specified limits. intremap= [X86-64, Intel-IOMMU] on enable Interrupt Remapping (default) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index b8c14bb7fc8f..9f3905697f12 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -206,6 +206,13 @@ #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index c59bdcb83217..adbd1de1cea5 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -5,6 +5,7 @@ config X86_INTEL_PSTATE bool "Intel P state control" depends on X86 + select ACPI_PROCESSOR if ACPI help This driver provides a P state for Intel core processors. The driver implements an internal governor and will become diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 25c4c15103a0..8701dc559850 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -843,18 +843,11 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, down_write(&policy->rwsem); - /* Updating inactive policies is invalid, so avoid doing that. */ - if (unlikely(policy_is_inactive(policy))) { - ret = -EBUSY; - goto unlock_policy_rwsem; - } - if (fattr->store) ret = fattr->store(policy, buf, count); else ret = -EIO; -unlock_policy_rwsem: up_write(&policy->rwsem); unlock: put_online_cpus(); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 84a1506950a7..1fa1deb6e91f 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -23,6 +23,19 @@ static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event); + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct cpufreq_governor cpufreq_gov_conservative = { + .name = "conservative", + .governor = cs_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, struct cpufreq_policy *policy) { @@ -119,12 +132,14 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, struct cpufreq_freqs *freq = data; struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, freq->cpu); - struct cpufreq_policy *policy; + struct cpufreq_policy *policy = cpufreq_cpu_get_raw(freq->cpu); - if (!dbs_info->enable) + if (!policy) return 0; - policy = dbs_info->cdbs.shared->policy; + /* policy isn't governed by conservative governor */ + if (policy->governor != &cpufreq_gov_conservative) + return 0; /* * we only care if our internally tracked freq moves outside the 'valid' @@ -367,16 +382,6 @@ static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, return cpufreq_governor_dbs(policy, &cs_dbs_cdata, event); } -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE -static -#endif -struct cpufreq_governor cpufreq_gov_conservative = { - .name = "conservative", - .governor = cs_cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - static int __init cpufreq_gov_dbs_init(void) { return cpufreq_register_governor(&cpufreq_gov_conservative); diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 939197ffa4ac..750626d8fb03 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -463,7 +463,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, cdata->get_cpu_dbs_info_s(cpu); cs_dbs_info->down_skip = 0; - cs_dbs_info->enable = 1; cs_dbs_info->requested_freq = policy->cur; } else { struct od_ops *od_ops = cdata->gov_ops; @@ -482,9 +481,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, static int cpufreq_governor_stop(struct cpufreq_policy *policy, struct dbs_data *dbs_data) { - struct common_dbs_data *cdata = dbs_data->cdata; - unsigned int cpu = policy->cpu; - struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(policy->cpu); struct cpu_common_dbs_info *shared = cdbs->shared; /* State should be equivalent to START */ @@ -493,13 +490,6 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy, gov_cancel_work(dbs_data, policy); - if (cdata->governor == GOV_CONSERVATIVE) { - struct cs_cpu_dbs_info_s *cs_dbs_info = - cdata->get_cpu_dbs_info_s(cpu); - - cs_dbs_info->enable = 0; - } - shared->policy = NULL; mutex_destroy(&shared->timer_mutex); return 0; diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 50f171796632..5621bb03e874 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -170,7 +170,6 @@ struct cs_cpu_dbs_info_s { struct cpu_dbs_info cdbs; unsigned int down_skip; unsigned int requested_freq; - unsigned int enable:1; }; /* Per policy Governors sysfs tunables */ diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 380a90d3c57e..9b4a7bd04dea 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -30,6 +30,10 @@ static struct clk *pll1_sw_clk; static struct clk *step_clk; static struct clk *pll2_pfd2_396m_clk; +/* clk used by i.MX6UL */ +static struct clk *pll2_bus_clk; +static struct clk *secondary_sel_clk; + static struct device *cpu_dev; static bool free_opp; static struct cpufreq_frequency_table *freq_table; @@ -91,16 +95,36 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index) * The setpoints are selected per PLL/PDF frequencies, so we need to * reprogram PLL for frequency scaling. The procedure of reprogramming * PLL1 is as below. - * + * For i.MX6UL, it has a secondary clk mux, the cpu frequency change + * flow is slightly different from other i.MX6 OSC. + * The cpu frequeny change flow for i.MX6(except i.MX6UL) is as below: * - Enable pll2_pfd2_396m_clk and reparent pll1_sw_clk to it * - Reprogram pll1_sys_clk and reparent pll1_sw_clk back to it * - Disable pll2_pfd2_396m_clk */ - clk_set_parent(step_clk, pll2_pfd2_396m_clk); - clk_set_parent(pll1_sw_clk, step_clk); - if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { - clk_set_rate(pll1_sys_clk, new_freq * 1000); + if (of_machine_is_compatible("fsl,imx6ul")) { + /* + * When changing pll1_sw_clk's parent to pll1_sys_clk, + * CPU may run at higher than 528MHz, this will lead to + * the system unstable if the voltage is lower than the + * voltage of 528MHz, so lower the CPU frequency to one + * half before changing CPU frequency. + */ + clk_set_rate(arm_clk, (old_freq >> 1) * 1000); clk_set_parent(pll1_sw_clk, pll1_sys_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) + clk_set_parent(secondary_sel_clk, pll2_bus_clk); + else + clk_set_parent(secondary_sel_clk, pll2_pfd2_396m_clk); + clk_set_parent(step_clk, secondary_sel_clk); + clk_set_parent(pll1_sw_clk, step_clk); + } else { + clk_set_parent(step_clk, pll2_pfd2_396m_clk); + clk_set_parent(pll1_sw_clk, step_clk); + if (freq_hz > clk_get_rate(pll2_pfd2_396m_clk)) { + clk_set_rate(pll1_sys_clk, new_freq * 1000); + clk_set_parent(pll1_sw_clk, pll1_sys_clk); + } } /* Ensure the arm clock divider is what we expect */ @@ -186,6 +210,16 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_clk; } + if (of_machine_is_compatible("fsl,imx6ul")) { + pll2_bus_clk = clk_get(cpu_dev, "pll2_bus"); + secondary_sel_clk = clk_get(cpu_dev, "secondary_sel"); + if (IS_ERR(pll2_bus_clk) || IS_ERR(secondary_sel_clk)) { + dev_err(cpu_dev, "failed to get clocks specific to imx6ul\n"); + ret = -ENOENT; + goto put_clk; + } + } + arm_reg = regulator_get(cpu_dev, "arm"); pu_reg = regulator_get_optional(cpu_dev, "pu"); soc_reg = regulator_get(cpu_dev, "soc"); @@ -331,6 +365,10 @@ put_clk: clk_put(step_clk); if (!IS_ERR(pll2_pfd2_396m_clk)) clk_put(pll2_pfd2_396m_clk); + if (!IS_ERR(pll2_bus_clk)) + clk_put(pll2_bus_clk); + if (!IS_ERR(secondary_sel_clk)) + clk_put(secondary_sel_clk); of_node_put(np); return ret; } @@ -350,6 +388,8 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev) clk_put(pll1_sw_clk); clk_put(step_clk); clk_put(pll2_pfd2_396m_clk); + clk_put(pll2_bus_clk); + clk_put(secondary_sel_clk); return 0; } diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c index 2faa4216bf2a..79e3ff2771a6 100644 --- a/drivers/cpufreq/integrator-cpufreq.c +++ b/drivers/cpufreq/integrator-cpufreq.c @@ -221,6 +221,8 @@ static const struct of_device_id integrator_cpufreq_match[] = { { }, }; +MODULE_DEVICE_TABLE(of, integrator_cpufreq_match); + static struct platform_driver integrator_cpufreq_driver = { .driver = { .name = "integrator-cpufreq", diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index aa33b92b3e3e..6a5a22192128 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -34,6 +34,10 @@ #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> +#if IS_ENABLED(CONFIG_ACPI) +#include <acpi/processor.h> +#endif + #define BYT_RATIOS 0x66a #define BYT_VIDS 0x66b #define BYT_TURBO_RATIOS 0x66c @@ -43,7 +47,6 @@ #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) - static inline int32_t mul_fp(int32_t x, int32_t y) { return ((int64_t)x * (int64_t)y) >> FRAC_BITS; @@ -78,6 +81,7 @@ struct pstate_data { int current_pstate; int min_pstate; int max_pstate; + int max_pstate_physical; int scaling; int turbo_pstate; }; @@ -113,6 +117,9 @@ struct cpudata { u64 prev_mperf; u64 prev_tsc; struct sample sample; +#if IS_ENABLED(CONFIG_ACPI) + struct acpi_processor_performance acpi_perf_data; +#endif }; static struct cpudata **all_cpu_data; @@ -127,6 +134,7 @@ struct pstate_adjust_policy { struct pstate_funcs { int (*get_max)(void); + int (*get_max_physical)(void); int (*get_min)(void); int (*get_turbo)(void); int (*get_scaling)(void); @@ -142,6 +150,7 @@ struct cpu_defaults { static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; +static int no_acpi_perf; struct perf_limits { int no_turbo; @@ -154,6 +163,8 @@ struct perf_limits { int max_sysfs_pct; int min_policy_pct; int min_sysfs_pct; + int max_perf_ctl; + int min_perf_ctl; }; static struct perf_limits limits = { @@ -167,8 +178,157 @@ static struct perf_limits limits = { .max_sysfs_pct = 100, .min_policy_pct = 0, .min_sysfs_pct = 0, + .max_perf_ctl = 0, + .min_perf_ctl = 0, }; +#if IS_ENABLED(CONFIG_ACPI) +/* + * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and + * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and + * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state + * ratio, out of it only high 8 bits are used. For example 0x1700 is setting + * target ratio 0x17. The _PSS control value stores in a format which can be + * directly written to PERF_CTL MSR. But in intel_pstate driver this shift + * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). + * This function converts the _PSS control value to intel pstate driver format + * for comparison and assignment. + */ +static int convert_to_native_pstate_format(struct cpudata *cpu, int index) +{ + return cpu->acpi_perf_data.states[index].control >> 8; +} + +static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int ret; + bool turbo_absent = false; + int max_pstate_index; + int min_pss_ctl, max_pss_ctl, turbo_pss_ctl; + int i; + + cpu = all_cpu_data[policy->cpu]; + + pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n", + cpu->pstate.min_pstate, cpu->pstate.max_pstate, + cpu->pstate.turbo_pstate); + + if (!cpu->acpi_perf_data.shared_cpu_map && + zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map, + GFP_KERNEL, cpu_to_node(policy->cpu))) { + return -ENOMEM; + } + + ret = acpi_processor_register_performance(&cpu->acpi_perf_data, + policy->cpu); + if (ret) + return ret; + + /* + * Check if the control value in _PSS is for PERF_CTL MSR, which should + * guarantee that the states returned by it map to the states in our + * list directly. + */ + if (cpu->acpi_perf_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) + return -EIO; + + pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu); + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) + pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", + (i == cpu->acpi_perf_data.state ? '*' : ' '), i, + (u32) cpu->acpi_perf_data.states[i].core_frequency, + (u32) cpu->acpi_perf_data.states[i].power, + (u32) cpu->acpi_perf_data.states[i].control); + + /* + * If there is only one entry _PSS, simply ignore _PSS and continue as + * usual without taking _PSS into account + */ + if (cpu->acpi_perf_data.state_count < 2) + return 0; + + turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); + min_pss_ctl = convert_to_native_pstate_format(cpu, + cpu->acpi_perf_data.state_count - 1); + /* Check if there is a turbo freq in _PSS */ + if (turbo_pss_ctl <= cpu->pstate.max_pstate && + turbo_pss_ctl > cpu->pstate.min_pstate) { + pr_debug("intel_pstate: no turbo range exists in _PSS\n"); + limits.no_turbo = limits.turbo_disabled = 1; + cpu->pstate.turbo_pstate = cpu->pstate.max_pstate; + turbo_absent = true; + } + + /* Check if the max non turbo p state < Intel P state max */ + max_pstate_index = turbo_absent ? 0 : 1; + max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index); + if (max_pss_ctl < cpu->pstate.max_pstate && + max_pss_ctl > cpu->pstate.min_pstate) + cpu->pstate.max_pstate = max_pss_ctl; + + /* check If min perf > Intel P State min */ + if (min_pss_ctl > cpu->pstate.min_pstate && + min_pss_ctl < cpu->pstate.max_pstate) { + cpu->pstate.min_pstate = min_pss_ctl; + policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling; + } + + if (turbo_absent) + policy->cpuinfo.max_freq = cpu->pstate.max_pstate * + cpu->pstate.scaling; + else { + policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * + cpu->pstate.scaling; + /* + * The _PSS table doesn't contain whole turbo frequency range. + * This just contains +1 MHZ above the max non turbo frequency, + * with control value corresponding to max turbo ratio. But + * when cpufreq set policy is called, it will call with this + * max frequency, which will cause a reduced performance as + * this driver uses real max turbo frequency as the max + * frequeny. So correct this frequency in _PSS table to + * correct max turbo frequency based on the turbo ratio. + * Also need to convert to MHz as _PSS freq is in MHz. + */ + cpu->acpi_perf_data.states[0].core_frequency = + turbo_pss_ctl * 100; + } + + pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n", + cpu->pstate.min_pstate, cpu->pstate.max_pstate, + cpu->pstate.turbo_pstate); + pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n", + policy->cpuinfo.max_freq, policy->cpuinfo.min_freq); + + return 0; +} + +static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + if (!no_acpi_perf) + return 0; + + cpu = all_cpu_data[policy->cpu]; + acpi_processor_unregister_performance(policy->cpu); + return 0; +} + +#else +static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy) +{ + return 0; +} + +static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + return 0; +} +#endif + static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = setpoint; @@ -591,7 +751,7 @@ static int core_get_min_pstate(void) return (value >> 40) & 0xFF; } -static int core_get_max_pstate(void) +static int core_get_max_pstate_physical(void) { u64 value; @@ -599,6 +759,46 @@ static int core_get_max_pstate(void) return (value >> 8) & 0xFF; } +static int core_get_max_pstate(void) +{ + u64 tar; + u64 plat_info; + int max_pstate; + int err; + + rdmsrl(MSR_PLATFORM_INFO, plat_info); + max_pstate = (plat_info >> 8) & 0xFF; + + err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar); + if (!err) { + /* Do some sanity checking for safety */ + if (plat_info & 0x600000000) { + u64 tdp_ctrl; + u64 tdp_ratio; + int tdp_msr; + + err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl); + if (err) + goto skip_tar; + + tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl; + err = rdmsrl_safe(tdp_msr, &tdp_ratio); + if (err) + goto skip_tar; + + if (tdp_ratio - 1 == tar) { + max_pstate = tar; + pr_debug("max_pstate=TAC %x\n", max_pstate); + } else { + goto skip_tar; + } + } + } + +skip_tar: + return max_pstate; +} + static int core_get_turbo_pstate(void) { u64 value; @@ -652,6 +852,7 @@ static struct cpu_defaults core_params = { }, .funcs = { .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, .get_min = core_get_min_pstate, .get_turbo = core_get_turbo_pstate, .get_scaling = core_get_scaling, @@ -670,6 +871,7 @@ static struct cpu_defaults byt_params = { }, .funcs = { .get_max = byt_get_max_pstate, + .get_max_physical = byt_get_max_pstate, .get_min = byt_get_min_pstate, .get_turbo = byt_get_turbo_pstate, .set = byt_set_pstate, @@ -689,6 +891,7 @@ static struct cpu_defaults knl_params = { }, .funcs = { .get_max = core_get_max_pstate, + .get_max_physical = core_get_max_pstate_physical, .get_min = core_get_min_pstate, .get_turbo = knl_get_turbo_pstate, .get_scaling = core_get_scaling, @@ -710,12 +913,23 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) * policy, or by cpu specific default values determined through * experimentation. */ - max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); - *max = clamp_t(int, max_perf_adj, - cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); + if (limits.max_perf_ctl && limits.max_sysfs_pct >= + limits.max_policy_pct) { + *max = limits.max_perf_ctl; + } else { + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), + limits.max_perf)); + *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, + cpu->pstate.turbo_pstate); + } - min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); - *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + if (limits.min_perf_ctl) { + *min = limits.min_perf_ctl; + } else { + min_perf = fp_toint(mul_fp(int_tofp(max_perf), + limits.min_perf)); + *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); + } } static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force) @@ -743,6 +957,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); cpu->pstate.max_pstate = pstate_funcs.get_max(); + cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); @@ -761,7 +976,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) sample->freq = fp_toint( mul_fp(int_tofp( - cpu->pstate.max_pstate * cpu->pstate.scaling / 100), + cpu->pstate.max_pstate_physical * + cpu->pstate.scaling / 100), core_pct)); sample->core_pct_busy = (int32_t)core_pct; @@ -834,7 +1050,7 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) * specified pstate. */ core_busy = cpu->sample.core_pct_busy; - max_pstate = int_tofp(cpu->pstate.max_pstate); + max_pstate = int_tofp(cpu->pstate.max_pstate_physical); current_pstate = int_tofp(cpu->pstate.current_pstate); core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); @@ -988,6 +1204,12 @@ static unsigned int intel_pstate_get(unsigned int cpu_num) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { +#if IS_ENABLED(CONFIG_ACPI) + struct cpudata *cpu; + int i; +#endif + pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__, + policy->cpuinfo.max_freq, policy->max); if (!policy->cpuinfo.max_freq) return -ENODEV; @@ -1000,6 +1222,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.max_perf_pct = 100; limits.max_perf = int_tofp(1); limits.no_turbo = 0; + limits.max_perf_ctl = 0; + limits.min_perf_ctl = 0; return 0; } @@ -1020,6 +1244,23 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100)); limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100)); +#if IS_ENABLED(CONFIG_ACPI) + cpu = all_cpu_data[policy->cpu]; + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { + int control; + + control = convert_to_native_pstate_format(cpu, i); + if (control * cpu->pstate.scaling == policy->max) + limits.max_perf_ctl = control; + if (control * cpu->pstate.scaling == policy->min) + limits.min_perf_ctl = control; + } + + pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n", + policy->cpuinfo.max_freq, policy->max, limits.min_perf_ctl, + limits.max_perf_ctl); +#endif + if (hwp_active) intel_pstate_hwp_set(); @@ -1074,18 +1315,30 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + if (!no_acpi_perf) + intel_pstate_init_perf_limits(policy); + /* + * If there is no acpi perf data or error, we ignore and use Intel P + * state calculated limits, So this is not fatal error. + */ policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + return intel_pstate_exit_perf_limits(policy); +} + static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1118,6 +1371,7 @@ static void copy_pid_params(struct pstate_adjust_policy *policy) static void copy_cpu_funcs(struct pstate_funcs *funcs) { pstate_funcs.get_max = funcs->get_max; + pstate_funcs.get_max_physical = funcs->get_max_physical; pstate_funcs.get_min = funcs->get_min; pstate_funcs.get_turbo = funcs->get_turbo; pstate_funcs.get_scaling = funcs->get_scaling; @@ -1126,7 +1380,6 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } #if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> static bool intel_pstate_no_acpi_pss(void) { @@ -1318,6 +1571,9 @@ static int __init intel_pstate_setup(char *str) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + if (!strcmp(str, "no_acpi")) + no_acpi_perf = 1; + return 0; } early_param("intel_pstate", intel_pstate_setup); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index 64994e10638e..cb501386eb6e 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -327,8 +327,14 @@ static void powernv_cpufreq_throttle_check(void *data) if (chips[i].throttled) goto next; chips[i].throttled = true; - pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu, - chips[i].id, pmsr_pmax); + if (pmsr_pmax < powernv_pstate_info.nominal) + pr_crit("CPU %d on Chip %u has Pmax reduced below nominal frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.nominal); + else + pr_info("CPU %d on Chip %u has Pmax reduced below turbo frequency (%d < %d)\n", + cpu, chips[i].id, pmsr_pmax, + powernv_pstate_info.max); } else if (chips[i].throttled) { chips[i].throttled = false; pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index 8084c7f7e206..2bd62845e9d5 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -175,9 +175,7 @@ static struct cpufreq_driver tegra_cpufreq_driver = { .exit = tegra_cpu_exit, .name = "tegra", .attr = cpufreq_generic_attr, -#ifdef CONFIG_PM .suspend = cpufreq_generic_suspend, -#endif }; static int __init tegra_cpufreq_init(void) |