diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-13 16:24:18 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-06-13 16:24:18 +0200 |
commit | d09fcecb0c797b884ce65daa37c121a2786bb17b (patch) | |
tree | e9f055a54ebc2e3ab536872e55cbc4badc4f68a3 | |
parent | Revert "debugfs: inode: debugfs_create_dir uses mode permission from parent" (diff) | |
parent | Merge branches 'pm-domains' and 'pm-tools' (diff) | |
download | linux-d09fcecb0c797b884ce65daa37c121a2786bb17b.tar.xz linux-d09fcecb0c797b884ce65daa37c121a2786bb17b.zip |
Merge tag 'pm-4.18-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:
"These revert a recent PM core change that introduced a regression, fix
the build when the recently added Kryo cpufreq driver is selected, add
support for devices attached to multiple power domains to the generic
power domains (genpd) framework, add support for iowait boosting on
systens with hardware-managed P-states (HWP) enabled to the
intel_pstate driver, modify the behavior of the wakeup_count device
attribute in sysfs, fix a few issues and clean up some ugliness,
mostly in cpufreq (core and drivers) and in the cpupower utility.
Specifics:
- Revert a recent PM core change that attempted to fix an issue
related to device links, but introduced a regression (Rafael
Wysocki)
- Fix build when the recently added cpufreq driver for Kryo
processors is selected by making it possible to build that driver
as a module (Arnd Bergmann)
- Fix the long idle detection mechanism in the out-of-band (ondemand
and conservative) cpufreq governors (Chen Yu)
- Add support for devices in multiple power domains to the generic
power domains (genpd) framework (Ulf Hansson)
- Add support for iowait boosting on systems with hardware-managed
P-states (HWP) enabled to the intel_pstate driver and make it use
that feature on systems with Skylake Xeon processors as it is
reported to improve performance significantly on those systems
(Srinivas Pandruvada)
- Fix and update the acpi_cpufreq, ti-cpufreq and imx6q cpufreq
drivers (Colin Ian King, Suman Anna, Sébastien Szymanski)
- Change the behavior of the wakeup_count device attribute in sysfs
to expose the number of events when the device might have aborted
system suspend in progress (Ravi Chandra Sadineni)
- Fix two minor issues in the cpupower utility (Abhishek Goel, Colin
Ian King)"
* tag 'pm-4.18-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
Revert "PM / runtime: Fixup reference counting of device link suppliers at probe"
cpufreq: imx6q: check speed grades for i.MX6ULL
cpufreq: governors: Fix long idle detection logic in load calculation
cpufreq: intel_pstate: enable boost for Skylake Xeon
PM / wakeup: Export wakeup_count instead of event_count via sysfs
PM / Domains: Add dev_pm_domain_attach_by_id() to manage multi PM domains
PM / Domains: Add support for multi PM domains per device to genpd
PM / Domains: Split genpd_dev_pm_attach()
PM / Domains: Don't attach devices in genpd with multi PM domains
PM / Domains: dt: Allow power-domain property to be a list of specifiers
cpufreq: intel_pstate: New sysfs entry to control HWP boost
cpufreq: intel_pstate: HWP boost performance on IO wakeup
cpufreq: intel_pstate: Add HWP boost utility and sched util hooks
cpufreq: ti-cpufreq: Use devres managed API in probe()
cpufreq: ti-cpufreq: Fix an incorrect error return value
cpufreq: ACPI: make function acpi_cpufreq_fast_switch() static
cpufreq: kryo: allow building as a loadable module
cpupower : Fix header name to read idle state name
cpupower: fix spelling mistake: "logilename" -> "logfilename"
-rw-r--r-- | Documentation/devicetree/bindings/power/power_domain.txt | 19 | ||||
-rw-r--r-- | drivers/base/dd.c | 3 | ||||
-rw-r--r-- | drivers/base/power/common.c | 43 | ||||
-rw-r--r-- | drivers/base/power/domain.c | 134 | ||||
-rw-r--r-- | drivers/base/power/runtime.c | 27 | ||||
-rw-r--r-- | drivers/base/power/sysfs.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/Kconfig.arm | 2 | ||||
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 12 | ||||
-rw-r--r-- | drivers/cpufreq/imx6q-cpufreq.c | 29 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 179 | ||||
-rw-r--r-- | drivers/cpufreq/ti-cpufreq.c | 7 | ||||
-rw-r--r-- | include/linux/pm_domain.h | 15 | ||||
-rw-r--r-- | include/linux/pm_runtime.h | 6 | ||||
-rw-r--r-- | tools/power/cpupower/bench/parse.c | 2 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 15 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 35 | ||||
-rw-r--r-- | tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 9 |
18 files changed, 468 insertions, 75 deletions
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt index 4733f76cbe48..9b387f861aed 100644 --- a/Documentation/devicetree/bindings/power/power_domain.txt +++ b/Documentation/devicetree/bindings/power/power_domain.txt @@ -111,8 +111,8 @@ Example 3: ==PM domain consumers== Required properties: - - power-domains : A phandle and PM domain specifier as defined by bindings of - the power controller specified by phandle. + - power-domains : A list of PM domain specifiers, as defined by bindings of + the power controller that is the PM domain provider. Example: @@ -122,9 +122,18 @@ Example: power-domains = <&power 0>; }; -The node above defines a typical PM domain consumer device, which is located -inside a PM domain with index 0 of a power controller represented by a node -with the label "power". + leaky-device@12351000 { + compatible = "foo,i-leak-current"; + reg = <0x12351000 0x1000>; + power-domains = <&power 0>, <&power 1> ; + }; + +The first example above defines a typical PM domain consumer device, which is +located inside a PM domain with index 0 of a power controller represented by a +node with the label "power". +In the second example the consumer device are partitioned across two PM domains, +the first with index 0 and the second with index 1, of a power controller that +is represented by a node with the label "power. Optional properties: - required-opps: This contains phandle to an OPP node in another device's OPP diff --git a/drivers/base/dd.c b/drivers/base/dd.c index fb4e2df68d95..1435d7281c66 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -580,7 +580,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); - pm_runtime_resume_suppliers(dev); + pm_runtime_get_suppliers(dev); if (dev->parent) pm_runtime_get_sync(dev->parent); @@ -591,6 +591,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) if (dev->parent) pm_runtime_put(dev->parent); + pm_runtime_put_suppliers(dev); return ret; } diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c index 7ae62b6355b8..df41b4780b3b 100644 --- a/drivers/base/power/common.c +++ b/drivers/base/power/common.c @@ -117,13 +117,50 @@ int dev_pm_domain_attach(struct device *dev, bool power_on) EXPORT_SYMBOL_GPL(dev_pm_domain_attach); /** + * dev_pm_domain_attach_by_id - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @index: The index of the PM domain. + * + * As @dev may only be attached to a single PM domain, the backend PM domain + * provider creates a virtual device to attach instead. If attachment succeeds, + * the ->detach() callback in the struct dev_pm_domain are assigned by the + * corresponding backend attach function, as to deal with detaching of the + * created virtual device. + * + * This function should typically be invoked by a driver during the probe phase, + * in case its device requires power management through multiple PM domains. The + * driver may benefit from using the received device, to configure device-links + * towards its original device. Depending on the use-case and if needed, the + * links may be dynamically changed by the driver, which allows it to control + * the power to the PM domains independently from each other. + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. + * + * Returns the virtual created device when successfully attached to its PM + * domain, NULL in case @dev don't need a PM domain, else an ERR_PTR(). + * Note that, to detach the returned virtual device, the driver shall call + * dev_pm_domain_detach() on it, typically during the remove phase. + */ +struct device *dev_pm_domain_attach_by_id(struct device *dev, + unsigned int index) +{ + if (dev->pm_domain) + return ERR_PTR(-EEXIST); + + return genpd_dev_pm_attach_by_id(dev, index); +} +EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id); + +/** * dev_pm_domain_detach - Detach a device from its PM domain. * @dev: Device to detach. * @power_off: Used to indicate whether we should power off the device. * - * This functions will reverse the actions from dev_pm_domain_attach() and thus - * try to detach the @dev from its PM domain. Typically it should be invoked - * from subsystem level code during the remove phase. + * This functions will reverse the actions from dev_pm_domain_attach() and + * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain. + * Typically it should be invoked during the remove phase, either from + * subsystem level code or from drivers. * * Callers must ensure proper synchronization of this function with power * management callbacks. diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6f403d6fccb2..4925af5c4cf0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2171,6 +2171,15 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) } EXPORT_SYMBOL_GPL(of_genpd_remove_last); +static void genpd_release_dev(struct device *dev) +{ + kfree(dev); +} + +static struct bus_type genpd_bus_type = { + .name = "genpd", +}; + /** * genpd_dev_pm_detach - Detach a device from its PM domain. * @dev: Device to detach. @@ -2208,6 +2217,10 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) /* Check if PM domain can be powered off after removing this device. */ genpd_queue_power_off_work(pd); + + /* Unregister the device if it was created by genpd. */ + if (dev->bus == &genpd_bus_type) + device_unregister(dev); } static void genpd_dev_pm_sync(struct device *dev) @@ -2221,32 +2234,17 @@ static void genpd_dev_pm_sync(struct device *dev) genpd_queue_power_off_work(pd); } -/** - * genpd_dev_pm_attach - Attach a device to its PM domain using DT. - * @dev: Device to attach. - * - * Parse device's OF node to find a PM domain specifier. If such is found, - * attaches the device to retrieved pm_domain ops. - * - * Returns 1 on successfully attached PM domain, 0 when the device don't need a - * PM domain or a negative error code in case of failures. Note that if a - * power-domain exists for the device, but it cannot be found or turned on, - * then return -EPROBE_DEFER to ensure that the device is not probed and to - * re-try again later. - */ -int genpd_dev_pm_attach(struct device *dev) +static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np, + unsigned int index) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; int ret; - if (!dev->of_node) - return 0; - - ret = of_parse_phandle_with_args(dev->of_node, "power-domains", - "#power-domain-cells", 0, &pd_args); + ret = of_parse_phandle_with_args(np, "power-domains", + "#power-domain-cells", index, &pd_args); if (ret < 0) - return 0; + return ret; mutex_lock(&gpd_list_lock); pd = genpd_get_from_provider(&pd_args); @@ -2282,8 +2280,98 @@ int genpd_dev_pm_attach(struct device *dev) return ret ? -EPROBE_DEFER : 1; } + +/** + * genpd_dev_pm_attach - Attach a device to its PM domain using DT. + * @dev: Device to attach. + * + * Parse device's OF node to find a PM domain specifier. If such is found, + * attaches the device to retrieved pm_domain ops. + * + * Returns 1 on successfully attached PM domain, 0 when the device don't need a + * PM domain or when multiple power-domains exists for it, else a negative error + * code. Note that if a power-domain exists for the device, but it cannot be + * found or turned on, then return -EPROBE_DEFER to ensure that the device is + * not probed and to re-try again later. + */ +int genpd_dev_pm_attach(struct device *dev) +{ + if (!dev->of_node) + return 0; + + /* + * Devices with multiple PM domains must be attached separately, as we + * can only attach one PM domain per device. + */ + if (of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells") != 1) + return 0; + + return __genpd_dev_pm_attach(dev, dev->of_node, 0); +} EXPORT_SYMBOL_GPL(genpd_dev_pm_attach); +/** + * genpd_dev_pm_attach_by_id - Associate a device with one of its PM domains. + * @dev: The device used to lookup the PM domain. + * @index: The index of the PM domain. + * + * Parse device's OF node to find a PM domain specifier at the provided @index. + * If such is found, creates a virtual device and attaches it to the retrieved + * pm_domain ops. To deal with detaching of the virtual device, the ->detach() + * callback in the struct dev_pm_domain are assigned to genpd_dev_pm_detach(). + * + * Returns the created virtual device if successfully attached PM domain, NULL + * when the device don't need a PM domain, else an ERR_PTR() in case of + * failures. If a power-domain exists for the device, but cannot be found or + * turned on, then ERR_PTR(-EPROBE_DEFER) is returned to ensure that the device + * is not probed and to re-try again later. + */ +struct device *genpd_dev_pm_attach_by_id(struct device *dev, + unsigned int index) +{ + struct device *genpd_dev; + int num_domains; + int ret; + + if (!dev->of_node) + return NULL; + + /* Deal only with devices using multiple PM domains. */ + num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); + if (num_domains < 2 || index >= num_domains) + return NULL; + + /* Allocate and register device on the genpd bus. */ + genpd_dev = kzalloc(sizeof(*genpd_dev), GFP_KERNEL); + if (!genpd_dev) + return ERR_PTR(-ENOMEM); + + dev_set_name(genpd_dev, "genpd:%u:%s", index, dev_name(dev)); + genpd_dev->bus = &genpd_bus_type; + genpd_dev->release = genpd_release_dev; + + ret = device_register(genpd_dev); + if (ret) { + kfree(genpd_dev); + return ERR_PTR(ret); + } + + /* Try to attach the device to the PM domain at the specified index. */ + ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index); + if (ret < 1) { + device_unregister(genpd_dev); + return ret ? ERR_PTR(ret) : NULL; + } + + pm_runtime_set_active(genpd_dev); + pm_runtime_enable(genpd_dev); + + return genpd_dev; +} +EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id); + static const struct of_device_id idle_state_match[] = { { .compatible = "domain-idle-state", }, { } @@ -2443,6 +2531,12 @@ unlock: } EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state); +static int __init genpd_bus_init(void) +{ + return bus_register(&genpd_bus_type); +} +core_initcall(genpd_bus_init); + #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */ diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index c6030f100c08..beb85c31f3fa 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1563,16 +1563,37 @@ void pm_runtime_clean_up_links(struct device *dev) } /** - * pm_runtime_resume_suppliers - Resume supplier devices. + * pm_runtime_get_suppliers - Resume and reference-count supplier devices. * @dev: Consumer device. */ -void pm_runtime_resume_suppliers(struct device *dev) +void pm_runtime_get_suppliers(struct device *dev) { + struct device_link *link; int idx; idx = device_links_read_lock(); - rpm_get_suppliers(dev); + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_get_sync(link->supplier); + + device_links_read_unlock(idx); +} + +/** + * pm_runtime_put_suppliers - Drop references to supplier devices. + * @dev: Consumer device. + */ +void pm_runtime_put_suppliers(struct device *dev) +{ + struct device_link *link; + int idx; + + idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) + if (link->flags & DL_FLAG_PM_RUNTIME) + pm_runtime_put(link->supplier); device_links_read_unlock(idx); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 0f651efc58a1..d713738ce796 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -353,7 +353,7 @@ static ssize_t wakeup_count_show(struct device *dev, spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { - count = dev->power.wakeup->event_count; + count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index c7ce928fbf1f..52f5f1a2040c 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ default ARCH_OMAP2PLUS config ARM_QCOM_CPUFREQ_KRYO - bool "Qualcomm Kryo based CPUFreq" + tristate "Qualcomm Kryo based CPUFreq" depends on ARM64 depends on QCOM_QFPROM depends on QCOM_SMEM diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 8ff1c9123834..b61f4ec43e06 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -465,8 +465,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, return result; } -unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, - unsigned int target_freq) +static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, + unsigned int target_freq) { struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 871bf9cf55cf..1d50e97d49f1 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * calls, so the previous load value can be used then. */ load = j_cdbs->prev_load; - } else if (unlikely(time_elapsed > 2 * sampling_rate && + } else if (unlikely((int)idle_time > 2 * sampling_rate && j_cdbs->prev_load)) { /* * If the CPU had gone completely idle and a task has @@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) * clear prev_load to guarantee that the load will be * computed again next time. * - * Detecting this situation is easy: the governor's - * utilization update handler would not have run during - * CPU-idle periods. Hence, an unusually large - * 'time_elapsed' (as compared to the sampling rate) + * Detecting this situation is easy: an unusually large + * 'idle_time' (as compared to the sampling rate) * indicates this scenario. */ load = j_cdbs->prev_load; @@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy) j_cdbs->prev_load = load; } - if (time_elapsed > 2 * sampling_rate) { - unsigned int periods = time_elapsed / sampling_rate; + if (unlikely((int)idle_time > 2 * sampling_rate)) { + unsigned int periods = idle_time / sampling_rate; if (periods < idle_periods) idle_periods = periods; diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 70912104a199..8b3c2a79ad6c 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -266,6 +266,8 @@ put_node: } #define OCOTP_CFG3_6UL_SPEED_696MHZ 0x2 +#define OCOTP_CFG3_6ULL_SPEED_792MHZ 0x2 +#define OCOTP_CFG3_6ULL_SPEED_900MHZ 0x3 static void imx6ul_opp_check_speed_grading(struct device *dev) { @@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev) * Speed GRADING[1:0] defines the max speed of ARM: * 2b'00: Reserved; * 2b'01: 528000000Hz; - * 2b'10: 696000000Hz; - * 2b'11: Reserved; + * 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL; + * 2b'11: 900000000Hz on i.MX6ULL only; * We need to set the max speed of ARM according to fuse map. */ val = readl_relaxed(base + OCOTP_CFG3); val >>= OCOTP_CFG3_SPEED_SHIFT; val &= 0x3; - if (val != OCOTP_CFG3_6UL_SPEED_696MHZ) - if (dev_pm_opp_disable(dev, 696000000)) - dev_warn(dev, "failed to disable 696MHz OPP\n"); + + if (of_machine_is_compatible("fsl,imx6ul")) { + if (val != OCOTP_CFG3_6UL_SPEED_696MHZ) + if (dev_pm_opp_disable(dev, 696000000)) + dev_warn(dev, "failed to disable 696MHz OPP\n"); + } + + if (of_machine_is_compatible("fsl,imx6ull")) { + if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ) + if (dev_pm_opp_disable(dev, 792000000)) + dev_warn(dev, "failed to disable 792MHz OPP\n"); + + if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ) + if (dev_pm_opp_disable(dev, 900000000)) + dev_warn(dev, "failed to disable 900MHz OPP\n"); + } + iounmap(base); put_node: of_node_put(np); @@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev) goto put_reg; } - if (of_machine_is_compatible("fsl,imx6ul")) + if (of_machine_is_compatible("fsl,imx6ul") || + of_machine_is_compatible("fsl,imx6ull")) imx6ul_opp_check_speed_grading(cpu_dev); else imx6q_opp_check_speed_grading(cpu_dev); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index b6575408f279..1de5ec8d5ea3 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -221,6 +221,11 @@ struct global_params { * preference/bias * @epp_saved: Saved EPP/EPB during system suspend or CPU offline * operation + * @hwp_req_cached: Cached value of the last HWP Request MSR + * @hwp_cap_cached: Cached value of the last HWP Capabilities MSR + * @last_io_update: Last time when IO wake flag was set + * @sched_flags: Store scheduler flags for possible cross CPU update + * @hwp_boost_min: Last HWP boosted min performance * * This structure stores per CPU instance data for all CPUs. */ @@ -253,6 +258,11 @@ struct cpudata { s16 epp_policy; s16 epp_default; s16 epp_saved; + u64 hwp_req_cached; + u64 hwp_cap_cached; + u64 last_io_update; + unsigned int sched_flags; + u32 hwp_boost_min; }; static struct cpudata **all_cpu_data; @@ -285,6 +295,7 @@ static struct pstate_funcs pstate_funcs __read_mostly; static int hwp_active __read_mostly; static bool per_cpu_limits __read_mostly; +static bool hwp_boost __read_mostly; static struct cpufreq_driver *intel_pstate_driver __read_mostly; @@ -689,6 +700,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, u64 cap; rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap); if (global.no_turbo) *current_max = HWP_GUARANTEED_PERF(cap); else @@ -763,6 +775,7 @@ update_epp: intel_pstate_set_epb(cpu, epp); } skip_epp: + WRITE_ONCE(cpu_data->hwp_req_cached, value); wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value); } @@ -1020,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, return count; } +static ssize_t show_hwp_dynamic_boost(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", hwp_boost); +} + +static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = kstrtouint(buf, 10, &input); + if (ret) + return ret; + + mutex_lock(&intel_pstate_driver_lock); + hwp_boost = !!input; + intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_driver_lock); + + return count; +} + show_one(max_perf_pct, max_perf_pct); show_one(min_perf_pct, min_perf_pct); @@ -1029,6 +1066,7 @@ define_one_global_rw(max_perf_pct); define_one_global_rw(min_perf_pct); define_one_global_ro(turbo_pct); define_one_global_ro(num_pstates); +define_one_global_rw(hwp_dynamic_boost); static struct attribute *intel_pstate_attributes[] = { &status.attr, @@ -1069,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void) rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr); WARN_ON(rc); + if (hwp_active) { + rc = sysfs_create_file(intel_pstate_kobject, + &hwp_dynamic_boost.attr); + WARN_ON(rc); + } } /************************** sysfs end ************************/ @@ -1381,6 +1424,116 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) intel_pstate_set_min_pstate(cpu); } +/* + * Long hold time will keep high perf limits for long time, + * which negatively impacts perf/watt for some workloads, + * like specpower. 3ms is based on experiements on some + * workoads. + */ +static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC; + +static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) +{ + u64 hwp_req = READ_ONCE(cpu->hwp_req_cached); + u32 max_limit = (hwp_req & 0xff00) >> 8; + u32 min_limit = (hwp_req & 0xff); + u32 boost_level1; + + /* + * Cases to consider (User changes via sysfs or boot time): + * If, P0 (Turbo max) = P1 (Guaranteed max) = min: + * No boost, return. + * If, P0 (Turbo max) > P1 (Guaranteed max) = min: + * Should result in one level boost only for P0. + * If, P0 (Turbo max) = P1 (Guaranteed max) > min: + * Should result in two level boost: + * (min + p1)/2 and P1. + * If, P0 (Turbo max) > P1 (Guaranteed max) > min: + * Should result in three level boost: + * (min + p1)/2, P1 and P0. + */ + + /* If max and min are equal or already at max, nothing to boost */ + if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit) + return; + + if (!cpu->hwp_boost_min) + cpu->hwp_boost_min = min_limit; + + /* level at half way mark between min and guranteed */ + boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1; + + if (cpu->hwp_boost_min < boost_level1) + cpu->hwp_boost_min = boost_level1; + else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached); + else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) && + max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + cpu->hwp_boost_min = max_limit; + else + return; + + hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min; + wrmsrl(MSR_HWP_REQUEST, hwp_req); + cpu->last_update = cpu->sample.time; +} + +static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu) +{ + if (cpu->hwp_boost_min) { + bool expired; + + /* Check if we are idle for hold time to boost down */ + expired = time_after64(cpu->sample.time, cpu->last_update + + hwp_boost_hold_time_ns); + if (expired) { + wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached); + cpu->hwp_boost_min = 0; + } + } + cpu->last_update = cpu->sample.time; +} + +static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu, + u64 time) +{ + cpu->sample.time = time; + + if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) { + bool do_io = false; + + cpu->sched_flags = 0; + /* + * Set iowait_boost flag and update time. Since IO WAIT flag + * is set all the time, we can't just conclude that there is + * some IO bound activity is scheduled on this CPU with just + * one occurrence. If we receive at least two in two + * consecutive ticks, then we treat as boost candidate. + */ + if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC)) + do_io = true; + + cpu->last_io_update = time; + + if (do_io) + intel_pstate_hwp_boost_up(cpu); + + } else { + intel_pstate_hwp_boost_down(cpu); + } +} + +static inline void intel_pstate_update_util_hwp(struct update_util_data *data, + u64 time, unsigned int flags) +{ + struct cpudata *cpu = container_of(data, struct cpudata, update_util); + + cpu->sched_flags |= flags; + + if (smp_processor_id() == cpu->cpu) + intel_pstate_update_util_hwp_local(cpu, time); +} + static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -1641,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = { {} }; +static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = { + ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs), + ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs), + {} +}; + static int intel_pstate_init_cpu(unsigned int cpunum) { struct cpudata *cpu; @@ -1671,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_disable_ee(cpunum); intel_pstate_hwp_enable(cpu); + + id = x86_match_cpu(intel_pstate_hwp_boost_ids); + if (id) + hwp_boost = true; } intel_pstate_get_cpu_pstates(cpu); @@ -1684,7 +1847,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; - if (hwp_active) + if (hwp_active && !hwp_boost) return; if (cpu->update_util_set) @@ -1693,7 +1856,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, - intel_pstate_update_util); + (hwp_active ? + intel_pstate_update_util_hwp : + intel_pstate_update_util)); cpu->update_util_set = true; } @@ -1805,8 +1970,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) intel_pstate_set_update_util_hook(policy->cpu); } - if (hwp_active) + if (hwp_active) { + /* + * When hwp_boost was active before and dynamically it + * was turned off, in that case we need to clear the + * update util hook. + */ + if (!hwp_boost) + intel_pstate_clear_update_util_hook(policy->cpu); intel_pstate_hwp_set(policy->cpu); + } mutex_unlock(&intel_pstate_limits_lock); diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 6ba709b6f095..3f0e2a14895a 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -217,7 +217,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev) if (!match) return -ENODEV; - opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL); + opp_data = devm_kzalloc(&pdev->dev, sizeof(*opp_data), GFP_KERNEL); if (!opp_data) return -ENOMEM; @@ -226,8 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev) opp_data->cpu_dev = get_cpu_device(0); if (!opp_data->cpu_dev) { pr_err("%s: Failed to get device for CPU0\n", __func__); - ret = ENODEV; - goto free_opp_data; + return -ENODEV; } opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev); @@ -285,8 +284,6 @@ register_cpufreq_dt: fail_put_node: of_node_put(opp_data->opp_node); -free_opp_data: - kfree(opp_data); return ret; } diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 42e0d649e653..9206a4fef9ac 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -237,6 +237,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *opp_node); int genpd_dev_pm_attach(struct device *dev); +struct device *genpd_dev_pm_attach_by_id(struct device *dev, + unsigned int index); #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */ static inline int of_genpd_add_provider_simple(struct device_node *np, struct generic_pm_domain *genpd) @@ -282,6 +284,12 @@ static inline int genpd_dev_pm_attach(struct device *dev) return 0; } +static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev, + unsigned int index) +{ + return NULL; +} + static inline struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) { @@ -291,6 +299,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np) #ifdef CONFIG_PM int dev_pm_domain_attach(struct device *dev, bool power_on); +struct device *dev_pm_domain_attach_by_id(struct device *dev, + unsigned int index); void dev_pm_domain_detach(struct device *dev, bool power_off); void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd); #else @@ -298,6 +308,11 @@ static inline int dev_pm_domain_attach(struct device *dev, bool power_on) { return 0; } +static inline struct device *dev_pm_domain_attach_by_id(struct device *dev, + unsigned int index) +{ + return NULL; +} static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {} static inline void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd) {} diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index db5dbbf7a48d..f0fc4700b6ff 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -56,7 +56,8 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns); extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable); extern void pm_runtime_clean_up_links(struct device *dev); -extern void pm_runtime_resume_suppliers(struct device *dev); +extern void pm_runtime_get_suppliers(struct device *dev); +extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device *dev); @@ -172,7 +173,8 @@ static inline unsigned long pm_runtime_autosuspend_expiration( static inline void pm_runtime_set_memalloc_noio(struct device *dev, bool enable){} static inline void pm_runtime_clean_up_links(struct device *dev) {} -static inline void pm_runtime_resume_suppliers(struct device *dev) {} +static inline void pm_runtime_get_suppliers(struct device *dev) {} +static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device *dev) {} diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index 9b65f052081f..9ba8a44ad2a7 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname) dirname, time(NULL)); } - dprintf("logilename: %s\n", filename); + dprintf("logfilename: %s\n", filename); output = fopen(filename, "w+"); if (output == NULL) { diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 5b3205f16217..5b8c4956ff9a 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num) } } +#ifdef __powerpc__ +void map_power_idle_state_name(char *tmp) +{ + if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop0L"); + else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop1L"); + else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN)) + strcpy(tmp, "stop2L"); +} +#else +void map_power_idle_state_name(char *tmp) { } +#endif + static struct cpuidle_monitor *cpuidle_register(void) { int num; @@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void) if (tmp == NULL) continue; + map_power_idle_state_name(tmp); fix_up_intel_idle_driver_name(tmp, num); strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1); free(tmp); diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 05f953f0f0a0..051da0a7c454 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -70,36 +70,43 @@ void print_n_spaces(int n) printf(" "); } -/* size of s must be at least n + 1 */ +/*s is filled with left and right spaces + *to make its length atleast n+1 + */ int fill_string_with_spaces(char *s, int n) { + char *temp; int len = strlen(s); - if (len > n) + + if (len >= n) return -1; + + temp = malloc(sizeof(char) * (n+1)); for (; len < n; len++) s[len] = ' '; s[len] = '\0'; + snprintf(temp, n+1, " %s", s); + strcpy(s, temp); + free(temp); return 0; } +#define MAX_COL_WIDTH 6 void print_header(int topology_depth) { int unsigned mon; int state, need_len; cstate_t s; char buf[128] = ""; - int percent_width = 4; fill_string_with_spaces(buf, topology_depth * 5 - 1); printf("%s|", buf); for (mon = 0; mon < avail_monitors; mon++) { - need_len = monitors[mon]->hw_states_num * (percent_width + 3) + need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1) - 1; - if (mon != 0) { - printf("|| "); - need_len--; - } + if (mon != 0) + printf("||"); sprintf(buf, "%s", monitors[mon]->name); fill_string_with_spaces(buf, need_len); printf("%s", buf); @@ -107,23 +114,21 @@ void print_header(int topology_depth) printf("\n"); if (topology_depth > 2) - printf("PKG |"); + printf(" PKG|"); if (topology_depth > 1) printf("CORE|"); if (topology_depth > 0) - printf("CPU |"); + printf(" CPU|"); for (mon = 0; mon < avail_monitors; mon++) { if (mon != 0) - printf("|| "); - else - printf(" "); + printf("||"); for (state = 0; state < monitors[mon]->hw_states_num; state++) { if (state != 0) - printf(" | "); + printf("|"); s = monitors[mon]->hw_states[state]; sprintf(buf, "%s", s.name); - fill_string_with_spaces(buf, percent_width); + fill_string_with_spaces(buf, MAX_COL_WIDTH); printf("%s", buf); } printf(" "); diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 9e43f3371fbc..2ae50b499e0a 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -15,7 +15,16 @@ #define MONITORS_MAX 20 #define MONITOR_NAME_LEN 20 + +/* CSTATE_NAME_LEN is limited by header field width defined + * in cpupower-monitor.c. Header field width is defined to be + * sum of percent width and two spaces for padding. + */ +#ifdef __powerpc__ +#define CSTATE_NAME_LEN 7 +#else #define CSTATE_NAME_LEN 5 +#endif #define CSTATE_DESC_LEN 60 int cpu_count; |