Diffstat (limited to 'drivers')
46 files changed, 2017 insertions, 515 deletions
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index ced6863a16a5..84d5acb6301b 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -23,6 +23,7 @@ enum pce_status { PCE_STATUS_NONE = 0, PCE_STATUS_ACQUIRED, + PCE_STATUS_PREPARED, PCE_STATUS_ENABLED, PCE_STATUS_ERROR, }; @@ -32,9 +33,113 @@ struct pm_clock_entry { char *con_id; struct clk *clk; enum pce_status status; + bool enabled_when_prepared; }; /** + * pm_clk_list_lock - ensure exclusive access for modifying the PM clock + * entry list. + * @psd: pm_subsys_data instance corresponding to the PM clock entry list + * and clock_op_might_sleep count to be modified. + * + * Get exclusive access before modifying the PM clock entry list and the + * clock_op_might_sleep count to guard against concurrent modifications. + * This also protects against a concurrent clock_op_might_sleep and PM clock + * entry list usage in pm_clk_suspend()/pm_clk_resume() that may or may not + * happen in atomic context, hence both the mutex and the spinlock must be + * taken here. + */ +static void pm_clk_list_lock(struct pm_subsys_data *psd) + __acquires(&psd->lock) +{ + mutex_lock(&psd->clock_mutex); + spin_lock_irq(&psd->lock); +} + +/** + * pm_clk_list_unlock - counterpart to pm_clk_list_lock(). + * @psd: the same pm_subsys_data instance previously passed to + * pm_clk_list_lock(). + */ +static void pm_clk_list_unlock(struct pm_subsys_data *psd) + __releases(&psd->lock) +{ + spin_unlock_irq(&psd->lock); + mutex_unlock(&psd->clock_mutex); +} + +/** + * pm_clk_op_lock - ensure exclusive access for performing clock operations. + * @psd: pm_subsys_data instance corresponding to the PM clock entry list + * and clock_op_might_sleep count being used. + * @flags: stored irq flags. + * @fn: string for the caller function's name. + * + * This is used by pm_clk_suspend() and pm_clk_resume() to guard + * against concurrent modifications to the clock entry list and the + * clock_op_might_sleep count. If clock_op_might_sleep is != 0 then + * only the mutex can be locked and those functions can only be used in + * non-atomic context. If clock_op_might_sleep == 0 then these functions + * may be used in any context and only the spinlock can be locked. + * Returns -EPERM if called in atomic context when clock ops might sleep. + */ +static int pm_clk_op_lock(struct pm_subsys_data *psd, unsigned long *flags, + const char *fn) + /* sparse annotations don't work here as exit state isn't static */ +{ + bool atomic_context = in_atomic() || irqs_disabled(); + +try_again: + spin_lock_irqsave(&psd->lock, *flags); + if (!psd->clock_op_might_sleep) { + /* the __release is there to work around sparse limitations */ + __release(&psd->lock); + return 0; + } + + /* bail out if in atomic context */ + if (atomic_context) { + pr_err("%s: atomic context with clock_op_might_sleep = %d\n", + fn, psd->clock_op_might_sleep); + spin_unlock_irqrestore(&psd->lock, *flags); + might_sleep(); + return -EPERM; + } + + /* we must switch to the mutex */ + spin_unlock_irqrestore(&psd->lock, *flags); + mutex_lock(&psd->clock_mutex); + + /* + * There was a possibility for psd->clock_op_might_sleep + * to become 0 above. Keep the mutex only if that is not the case. + */ + if (likely(psd->clock_op_might_sleep)) + return 0; + + mutex_unlock(&psd->clock_mutex); + goto try_again; +} + +/** + * pm_clk_op_unlock - counterpart to pm_clk_op_lock(). + * @psd: the same pm_subsys_data instance previously passed to + * pm_clk_op_lock().
+ * @flags: irq flags provided by pm_clk_op_lock(). + */ +static void pm_clk_op_unlock(struct pm_subsys_data *psd, unsigned long *flags) + /* sparse annotations don't work here as entry state isn't static */ +{ + if (psd->clock_op_might_sleep) { + mutex_unlock(&psd->clock_mutex); + } else { + /* the __acquire is there to work around sparse limitations */ + __acquire(&psd->lock); + spin_unlock_irqrestore(&psd->lock, *flags); + } +} + +/** * pm_clk_enable - Enable a clock, reporting any errors * @dev: The device for the given clock * @ce: PM clock entry corresponding to the clock. @@ -43,14 +148,21 @@ static inline void __pm_clk_enable(struct device *dev, struct pm_clock_entry *ce { int ret; - if (ce->status < PCE_STATUS_ERROR) { + switch (ce->status) { + case PCE_STATUS_ACQUIRED: + ret = clk_prepare_enable(ce->clk); + break; + case PCE_STATUS_PREPARED: ret = clk_enable(ce->clk); - if (!ret) - ce->status = PCE_STATUS_ENABLED; - else - dev_err(dev, "%s: failed to enable clk %p, error %d\n", - __func__, ce->clk, ret); + break; + default: + return; } + if (!ret) + ce->status = PCE_STATUS_ENABLED; + else + dev_err(dev, "%s: failed to enable clk %p, error %d\n", + __func__, ce->clk, ret); } /** @@ -64,17 +176,20 @@ static void pm_clk_acquire(struct device *dev, struct pm_clock_entry *ce) ce->clk = clk_get(dev, ce->con_id); if (IS_ERR(ce->clk)) { ce->status = PCE_STATUS_ERROR; + return; + } else if (clk_is_enabled_when_prepared(ce->clk)) { + /* we defer preparing the clock in that case */ + ce->status = PCE_STATUS_ACQUIRED; + ce->enabled_when_prepared = true; + } else if (clk_prepare(ce->clk)) { + ce->status = PCE_STATUS_ERROR; + dev_err(dev, "clk_prepare() failed\n"); + return; } else { - if (clk_prepare(ce->clk)) { - ce->status = PCE_STATUS_ERROR; - dev_err(dev, "clk_prepare() failed\n"); - } else { - ce->status = PCE_STATUS_ACQUIRED; - dev_dbg(dev, - "Clock %pC con_id %s managed by runtime PM.\n", - ce->clk, ce->con_id); - } + ce->status = PCE_STATUS_PREPARED; } + dev_dbg(dev, "Clock %pC con_id %s managed by runtime PM.\n", + ce->clk, ce->con_id); } static int __pm_clk_add(struct device *dev, const char *con_id, @@ -106,9 +221,11 @@ static int __pm_clk_add(struct device *dev, const char *con_id, pm_clk_acquire(dev, ce); - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_add_tail(&ce->node, &psd->clock_list); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + psd->clock_op_might_sleep++; + pm_clk_list_unlock(psd); return 0; } @@ -239,14 +356,20 @@ static void __pm_clk_remove(struct pm_clock_entry *ce) if (!ce) return; - if (ce->status < PCE_STATUS_ERROR) { - if (ce->status == PCE_STATUS_ENABLED) - clk_disable(ce->clk); - - if (ce->status >= PCE_STATUS_ACQUIRED) { - clk_unprepare(ce->clk); + switch (ce->status) { + case PCE_STATUS_ENABLED: + clk_disable(ce->clk); + fallthrough; + case PCE_STATUS_PREPARED: + clk_unprepare(ce->clk); + fallthrough; + case PCE_STATUS_ACQUIRED: + case PCE_STATUS_ERROR: + if (!IS_ERR(ce->clk)) clk_put(ce->clk); - } + break; + default: + break; } kfree(ce->con_id); @@ -269,7 +392,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) if (!psd) return; - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry(ce, &psd->clock_list, node) { if (!con_id && !ce->con_id) @@ -280,12 +403,14 @@ void pm_clk_remove(struct device *dev, const char *con_id) goto remove; } - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); return; remove: list_del(&ce->node); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + 
psd->clock_op_might_sleep--; + pm_clk_list_unlock(psd); __pm_clk_remove(ce); } @@ -307,19 +432,21 @@ void pm_clk_remove_clk(struct device *dev, struct clk *clk) if (!psd || !clk) return; - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry(ce, &psd->clock_list, node) { if (clk == ce->clk) goto remove; } - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); return; remove: list_del(&ce->node); - spin_unlock_irq(&psd->lock); + if (ce->enabled_when_prepared) + psd->clock_op_might_sleep--; + pm_clk_list_unlock(psd); __pm_clk_remove(ce); } @@ -330,13 +457,16 @@ EXPORT_SYMBOL_GPL(pm_clk_remove_clk); * @dev: Device to initialize the list of PM clocks for. * * Initialize the lock and clock_list members of the device's pm_subsys_data - * object. + * object, set the count of clocks that might sleep to 0. */ void pm_clk_init(struct device *dev) { struct pm_subsys_data *psd = dev_to_psd(dev); - if (psd) + if (psd) { INIT_LIST_HEAD(&psd->clock_list); + mutex_init(&psd->clock_mutex); + psd->clock_op_might_sleep = 0; + } } EXPORT_SYMBOL_GPL(pm_clk_init); @@ -372,12 +502,13 @@ void pm_clk_destroy(struct device *dev) INIT_LIST_HEAD(&list); - spin_lock_irq(&psd->lock); + pm_clk_list_lock(psd); list_for_each_entry_safe_reverse(ce, c, &psd->clock_list, node) list_move(&ce->node, &list); + psd->clock_op_might_sleep = 0; - spin_unlock_irq(&psd->lock); + pm_clk_list_unlock(psd); dev_pm_put_subsys_data(dev); @@ -397,23 +528,30 @@ int pm_clk_suspend(struct device *dev) struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; + int ret; dev_dbg(dev, "%s()\n", __func__); if (!psd) return 0; - spin_lock_irqsave(&psd->lock, flags); + ret = pm_clk_op_lock(psd, &flags, __func__); + if (ret) + return ret; list_for_each_entry_reverse(ce, &psd->clock_list, node) { - if (ce->status < PCE_STATUS_ERROR) { - if (ce->status == PCE_STATUS_ENABLED) + if (ce->status == PCE_STATUS_ENABLED) { + if (ce->enabled_when_prepared) { + clk_disable_unprepare(ce->clk); + ce->status = PCE_STATUS_ACQUIRED; + } else { clk_disable(ce->clk); - ce->status = PCE_STATUS_ACQUIRED; + ce->status = PCE_STATUS_PREPARED; + } } } - spin_unlock_irqrestore(&psd->lock, flags); + pm_clk_op_unlock(psd, &flags); return 0; } @@ -428,18 +566,21 @@ int pm_clk_resume(struct device *dev) struct pm_subsys_data *psd = dev_to_psd(dev); struct pm_clock_entry *ce; unsigned long flags; + int ret; dev_dbg(dev, "%s()\n", __func__); if (!psd) return 0; - spin_lock_irqsave(&psd->lock, flags); + ret = pm_clk_op_lock(psd, &flags, __func__); + if (ret) + return ret; list_for_each_entry(ce, &psd->clock_list, node) __pm_clk_enable(dev, ce); - spin_unlock_irqrestore(&psd->lock, flags); + pm_clk_op_unlock(psd, &flags); return 0; } diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9a14eedacb92..aaf6c83b5cf6 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -297,6 +297,18 @@ static int _genpd_reeval_performance_state(struct generic_pm_domain *genpd, return state; } +static int genpd_xlate_performance_state(struct generic_pm_domain *genpd, + struct generic_pm_domain *parent, + unsigned int pstate) +{ + if (!parent->set_performance_state) + return pstate; + + return dev_pm_opp_xlate_performance_state(genpd->opp_table, + parent->opp_table, + pstate); +} + static int _genpd_set_performance_state(struct generic_pm_domain *genpd, unsigned int state, int depth) { @@ -311,13 +323,8 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, 
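The locking rework above changes the calling constraints for PM clock consumers: pm_clk_suspend() and pm_clk_resume() may now sleep whenever one of the managed clocks is enabled by its prepare step. A minimal consumer sketch under assumptions (hypothetical driver, made-up con_id "fck"); pm_clk_create()/pm_clk_add()/pm_clk_destroy() and the suspend/resume helpers are the existing linux/pm_clock.h APIs:

#include <linux/platform_device.h>
#include <linux/pm_clock.h>
#include <linux/pm_runtime.h>

/* Hypothetical consumer whose clocks are managed by the PM clock layer */
static int foo_probe(struct platform_device *pdev)
{
	int ret;

	ret = pm_clk_create(&pdev->dev);
	if (ret)
		return ret;

	/* "fck" is a made-up con_id; it may resolve to a sleeping clock */
	ret = pm_clk_add(&pdev->dev, "fck");
	if (ret) {
		pm_clk_destroy(&pdev->dev);
		return ret;
	}

	pm_runtime_enable(&pdev->dev);
	return 0;
}

/*
 * pm_clk_suspend()/pm_clk_resume() now take pm_clk_op_lock() internally
 * and return -EPERM if a sleeping clock op would be needed in atomic
 * context, so irq-safe runtime PM only works with non-sleeping clocks.
 */
static const struct dev_pm_ops foo_pm_ops = {
	SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
};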
list_for_each_entry(link, &genpd->child_links, child_node) { parent = link->parent; - if (!parent->set_performance_state) - continue; - /* Find parent's performance state */ - ret = dev_pm_opp_xlate_performance_state(genpd->opp_table, - parent->opp_table, - state); + ret = genpd_xlate_performance_state(genpd, parent, state); if (unlikely(ret < 0)) goto err; @@ -339,9 +346,11 @@ static int _genpd_set_performance_state(struct generic_pm_domain *genpd, goto err; } - ret = genpd->set_performance_state(genpd, state); - if (ret) - goto err; + if (genpd->set_performance_state) { + ret = genpd->set_performance_state(genpd, state); + if (ret) + goto err; + } genpd->performance_state = state; return 0; @@ -352,9 +361,6 @@ err: child_node) { parent = link->parent; - if (!parent->set_performance_state) - continue; - genpd_lock_nested(parent, depth + 1); parent_state = link->prev_performance_state; @@ -399,9 +405,6 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) if (!genpd) return -ENODEV; - if (unlikely(!genpd->set_performance_state)) - return -EINVAL; - if (WARN_ON(!dev->power.subsys_data || !dev->power.subsys_data->domain_data)) return -EINVAL; @@ -423,6 +426,35 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state) } EXPORT_SYMBOL_GPL(dev_pm_genpd_set_performance_state); +/** + * dev_pm_genpd_set_next_wakeup - Notify PM framework of an impending wakeup. + * + * @dev: Device to handle + * @next: impending interrupt/wakeup for the device + * + * Allow devices to inform the PM framework of their next wakeup. It's + * assumed that the callers guarantee that the genpd wouldn't be detached + * while this routine is getting called. Additionally, it's also assumed + * that @dev isn't runtime suspended (RPM_SUSPENDED). + * Although devices are expected to update the next_wakeup after the end of + * their usecase as well, they may not always be able to do so, so a stale + * @next will be ignored when powering off the domain. + */ +void dev_pm_genpd_set_next_wakeup(struct device *dev, ktime_t next) +{ + struct generic_pm_domain_data *gpd_data; + struct generic_pm_domain *genpd; + + genpd = dev_to_genpd_safe(dev); + if (!genpd) + return; + + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + gpd_data->next_wakeup = next; +} +EXPORT_SYMBOL_GPL(dev_pm_genpd_set_next_wakeup); + static int _genpd_power_on(struct generic_pm_domain *genpd, bool timed) { unsigned int state_idx = genpd->state_idx; @@ -934,8 +966,7 @@ static int genpd_runtime_resume(struct device *dev) err_stop: genpd_stop_dev(genpd, dev); err_poweroff: - if (!pm_runtime_is_irq_safe(dev) || - (pm_runtime_is_irq_safe(dev) && genpd_is_irq_safe(genpd))) { + if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) { genpd_lock(genpd); genpd_power_off(genpd, true, 0); genpd_unlock(genpd); @@ -1465,6 +1496,7 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev) gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS; gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; + gpd_data->next_wakeup = KTIME_MAX; spin_lock_irq(&dev->power.lock); @@ -2463,7 +2495,7 @@ int of_genpd_add_subdomain(struct of_phandle_args *parent_spec, out: mutex_unlock(&gpd_list_lock); - return ret; + return ret == -ENOENT ?
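The new hint is meant to be set by the device driver just before it idles, while the device is still RPM-active. A sketch, assuming a hypothetical driver whose next interrupt is known to be about 5 ms away:

#include <linux/ktime.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>

/* Hypothetical driver path; must run before the device is runtime suspended */
static void foo_enter_idle(struct device *dev)
{
	ktime_t next = ktime_add_ms(ktime_get(), 5);

	/* Hint the genpd governor; a stale hint is ignored at power-off */
	dev_pm_genpd_set_next_wakeup(dev, next);
	pm_runtime_put(dev);
}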
-EPROBE_DEFER : ret; } EXPORT_SYMBOL_GPL(of_genpd_add_subdomain); @@ -2952,7 +2984,15 @@ static void rtpm_status_str(struct seq_file *s, struct device *dev) else WARN_ON(1); - seq_puts(s, p); + seq_printf(s, "%-25s ", p); +} + +static void perf_status_str(struct seq_file *s, struct device *dev) +{ + struct generic_pm_domain_data *gpd_data; + + gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); + seq_put_decimal_ull(s, "", gpd_data->performance_state); } static int genpd_summary_one(struct seq_file *s, @@ -2980,7 +3020,7 @@ static int genpd_summary_one(struct seq_file *s, else snprintf(state, sizeof(state), "%s", status_lookup[genpd->status]); - seq_printf(s, "%-30s %-15s ", genpd->name, state); + seq_printf(s, "%-30s %-50s %u", genpd->name, state, genpd->performance_state); /* * Modifications on the list require holding locks on both @@ -2988,6 +3028,8 @@ * Also genpd->name is immutable. */ list_for_each_entry(link, &genpd->parent_links, parent_node) { + if (list_is_first(&link->parent_node, &genpd->parent_links)) + seq_printf(s, "\n%48s", " "); seq_printf(s, "%s", link->child->name); if (!list_is_last(&link->parent_node, &genpd->parent_links)) seq_puts(s, ", "); @@ -3002,6 +3044,7 @@ seq_printf(s, "\n %-50s ", kobj_path); rtpm_status_str(s, pm_data->dev); + perf_status_str(s, pm_data->dev); kfree(kobj_path); } @@ -3017,9 +3060,9 @@ static int summary_show(struct seq_file *s, void *data) struct generic_pm_domain *genpd; int ret = 0; - seq_puts(s, "domain status children\n"); + seq_puts(s, "domain status children performance\n"); seq_puts(s, " /device runtime status\n"); - seq_puts(s, "----------------------------------------------------------------------\n"); + seq_puts(s, "----------------------------------------------------------------------------------------------\n"); ret = mutex_lock_interruptible(&gpd_list_lock); if (ret) diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 490ed7deb99a..c6c218758f0b 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -117,6 +117,55 @@ static bool default_suspend_ok(struct device *dev) return td->cached_suspend_ok; } +static void update_domain_next_wakeup(struct generic_pm_domain *genpd, ktime_t now) +{ + ktime_t domain_wakeup = KTIME_MAX; + ktime_t next_wakeup; + struct pm_domain_data *pdd; + struct gpd_link *link; + + if (!(genpd->flags & GENPD_FLAG_MIN_RESIDENCY)) + return; + + /* + * Devices that have a predictable wakeup pattern may specify + * their next wakeup. Let's find the next wakeup from all the + * devices attached to this domain and from all the sub-domains. + * It is possible that a component's next wakeup may have become + * stale by the time we read it here; stale wakeups are ignored so + * that the domain is still able to enter its optimal idle state.
+ */ + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + next_wakeup = to_gpd_data(pdd)->next_wakeup; + if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) + if (ktime_before(next_wakeup, domain_wakeup)) + domain_wakeup = next_wakeup; + } + + list_for_each_entry(link, &genpd->parent_links, parent_node) { + next_wakeup = link->child->next_wakeup; + if (next_wakeup != KTIME_MAX && !ktime_before(next_wakeup, now)) + if (ktime_before(next_wakeup, domain_wakeup)) + domain_wakeup = next_wakeup; + } + + genpd->next_wakeup = domain_wakeup; +} + +static bool next_wakeup_allows_state(struct generic_pm_domain *genpd, + unsigned int state, ktime_t now) +{ + ktime_t domain_wakeup = genpd->next_wakeup; + s64 idle_time_ns, min_sleep_ns; + + min_sleep_ns = genpd->states[state].power_off_latency_ns + + genpd->states[state].residency_ns; + + idle_time_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); + + return idle_time_ns >= min_sleep_ns; +} + static bool __default_power_down_ok(struct dev_pm_domain *pd, unsigned int state) { @@ -201,16 +250,41 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, } /** - * default_power_down_ok - Default generic PM domain power off governor routine. + * _default_power_down_ok - Default generic PM domain power off governor routine. * @pd: PM domain to check. * * This routine must be executed under the PM domain's lock. */ -static bool default_power_down_ok(struct dev_pm_domain *pd) +static bool _default_power_down_ok(struct dev_pm_domain *pd, ktime_t now) { struct generic_pm_domain *genpd = pd_to_genpd(pd); + int state_idx = genpd->state_count - 1; struct gpd_link *link; + /* + * Find the next wakeup of the devices that can determine their own, + * doing so for every device down the hierarchy, to learn when the + * domain would wake up. It is not worthwhile to sleep if the state's + * residency cannot be met. + */ + update_domain_next_wakeup(genpd, now); + if ((genpd->flags & GENPD_FLAG_MIN_RESIDENCY) && (genpd->next_wakeup != KTIME_MAX)) { + /* Let's find the deepest domain idle state that the devices prefer */ + while (state_idx >= 0) { + if (next_wakeup_allows_state(genpd, state_idx, now)) { + genpd->max_off_time_changed = true; + break; + } + state_idx--; + } + + if (state_idx < 0) { + state_idx = 0; + genpd->cached_power_down_ok = false; + goto done; + } + } + if (!genpd->max_off_time_changed) { genpd->state_idx = genpd->cached_power_down_state_idx; return genpd->cached_power_down_ok; @@ -228,21 +302,30 @@ static bool default_power_down_ok(struct dev_pm_domain *pd) genpd->max_off_time_ns = -1; genpd->max_off_time_changed = false; genpd->cached_power_down_ok = true; - genpd->state_idx = genpd->state_count - 1; - /* Find a state to power down to, starting from the deepest. */ - while (!__default_power_down_ok(pd, genpd->state_idx)) { - if (genpd->state_idx == 0) { + /* + * Find a state to power down to, starting from the state + * determined by the next wakeup.
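The per-state test in next_wakeup_allows_state() reduces to one comparison; a worked sketch with assumed example numbers, not taken from any real platform:

#include <linux/ktime.h>

/*
 * Example: residency_ns = 2000000 (2 ms) and power_off_latency_ns =
 * 500000 (0.5 ms) give min_sleep_ns = 2500000. A domain wakeup 3 ms
 * away passes the check; one 1 ms away fails it, so the governor
 * falls back to a shallower state.
 */
static bool wakeup_allows_state(ktime_t domain_wakeup, ktime_t now,
				s64 power_off_latency_ns, s64 residency_ns)
{
	s64 min_sleep_ns = power_off_latency_ns + residency_ns;
	s64 idle_time_ns = ktime_to_ns(ktime_sub(domain_wakeup, now));

	return idle_time_ns >= min_sleep_ns;
}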
+ */ + while (!__default_power_down_ok(pd, state_idx)) { + if (state_idx == 0) { genpd->cached_power_down_ok = false; break; } - genpd->state_idx--; + state_idx--; } +done: + genpd->state_idx = state_idx; genpd->cached_power_down_state_idx = genpd->state_idx; return genpd->cached_power_down_ok; } +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + return _default_power_down_ok(pd, ktime_get()); +} + static bool always_on_power_down_ok(struct dev_pm_domain *domain) { return false; @@ -254,11 +337,12 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) struct generic_pm_domain *genpd = pd_to_genpd(pd); struct cpuidle_device *dev; ktime_t domain_wakeup, next_hrtimer; + ktime_t now = ktime_get(); s64 idle_duration_ns; int cpu, i; /* Validate dev PM QoS constraints. */ - if (!default_power_down_ok(pd)) + if (!_default_power_down_ok(pd, now)) return false; if (!(genpd->flags & GENPD_FLAG_CPU_DOMAIN)) @@ -280,7 +364,7 @@ static bool cpu_power_down_ok(struct dev_pm_domain *pd) } /* The minimum idle duration is from now - until the next wakeup. */ - idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, ktime_get())); + idle_duration_ns = ktime_to_ns(ktime_sub(domain_wakeup, now)); if (idle_duration_ns <= 0) return false; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 46793276598d..f893c3c5af07 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -16,6 +16,7 @@ */ #define pr_fmt(fmt) "PM: " fmt +#define dev_fmt pr_fmt #include <linux/device.h> #include <linux/export.h> @@ -449,8 +450,8 @@ static void pm_dev_dbg(struct device *dev, pm_message_t state, const char *info) static void pm_dev_err(struct device *dev, pm_message_t state, const char *info, int error) { - pr_err("Device %s failed to %s%s: error %d\n", - dev_name(dev), pm_verb(state.event), info, error); + dev_err(dev, "failed to %s%s: error %d\n", pm_verb(state.event), info, + error); } static void dpm_show_time(ktime_t starttime, pm_message_t state, int error, @@ -1897,8 +1898,8 @@ int dpm_prepare(pm_message_t state) error = 0; continue; } - pr_info("Device %s not prepared for power transition: code %d\n", - dev_name(dev), error); + dev_info(dev, "not prepared for power transition: code %d\n", + error); put_device(dev); break; } diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bfda153b1a41..a46a7e30881b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1100,7 +1100,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume); * suspending the device when both its runtime PM status is %RPM_ACTIVE and its * runtime PM usage counter is not zero. * - * The caller is resposible for decrementing the runtime PM usage counter of + * The caller is responsible for decrementing the runtime PM usage counter of * @dev after this function has returned a positive value for it. */ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 8c1d04db990d..3d751ae5bc70 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -1164,6 +1164,27 @@ int clk_enable(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_enable); +/** + * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. + * @clk: clock source + * + * Returns true if clk_prepare() implicitly enables the clock, effectively + * making clk_enable()/clk_disable() no-ops, false otherwise. 
+ * + * This is of interest mainly to power management code where actually + * disabling the clock also requires unpreparing it to have any material + * effect. + * + * Regardless of the value returned here, the caller must always invoke + * clk_enable() or clk_prepare_enable() and counterparts for usage counts + * to be right. + */ +bool clk_is_enabled_when_prepared(struct clk *clk) +{ + return clk && !(clk->core->ops->enable && clk->core->ops->disable); +} +EXPORT_SYMBOL_GPL(clk_is_enabled_when_prepared); + static int clk_core_prepare_enable(struct clk_core *core) { int ret; diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 1f73fa75b1a0..e65e0a43be64 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -289,11 +289,6 @@ config ARM_STI_CPUFREQ this config option if you wish to add CPUFreq support for STi based SoCs. -config ARM_TANGO_CPUFREQ - bool - depends on CPUFREQ_DT && ARCH_TANGO - default y - config ARM_TEGRA20_CPUFREQ tristate "Tegra20/30 CPUFreq support" depends on ARCH_TEGRA && CPUFREQ_DT diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index f1b7e3dd6e5d..1ab9b1536304 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -79,7 +79,6 @@ obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o obj-$(CONFIG_ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM) += sun50i-cpufreq-nvmem.o -obj-$(CONFIG_ARM_TANGO_CPUFREQ) += tango-cpufreq.o obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o obj-$(CONFIG_ARM_TEGRA186_CPUFREQ) += tegra186-cpufreq.o diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 3e31e5d28b79..4153150e20db 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -597,6 +597,16 @@ unmap_base: return ret; } +static void brcm_avs_prepare_uninit(struct platform_device *pdev) +{ + struct private_data *priv; + + priv = platform_get_drvdata(pdev); + + iounmap(priv->avs_intr_base); + iounmap(priv->base); +} + static int brcm_avs_cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; @@ -732,21 +742,21 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) brcm_avs_driver.driver_data = pdev; - return cpufreq_register_driver(&brcm_avs_driver); + ret = cpufreq_register_driver(&brcm_avs_driver); + if (ret) + brcm_avs_prepare_uninit(pdev); + + return ret; } static int brcm_avs_cpufreq_remove(struct platform_device *pdev) { - struct private_data *priv; int ret; ret = cpufreq_unregister_driver(&brcm_avs_driver); - if (ret) - return ret; + WARN_ON(ret); - priv = platform_get_drvdata(pdev); - iounmap(priv->base); - iounmap(priv->avs_intr_base); + brcm_avs_prepare_uninit(pdev); return 0; } diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bd2db0188cbb..3ba2f716fe97 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -141,8 +141,6 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "st,stih410", }, { .compatible = "st,stih418", }, - { .compatible = "sigma,tango4", }, - { .compatible = "ti,am33xx", }, { .compatible = "ti,am43", }, { .compatible = "ti,dra7", }, diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index ad4234518ef6..b1e1bdc63b01 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ 
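Outside the PM clock layer, the new predicate lets a caller decide how deeply a clock can be gated from the current context. A sketch under assumptions (helper name hypothetical; the matching re-enable path is expected to mirror the branch taken here so prepare/enable counts stay balanced):

#include <linux/clk.h>

static void foo_clk_gate(struct clk *clk, bool can_sleep)
{
	if (can_sleep && clk_is_enabled_when_prepared(clk)) {
		/* clk_disable() alone would not actually gate this clock */
		clk_disable_unprepare(clk);
	} else {
		/* atomic-safe; the prepare count is left untouched */
		clk_disable(clk);
	}
}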
b/drivers/cpufreq/cpufreq-dt.c @@ -175,7 +175,7 @@ static int cpufreq_exit(struct cpufreq_policy *policy) } static struct cpufreq_driver dt_cpufreq_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, .target_index = set_target, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d0a3525ce27f..7d0ae968def7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2810,8 +2810,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (ret) goto err_boost_unreg; - if (!(cpufreq_driver->flags & CPUFREQ_STICKY) && - list_empty(&cpufreq_policy_list)) { + if (unlikely(list_empty(&cpufreq_policy_list))) { /* if all ->init() calls failed, unregister */ ret = -ENODEV; pr_debug("%s: No CPU initialized for driver %s\n", __func__, diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c index 91f477a6cbc4..9e97f60f8199 100644 --- a/drivers/cpufreq/davinci-cpufreq.c +++ b/drivers/cpufreq/davinci-cpufreq.c @@ -95,7 +95,7 @@ static int davinci_cpu_init(struct cpufreq_policy *policy) } static struct cpufreq_driver davinci_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = davinci_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be05e038d956..5175ae3cac44 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -819,13 +819,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, +static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max, int *current_max) { u64 cap; - rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap); + rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); + WRITE_ONCE(cpu->hwp_cap_cached, cap); if (global.no_turbo || global.turbo_disabled) *current_max = HWP_GUARANTEED_PERF(cap); else @@ -914,7 +914,7 @@ static void intel_pstate_hwp_offline(struct cpudata *cpu) } value &= ~GENMASK_ULL(31, 0); - min_perf = HWP_LOWEST_PERF(cpu->hwp_cap_cached); + min_perf = HWP_LOWEST_PERF(READ_ONCE(cpu->hwp_cap_cached)); /* Set hwp_max = hwp_min */ value |= HWP_MAX_PERF(min_perf); @@ -1213,7 +1213,7 @@ static void update_qos_request(enum freq_qos_req_type type) continue; if (hwp_active) - intel_pstate_get_hwp_max(i, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); else turbo_max = cpu->pstate.turbo_pstate; @@ -1714,21 +1714,22 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); - cpu->pstate.max_pstate = pstate_funcs.get_max(); cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); - cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; if (hwp_active && !hwp_mode_bdw) { unsigned int phy_max, current_max; - intel_pstate_get_hwp_max(cpu->cpu, &phy_max, ¤t_max); + intel_pstate_get_hwp_max(cpu, &phy_max, ¤t_max); cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; cpu->pstate.turbo_pstate = phy_max; + cpu->pstate.max_pstate = 
HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached)); } else { cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + cpu->pstate.max_pstate = pstate_funcs.get_max(); } + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; if (pstate_funcs.get_aperf_mperf_shift) cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); @@ -1750,6 +1751,7 @@ static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC; static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) { u64 hwp_req = READ_ONCE(cpu->hwp_req_cached); + u64 hwp_cap = READ_ONCE(cpu->hwp_cap_cached); u32 max_limit = (hwp_req & 0xff00) >> 8; u32 min_limit = (hwp_req & 0xff); u32 boost_level1; @@ -1776,14 +1778,14 @@ static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu) cpu->hwp_boost_min = min_limit; /* level at half way mark between min and guranteed */ - boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1; + boost_level1 = (HWP_GUARANTEED_PERF(hwp_cap) + min_limit) >> 1; if (cpu->hwp_boost_min < boost_level1) cpu->hwp_boost_min = boost_level1; - else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) - cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached); - else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) && - max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached)) + else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(hwp_cap)) + cpu->hwp_boost_min = HWP_GUARANTEED_PERF(hwp_cap); + else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(hwp_cap) && + max_limit != HWP_GUARANTEED_PERF(hwp_cap)) cpu->hwp_boost_min = max_limit; else return; @@ -2207,7 +2209,7 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * rather than pure ratios. */ if (hwp_active) { - intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); } else { max_state = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; @@ -2322,7 +2324,7 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, if (hwp_active) { int max_state, turbo_max; - intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); max_freq = max_state * cpu->pstate.scaling; } else { max_freq = intel_pstate_get_max_freq(cpu); @@ -2496,7 +2498,7 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy_data *policy) * driver call was via the normal or fast switch path. Various graphs * output from the intel_pstate_tracer.py utility that include core_busy * (or performance or core_avg_perf) have a fixed y-axis from 0 to 100%, - * so we use 10 to indicate the the normal path through the driver, and + * so we use 10 to indicate the normal path through the driver, and * 90 to indicate the fast switch path through the driver. * The scaled_busy field is not used, and is set to 0. 
*/ @@ -2526,7 +2528,7 @@ static void intel_cpufreq_trace(struct cpudata *cpu, unsigned int trace_type, in fp_toint(cpu->iowait_boost * 100)); } -static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max, +static void intel_cpufreq_hwp_update(struct cpudata *cpu, u32 min, u32 max, u32 desired, bool fast_switch) { u64 prev = READ_ONCE(cpu->hwp_req_cached), value = prev; @@ -2550,7 +2552,7 @@ static void intel_cpufreq_adjust_hwp(struct cpudata *cpu, u32 min, u32 max, wrmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, value); } -static void intel_cpufreq_adjust_perf_ctl(struct cpudata *cpu, +static void intel_cpufreq_perf_ctl_update(struct cpudata *cpu, u32 target_pstate, bool fast_switch) { if (fast_switch) @@ -2572,10 +2574,10 @@ static int intel_cpufreq_update_pstate(struct cpufreq_policy *policy, int max_pstate = policy->strict_target ? target_pstate : cpu->max_perf_ratio; - intel_cpufreq_adjust_hwp(cpu, target_pstate, max_pstate, 0, + intel_cpufreq_hwp_update(cpu, target_pstate, max_pstate, 0, fast_switch); } else if (target_pstate != old_pstate) { - intel_cpufreq_adjust_perf_ctl(cpu, target_pstate, fast_switch); + intel_cpufreq_perf_ctl_update(cpu, target_pstate, fast_switch); } cpu->pstate.current_pstate = target_pstate; @@ -2673,7 +2675,7 @@ static void intel_cpufreq_adjust_perf(unsigned int cpunum, target_pstate = clamp_t(int, target_pstate, min_pstate, max_pstate); - intel_cpufreq_adjust_hwp(cpu, min_pstate, max_pstate, target_pstate, true); + intel_cpufreq_hwp_update(cpu, min_pstate, max_pstate, target_pstate, true); cpu->pstate.current_pstate = target_pstate; intel_cpufreq_trace(cpu, INTEL_PSTATE_TRACE_FAST_SWITCH, old_pstate); @@ -2709,7 +2711,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) if (hwp_active) { u64 value; - intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); WRITE_ONCE(cpu->hwp_req_cached, value); diff --git a/drivers/cpufreq/loongson1-cpufreq.c b/drivers/cpufreq/loongson1-cpufreq.c index 86f612593e49..fb72d709db56 100644 --- a/drivers/cpufreq/loongson1-cpufreq.c +++ b/drivers/cpufreq/loongson1-cpufreq.c @@ -116,7 +116,7 @@ static int ls1x_cpufreq_exit(struct cpufreq_policy *policy) static struct cpufreq_driver ls1x_cpufreq_driver = { .name = "cpufreq-ls1x", - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = ls1x_cpufreq_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 022e3e966e71..f2e491b25b07 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -463,7 +463,7 @@ static int mtk_cpufreq_exit(struct cpufreq_policy *policy) } static struct cpufreq_driver mtk_cpufreq_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 3694bb030df3..e035ee216b0f 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -144,7 +144,7 @@ static int omap_cpu_exit(struct cpufreq_policy *policy) } static struct cpufreq_driver omap_driver = { - .flags = CPUFREQ_STICKY | 
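The intel_pstate hunks above all follow one pattern: snapshot the cached HWP capabilities word once with READ_ONCE() and derive every field from that snapshot, rather than re-reading the cache per field. Reduced to a generic sketch (the struct and function are illustrative; the bit layout shown matches HWP_GUARANTEED_PERF()):

#include <linux/compiler.h>
#include <linux/types.h>

struct foo_cpudata {
	u64 hwp_cap_cached;	/* updated elsewhere with WRITE_ONCE() */
};

static u32 foo_guaranteed_perf(struct foo_cpudata *cpu)
{
	/* one torn-free read; every derived field sees the same value */
	u64 cap = READ_ONCE(cpu->hwp_cap_cached);

	return (cap >> 8) & 0xff;	/* HWP_GUARANTEED_PERF() layout */
}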
CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = omap_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index 73621bc11976..4f20c6a9108d 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -439,8 +439,7 @@ static struct cpufreq_driver pmac_cpufreq_driver = { .init = pmac_cpufreq_cpu_init, .suspend = pmac_cpufreq_suspend, .resume = pmac_cpufreq_resume, - .flags = CPUFREQ_PM_NO_WARN | - CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .attr = cpufreq_generic_attr, .name = "powermac", }; diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 9ed5341dc515..356244510b18 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -54,7 +54,7 @@ static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy, if (IS_ERR(opp)) return PTR_ERR(opp); - ret = dev_pm_opp_set_bw(dev, opp); + ret = dev_pm_opp_set_opp(dev, opp); dev_pm_opp_put(opp); return ret; } @@ -347,6 +347,12 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) dev_pm_opp_of_register_em(cpu_dev, policy->cpus); + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + } + return 0; error: devm_iounmap(dev, base); @@ -374,7 +380,7 @@ static struct freq_attr *qcom_cpufreq_hw_attr[] = { }; static struct cpufreq_driver cpufreq_qcom_hw_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, diff --git a/drivers/cpufreq/s3c24xx-cpufreq.c b/drivers/cpufreq/s3c24xx-cpufreq.c index 37efc0dc3f91..7380c32b238e 100644 --- a/drivers/cpufreq/s3c24xx-cpufreq.c +++ b/drivers/cpufreq/s3c24xx-cpufreq.c @@ -420,7 +420,7 @@ static int s3c_cpufreq_resume(struct cpufreq_policy *policy) #endif static struct cpufreq_driver s3c24xx_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .target = s3c_cpufreq_target, .get = cpufreq_generic_get, .init = s3c_cpufreq_init, diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c index bed496cf8d24..69786e5bbf05 100644 --- a/drivers/cpufreq/s5pv210-cpufreq.c +++ b/drivers/cpufreq/s5pv210-cpufreq.c @@ -574,7 +574,7 @@ static int s5pv210_cpufreq_reboot_notifier_event(struct notifier_block *this, } static struct cpufreq_driver s5pv210_driver = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = s5pv210_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/sa1100-cpufreq.c b/drivers/cpufreq/sa1100-cpufreq.c index 5c075ef6adc0..252b9fc26124 100644 --- a/drivers/cpufreq/sa1100-cpufreq.c +++ b/drivers/cpufreq/sa1100-cpufreq.c @@ -186,7 +186,7 @@ static int __init sa1100_cpu_init(struct cpufreq_policy *policy) } static struct cpufreq_driver sa1100_driver __refdata = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = sa1100_target, diff --git a/drivers/cpufreq/sa1110-cpufreq.c 
b/drivers/cpufreq/sa1110-cpufreq.c index d9d04d935b3a..1a83c8678a63 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -310,7 +310,7 @@ static int __init sa1110_cpu_init(struct cpufreq_policy *policy) /* sa1110_driver needs __refdata because it must remain after init registers * it with cpufreq_register_driver() */ static struct cpufreq_driver sa1110_driver __refdata = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = sa1110_target, diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 491a0a24fb1e..5bd03b59887f 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -217,7 +217,7 @@ static int scmi_cpufreq_exit(struct cpufreq_policy *policy) static struct cpufreq_driver scmi_cpufreq_driver = { .name = "scmi", - .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index e5140ad63db8..d6a698a1b5d1 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -191,7 +191,7 @@ static int scpi_cpufreq_exit(struct cpufreq_policy *policy) static struct cpufreq_driver scpi_cpufreq_driver = { .name = "scpi-cpufreq", - .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, diff --git a/drivers/cpufreq/spear-cpufreq.c b/drivers/cpufreq/spear-cpufreq.c index 73bd8dc47074..7d0d62a06bf3 100644 --- a/drivers/cpufreq/spear-cpufreq.c +++ b/drivers/cpufreq/spear-cpufreq.c @@ -160,7 +160,7 @@ static int spear_cpufreq_init(struct cpufreq_policy *policy) static struct cpufreq_driver spear_cpufreq_driver = { .name = "cpufreq-spear", - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = spear_cpufreq_target, .get = cpufreq_generic_get, diff --git a/drivers/cpufreq/tango-cpufreq.c b/drivers/cpufreq/tango-cpufreq.c deleted file mode 100644 index 89a7f860bfe8..000000000000 --- a/drivers/cpufreq/tango-cpufreq.c +++ /dev/null @@ -1,38 +0,0 @@ -#include <linux/of.h> -#include <linux/cpu.h> -#include <linux/clk.h> -#include <linux/pm_opp.h> -#include <linux/platform_device.h> - -static const struct of_device_id machines[] __initconst = { - { .compatible = "sigma,tango4" }, - { /* sentinel */ } -}; - -static int __init tango_cpufreq_init(void) -{ - struct device *cpu_dev = get_cpu_device(0); - unsigned long max_freq; - struct clk *cpu_clk; - void *res; - - if (!of_match_node(machines, of_root)) - return -ENODEV; - - cpu_clk = clk_get(cpu_dev, NULL); - if (IS_ERR(cpu_clk)) - return -ENODEV; - - max_freq = clk_get_rate(cpu_clk); - - dev_pm_opp_add(cpu_dev, max_freq / 1, 0); - dev_pm_opp_add(cpu_dev, max_freq / 2, 0); - dev_pm_opp_add(cpu_dev, max_freq / 3, 0); - dev_pm_opp_add(cpu_dev, max_freq / 5, 0); - dev_pm_opp_add(cpu_dev, max_freq / 9, 0); - - res = platform_device_register_data(NULL, "cpufreq-dt", -1, NULL, 0); - - return PTR_ERR_OR_ZERO(res); -} -device_initcall(tango_cpufreq_init); diff --git a/drivers/cpufreq/tegra186-cpufreq.c 
b/drivers/cpufreq/tegra186-cpufreq.c index e566ea298b59..5d1943e787b0 100644 --- a/drivers/cpufreq/tegra186-cpufreq.c +++ b/drivers/cpufreq/tegra186-cpufreq.c @@ -117,7 +117,7 @@ static unsigned int tegra186_cpufreq_get(unsigned int cpu) static struct cpufreq_driver tegra186_cpufreq_driver = { .name = "tegra186", - .flags = CPUFREQ_STICKY | CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, .get = tegra186_cpufreq_get, .verify = cpufreq_generic_frequency_table_verify, diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c index 6a67f36f3b80..a9620e4489ae 100644 --- a/drivers/cpufreq/tegra194-cpufreq.c +++ b/drivers/cpufreq/tegra194-cpufreq.c @@ -272,8 +272,7 @@ static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy, static struct cpufreq_driver tegra194_cpufreq_driver = { .name = "tegra194", - .flags = CPUFREQ_STICKY | CPUFREQ_CONST_LOOPS | - CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = tegra194_cpufreq_set_target, .get = tegra194_get_speed, diff --git a/drivers/cpufreq/tegra20-cpufreq.c b/drivers/cpufreq/tegra20-cpufreq.c index 8c893043953e..e8db3d75be25 100644 --- a/drivers/cpufreq/tegra20-cpufreq.c +++ b/drivers/cpufreq/tegra20-cpufreq.c @@ -32,6 +32,16 @@ static bool cpu0_node_has_opp_v2_prop(void) return ret; } +static void tegra20_cpufreq_put_supported_hw(void *opp_table) +{ + dev_pm_opp_put_supported_hw(opp_table); +} + +static void tegra20_cpufreq_dt_unregister(void *cpufreq_dt) +{ + platform_device_unregister(cpufreq_dt); +} + static int tegra20_cpufreq_probe(struct platform_device *pdev) { struct platform_device *cpufreq_dt; @@ -68,42 +78,31 @@ static int tegra20_cpufreq_probe(struct platform_device *pdev) return err; } + err = devm_add_action_or_reset(&pdev->dev, + tegra20_cpufreq_put_supported_hw, + opp_table); + if (err) + return err; + cpufreq_dt = platform_device_register_simple("cpufreq-dt", -1, NULL, 0); err = PTR_ERR_OR_ZERO(cpufreq_dt); if (err) { dev_err(&pdev->dev, "failed to create cpufreq-dt device: %d\n", err); - goto err_put_supported_hw; + return err; } - platform_set_drvdata(pdev, cpufreq_dt); - - return 0; - -err_put_supported_hw: - dev_pm_opp_put_supported_hw(opp_table); - - return err; -} - -static int tegra20_cpufreq_remove(struct platform_device *pdev) -{ - struct platform_device *cpufreq_dt; - struct opp_table *opp_table; - - cpufreq_dt = platform_get_drvdata(pdev); - platform_device_unregister(cpufreq_dt); - - opp_table = dev_pm_opp_get_opp_table(get_cpu_device(0)); - dev_pm_opp_put_supported_hw(opp_table); - dev_pm_opp_put_opp_table(opp_table); + err = devm_add_action_or_reset(&pdev->dev, + tegra20_cpufreq_dt_unregister, + cpufreq_dt); + if (err) + return err; return 0; } static struct platform_driver tegra20_cpufreq_driver = { .probe = tegra20_cpufreq_probe, - .remove = tegra20_cpufreq_remove, .driver = { .name = "tegra20-cpufreq", }, diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c index f711d8eaea6a..51dfa9ae6cf5 100644 --- a/drivers/cpufreq/vexpress-spc-cpufreq.c +++ b/drivers/cpufreq/vexpress-spc-cpufreq.c @@ -486,8 +486,7 @@ static void ve_spc_cpufreq_ready(struct cpufreq_policy *policy) static struct cpufreq_driver ve_spc_cpufreq_driver = { .name = "vexpress-spc", - .flags = CPUFREQ_STICKY | - CPUFREQ_HAVE_GOVERNOR_PER_POLICY | + .flags = CPUFREQ_HAVE_GOVERNOR_PER_POLICY | 
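The tegra20 conversion above replaces an explicit .remove() callback with device-managed teardown actions; the same idiom works for any resource that lacks a devm variant. A sketch with hypothetical driver names:

#include <linux/device.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static void foo_unmap(void *base)
{
	iounmap(base);	/* runs on probe failure and on device removal */
}

static int foo_probe(struct platform_device *pdev)
{
	struct resource *res;
	void __iomem *base;
	int err;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -ENODEV;

	base = ioremap(res->start, resource_size(res));
	if (!base)
		return -ENOMEM;

	/* on failure this calls foo_unmap(base) itself before returning */
	err = devm_add_action_or_reset(&pdev->dev, foo_unmap, base);
	if (err)
		return err;

	/* ... rest of probe; no .remove() cleanup needed for base ... */
	return 0;
}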
CPUFREQ_NEED_INITIAL_FREQ_CHECK, .verify = cpufreq_generic_frequency_table_verify, .target_index = ve_spc_cpufreq_set_target, diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 6aa10de792b3..bf3047896e41 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -757,6 +757,9 @@ static void devfreq_dev_release(struct device *dev) if (devfreq->profile->exit) devfreq->profile->exit(devfreq->dev.parent); + if (devfreq->opp_table) + dev_pm_opp_put_opp_table(devfreq->opp_table); + mutex_destroy(&devfreq->lock); kfree(devfreq); } @@ -844,6 +847,10 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); + devfreq->opp_table = dev_pm_opp_get_opp_table(dev); + if (IS_ERR(devfreq->opp_table)) + devfreq->opp_table = NULL; + atomic_set(&devfreq->suspend_count, 0); dev_set_name(&devfreq->dev, "%s", dev_name(dev)); @@ -893,13 +900,13 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_devfreq; devfreq->nb_min.notifier_call = qos_min_notifier_call; - err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min, + err = dev_pm_qos_add_notifier(dev, &devfreq->nb_min, DEV_PM_QOS_MIN_FREQUENCY); if (err) goto err_devfreq; devfreq->nb_max.notifier_call = qos_max_notifier_call; - err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_max, + err = dev_pm_qos_add_notifier(dev, &devfreq->nb_max, DEV_PM_QOS_MAX_FREQUENCY); if (err) goto err_devfreq; diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 2a52f97b542d..70f44b3ca42e 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -40,7 +40,7 @@ /* * Definition of governor attribute flags except for common sysfs attributes * - DEVFREQ_GOV_ATTR_POLLING_INTERVAL - * : Indicate polling_interal sysfs attribute + * : Indicate polling_interval sysfs attribute * - DEVFREQ_GOV_ATTR_TIMER * : Indicate timer sysfs attribute */ diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 63332e4a65ae..b094132bd20b 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -19,18 +19,16 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq, = (struct devfreq_passive_data *)devfreq->data; struct devfreq *parent_devfreq = (struct devfreq *)p_data->parent; unsigned long child_freq = ULONG_MAX; - struct dev_pm_opp *opp; - int i, count, ret = 0; + struct dev_pm_opp *opp, *p_opp; + int i, count; /* * If the devfreq device with passive governor has the specific method * to determine the next frequency, should use the get_target_freq() * of struct devfreq_passive_data. */ - if (p_data->get_target_freq) { - ret = p_data->get_target_freq(devfreq, freq); - goto out; - } + if (p_data->get_target_freq) + return p_data->get_target_freq(devfreq, freq); /* * If the parent and passive devfreq device uses the OPP table, @@ -56,26 +54,35 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq, * list of parent device. Because in this case, *freq is temporary * value which is decided by ondemand governor. 
*/ - opp = devfreq_recommended_opp(parent_devfreq->dev.parent, freq, 0); - if (IS_ERR(opp)) { - ret = PTR_ERR(opp); - goto out; - } + if (devfreq->opp_table && parent_devfreq->opp_table) { + p_opp = devfreq_recommended_opp(parent_devfreq->dev.parent, + freq, 0); + if (IS_ERR(p_opp)) + return PTR_ERR(p_opp); + + opp = dev_pm_opp_xlate_required_opp(parent_devfreq->opp_table, + devfreq->opp_table, p_opp); + dev_pm_opp_put(p_opp); - dev_pm_opp_put(opp); + if (IS_ERR(opp)) + return PTR_ERR(opp); + + *freq = dev_pm_opp_get_freq(opp); + dev_pm_opp_put(opp); + + return 0; + } /* - * Get the OPP table's index of decided freqeuncy by governor + * Get the OPP table's index of decided frequency by governor * of parent device. */ for (i = 0; i < parent_devfreq->profile->max_state; i++) if (parent_devfreq->profile->freq_table[i] == *freq) break; - if (i == parent_devfreq->profile->max_state) { - ret = -EINVAL; - goto out; - } + if (i == parent_devfreq->profile->max_state) + return -EINVAL; /* Get the suitable frequency by using index of parent device. */ if (i < devfreq->profile->max_state) { @@ -88,8 +95,7 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq, /* Return the suitable frequency for passive device. */ *freq = child_freq; -out: - return ret; + return 0; } static int devfreq_passive_notifier_call(struct notifier_block *nb, diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c index 2e912166a993..9e9d3b4c6d48 100644 --- a/drivers/devfreq/rk3399_dmc.c +++ b/drivers/devfreq/rk3399_dmc.c @@ -400,7 +400,7 @@ static int rk3399_dmcfreq_probe(struct platform_device *pdev) default: ret = -EINVAL; goto err_edev; - }; + } no_pmu: arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0, diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index 117cad7968ab..ce83f883ca65 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -647,7 +647,7 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq, return PTR_ERR(opp); } - ret = dev_pm_opp_set_bw(dev, opp); + ret = dev_pm_opp_set_opp(dev, opp); dev_pm_opp_put(opp); return ret; @@ -849,7 +849,7 @@ static int tegra_devfreq_probe(struct platform_device *pdev) return err; } - err = dev_pm_opp_of_add_table(&pdev->dev); + err = dev_pm_opp_of_add_table_noclk(&pdev->dev, 0); if (err) { dev_err(&pdev->dev, "Failed to add OPP table: %d\n", err); goto put_hw; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e6703ae98760..05e0ef58fe32 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -134,7 +134,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) if (!gmu->legacy) { a6xx_hfi_set_freq(gmu, perf_index); - dev_pm_opp_set_bw(&gpu->pdev->dev, opp); + dev_pm_opp_set_opp(&gpu->pdev->dev, opp); pm_runtime_put(gmu->dev); return; } @@ -158,7 +158,7 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) if (ret) dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret); - dev_pm_opp_set_bw(&gpu->pdev->dev, opp); + dev_pm_opp_set_opp(&gpu->pdev->dev, opp); pm_runtime_put(gmu->dev); } @@ -866,7 +866,7 @@ static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu) if (IS_ERR_OR_NULL(gpu_opp)) return; - dev_pm_opp_set_bw(&gpu->pdev->dev, gpu_opp); + dev_pm_opp_set_opp(&gpu->pdev->dev, gpu_opp); dev_pm_opp_put(gpu_opp); } @@ -1072,7 +1072,7 @@ int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu) a6xx_gmu_shutdown(gmu); /* Remove the bus 
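The parent-to-child OPP translation the passive governor now performs can be sketched on its own. The opp_table pointers are assumed to come from dev_pm_opp_get_opp_table(), as devfreq_add_device() does above; function and parameter names are hypothetical:

#include <linux/pm_opp.h>

static int foo_xlate_freq(struct device *parent_dev,
			  struct opp_table *parent_table,
			  struct opp_table *child_table,
			  unsigned long *freq)
{
	struct dev_pm_opp *p_opp, *opp;

	p_opp = dev_pm_opp_find_freq_ceil(parent_dev, freq);
	if (IS_ERR(p_opp))
		return PTR_ERR(p_opp);

	/* map via required-opps; fails if the two tables are not linked */
	opp = dev_pm_opp_xlate_required_opp(parent_table, child_table, p_opp);
	dev_pm_opp_put(p_opp);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	*freq = dev_pm_opp_get_freq(opp);
	dev_pm_opp_put(opp);
	return 0;
}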
vote */ - dev_pm_opp_set_bw(&gpu->pdev->dev, NULL); + dev_pm_opp_set_opp(&gpu->pdev->dev, NULL); /* * Make sure the GX domain is off before turning off the GMU (CX) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 28f93b9aa51b..3273360f30f7 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -37,7 +37,7 @@ */ /* un-comment DEBUG to enable pr_debug() statements */ -#define DEBUG +/* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8c905aabacc0..c3f3d9249cc5 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -27,6 +27,10 @@ * various states of availability. */ LIST_HEAD(opp_tables); + +/* OPP tables with uninitialized required OPPs */ +LIST_HEAD(lazy_opp_tables); + /* Lock to allow exclusive modification to the device and opp lists */ DEFINE_MUTEX(opp_table_lock); /* Flag indicating that opp_tables list is being updated at the moment */ @@ -146,6 +150,32 @@ unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp) EXPORT_SYMBOL_GPL(dev_pm_opp_get_level); /** + * dev_pm_opp_get_required_pstate() - Gets the required performance state + * corresponding to an available opp + * @opp: opp for which the required performance state is to be returned + * @index: index of the required opp + * + * Return: performance state read from device tree corresponding to the + * required opp, else return 0. + */ +unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp, + unsigned int index) +{ + if (IS_ERR_OR_NULL(opp) || !opp->available || + index >= opp->opp_table->required_opp_count) { + pr_err("%s: Invalid parameters\n", __func__); + return 0; + } + + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp->opp_table)) + return 0; + + return opp->required_opps[index]->pstate; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_required_pstate); + +/** * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not * @opp: opp for which turbo mode is being verified * @@ -449,6 +479,55 @@ struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev, } EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_exact); +/** + * dev_pm_opp_find_level_ceil() - search for a rounded-up level + * @dev: device for which we do this operation + * @level: level to search for + * + * Return: Searches for a rounded-up match in the opp table and returns a + * pointer to the matching opp if found, else returns ERR_PTR in case of + * error and should be handled using IS_ERR. Error return values can be: + * EINVAL: for bad pointer + * ERANGE: no match found for search + * ENODEV: if device not found in list of registered devices + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use.
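The dev_pm_opp_set_bw() to dev_pm_opp_set_opp() conversions in the msm and devfreq hunks all share the shape below, and passing NULL, as a6xx_gmu_stop() does, drops the vote. A sketch assuming a hypothetical devfreq target callback:

#include <linux/devfreq.h>
#include <linux/pm_opp.h>

static int foo_target(struct device *dev, unsigned long *freq, u32 flags)
{
	struct dev_pm_opp *opp;
	int ret;

	opp = devfreq_recommended_opp(dev, freq, flags);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* was dev_pm_opp_set_bw(); now programs everything the OPP holds */
	ret = dev_pm_opp_set_opp(dev, opp);
	dev_pm_opp_put(opp);
	return ret;
}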
+ */ +struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev, + unsigned int *level) +{ + struct opp_table *opp_table; + struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE); + + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + int r = PTR_ERR(opp_table); + + dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r); + return ERR_PTR(r); + } + + mutex_lock(&opp_table->lock); + + list_for_each_entry(temp_opp, &opp_table->opp_list, node) { + if (temp_opp->available && temp_opp->level >= *level) { + opp = temp_opp; + *level = opp->level; + + /* Increment the reference count of OPP */ + dev_pm_opp_get(opp); + break; + } + } + + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); + + return opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_ceil); + static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table, unsigned long *freq) { @@ -655,6 +734,10 @@ static inline int _generic_set_opp_clk_only(struct device *dev, struct clk *clk, { int ret; + /* We may reach here for devices which don't change frequency */ + if (IS_ERR(clk)) + return 0; + ret = clk_set_rate(clk, freq); if (ret) { dev_err(dev, "%s: failed to set clock rate: %d\n", __func__, @@ -666,12 +749,12 @@ static inline int _generic_set_opp_clk_only(struct device *dev, struct clk *clk, static int _generic_set_opp_regulator(struct opp_table *opp_table, struct device *dev, - unsigned long old_freq, + struct dev_pm_opp *opp, unsigned long freq, - struct dev_pm_opp_supply *old_supply, - struct dev_pm_opp_supply *new_supply) + int scaling_down) { struct regulator *reg = opp_table->regulators[0]; + struct dev_pm_opp *old_opp = opp_table->current_opp; int ret; /* This function only supports single regulator per device */ @@ -681,8 +764,8 @@ static int _generic_set_opp_regulator(struct opp_table *opp_table, } /* Scaling up? Scale voltage before frequency */ - if (freq >= old_freq) { - ret = _set_opp_voltage(dev, reg, new_supply); + if (!scaling_down) { + ret = _set_opp_voltage(dev, reg, opp->supplies); if (ret) goto restore_voltage; } @@ -693,8 +776,8 @@ static int _generic_set_opp_regulator(struct opp_table *opp_table, goto restore_voltage; /* Scaling down? 
Scale voltage after frequency */ - if (freq < old_freq) { - ret = _set_opp_voltage(dev, reg, new_supply); + if (scaling_down) { + ret = _set_opp_voltage(dev, reg, opp->supplies); if (ret) goto restore_freq; } @@ -712,19 +795,18 @@ static int _generic_set_opp_regulator(struct opp_table *opp_table, return 0; restore_freq: - if (_generic_set_opp_clk_only(dev, opp_table->clk, old_freq)) + if (_generic_set_opp_clk_only(dev, opp_table->clk, old_opp->rate)) dev_err(dev, "%s: failed to restore old-freq (%lu Hz)\n", - __func__, old_freq); + __func__, old_opp->rate); restore_voltage: /* This shouldn't harm even if the voltages weren't updated earlier */ - if (old_supply) - _set_opp_voltage(dev, reg, old_supply); + _set_opp_voltage(dev, reg, old_opp->supplies); return ret; } static int _set_opp_bw(const struct opp_table *opp_table, - struct dev_pm_opp *opp, struct device *dev, bool remove) + struct dev_pm_opp *opp, struct device *dev) { u32 avg, peak; int i, ret; @@ -733,7 +815,7 @@ static int _set_opp_bw(const struct opp_table *opp_table, return 0; for (i = 0; i < opp_table->path_count; i++) { - if (remove) { + if (!opp) { avg = 0; peak = 0; } else { @@ -743,7 +825,7 @@ static int _set_opp_bw(const struct opp_table *opp_table, ret = icc_set_bw(opp_table->paths[i], avg, peak); if (ret) { dev_err(dev, "Failed to %s bandwidth[%d]: %d\n", - remove ? "remove" : "set", i, ret); + opp ? "set" : "remove", i, ret); return ret; } } @@ -752,29 +834,31 @@ static int _set_opp_bw(const struct opp_table *opp_table, } static int _set_opp_custom(const struct opp_table *opp_table, - struct device *dev, unsigned long old_freq, - unsigned long freq, - struct dev_pm_opp_supply *old_supply, - struct dev_pm_opp_supply *new_supply) + struct device *dev, struct dev_pm_opp *opp, + unsigned long freq) { - struct dev_pm_set_opp_data *data; + struct dev_pm_set_opp_data *data = opp_table->set_opp_data; + struct dev_pm_opp *old_opp = opp_table->current_opp; int size; - data = opp_table->set_opp_data; + /* + * We support this only if dev_pm_opp_set_regulators() was called + * earlier. 
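For context, the data block filled in above is what a platform's set_opp() callback consumes. A minimal sketch of such a callback (not from this patch; the ordering mirrors the generic regulator path, and error/restore handling is trimmed):

#include <linux/clk.h>
#include <linux/pm_opp.h>
#include <linux/regulator/consumer.h>

static int example_set_opp(struct dev_pm_set_opp_data *data)
{
	bool scaling_up = data->new_opp.rate >= data->old_opp.rate;
	int ret;

	/* Scaling up? Raise the voltage before the frequency */
	if (data->regulator_count && scaling_up) {
		struct dev_pm_opp_supply *s = &data->new_opp.supplies[0];

		ret = regulator_set_voltage_triplet(data->regulators[0],
						    s->u_volt_min, s->u_volt,
						    s->u_volt_max);
		if (ret)
			return ret;
	}

	ret = clk_set_rate(data->clk, data->new_opp.rate);
	if (ret)
		return ret;

	/* Scaling down? Lower the voltage after the frequency */
	if (data->regulator_count && !scaling_up) {
		struct dev_pm_opp_supply *s = &data->new_opp.supplies[0];

		ret = regulator_set_voltage_triplet(data->regulators[0],
						    s->u_volt_min, s->u_volt,
						    s->u_volt_max);
	}

	return ret;
}

A driver hands this to dev_pm_opp_register_set_opp_helper(); with the change above, data->regulator_count stays zero unless dev_pm_opp_set_regulators() was called, so the voltage branches are skipped.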
+ */ + if (opp_table->sod_supplies) { + size = sizeof(*old_opp->supplies) * opp_table->regulator_count; + memcpy(data->old_opp.supplies, old_opp->supplies, size); + memcpy(data->new_opp.supplies, opp->supplies, size); + data->regulator_count = opp_table->regulator_count; + } else { + data->regulator_count = 0; + } + data->regulators = opp_table->regulators; - data->regulator_count = opp_table->regulator_count; data->clk = opp_table->clk; data->dev = dev; - - data->old_opp.rate = old_freq; - size = sizeof(*old_supply) * opp_table->regulator_count; - if (!old_supply) - memset(data->old_opp.supplies, 0, size); - else - memcpy(data->old_opp.supplies, old_supply, size); - + data->old_opp.rate = old_opp->rate; data->new_opp.rate = freq; - memcpy(data->new_opp.supplies, new_supply, size); return opp_table->set_opp(data); } @@ -809,6 +893,10 @@ static int _set_required_opps(struct device *dev, if (!required_opp_tables) return 0; + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return -EBUSY; + /* Single genpd case */ if (!genpd_virt_devs) return _set_required_opp(dev, dev, opp, 0); @@ -841,38 +929,32 @@ static int _set_required_opps(struct device *dev, return ret; } -/** - * dev_pm_opp_set_bw() - sets bandwidth levels corresponding to an opp - * @dev: device for which we do this operation - * @opp: opp based on which the bandwidth levels are to be configured - * - * This configures the bandwidth to the levels specified by the OPP. However - * if the OPP specified is NULL the bandwidth levels are cleared out. - * - * Return: 0 on success or a negative error value. - */ -int dev_pm_opp_set_bw(struct device *dev, struct dev_pm_opp *opp) +static void _find_current_opp(struct device *dev, struct opp_table *opp_table) { - struct opp_table *opp_table; - int ret; + struct dev_pm_opp *opp = ERR_PTR(-ENODEV); + unsigned long freq; - opp_table = _find_opp_table(dev); - if (IS_ERR(opp_table)) { - dev_err(dev, "%s: device opp table doesn't exist\n", __func__); - return PTR_ERR(opp_table); + if (!IS_ERR(opp_table->clk)) { + freq = clk_get_rate(opp_table->clk); + opp = _find_freq_ceil(opp_table, &freq); } - if (opp) - ret = _set_opp_bw(opp_table, opp, dev, false); - else - ret = _set_opp_bw(opp_table, NULL, dev, true); + /* + * Unable to find the current OPP ? Pick the first from the list since + * it is in ascending order, otherwise rest of the code will need to + * make special checks to validate current_opp. 
+ */ + if (IS_ERR(opp)) { + mutex_lock(&opp_table->lock); + opp = list_first_entry(&opp_table->opp_list, struct dev_pm_opp, node); + dev_pm_opp_get(opp); + mutex_unlock(&opp_table->lock); + } - dev_pm_opp_put_opp_table(opp_table); - return ret; + opp_table->current_opp = opp; } -EXPORT_SYMBOL_GPL(dev_pm_opp_set_bw); -static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) +static int _disable_opp_table(struct device *dev, struct opp_table *opp_table) { int ret; @@ -887,7 +969,7 @@ static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) if (!_get_opp_count(opp_table)) return 0; - ret = _set_opp_bw(opp_table, NULL, dev, true); + ret = _set_opp_bw(opp_table, NULL, dev); if (ret) return ret; @@ -900,6 +982,89 @@ static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) return ret; } +static int _set_opp(struct device *dev, struct opp_table *opp_table, + struct dev_pm_opp *opp, unsigned long freq) +{ + struct dev_pm_opp *old_opp; + int scaling_down, ret; + + if (unlikely(!opp)) + return _disable_opp_table(dev, opp_table); + + /* Find the currently set OPP if we don't know already */ + if (unlikely(!opp_table->current_opp)) + _find_current_opp(dev, opp_table); + + old_opp = opp_table->current_opp; + + /* Return early if nothing to do */ + if (opp_table->enabled && old_opp == opp) { + dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__); + return 0; + } + + dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n", + __func__, old_opp->rate, freq, old_opp->level, opp->level, + old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0, + opp->bandwidth ? opp->bandwidth[0].peak : 0); + + scaling_down = _opp_compare_key(old_opp, opp); + if (scaling_down == -1) + scaling_down = 0; + + /* Scaling up? Configure required OPPs before frequency */ + if (!scaling_down) { + ret = _set_required_opps(dev, opp_table, opp, true); + if (ret) { + dev_err(dev, "Failed to set required opps: %d\n", ret); + return ret; + } + + ret = _set_opp_bw(opp_table, opp, dev); + if (ret) { + dev_err(dev, "Failed to set bw: %d\n", ret); + return ret; + } + } + + if (opp_table->set_opp) { + ret = _set_opp_custom(opp_table, dev, opp, freq); + } else if (opp_table->regulators) { + ret = _generic_set_opp_regulator(opp_table, dev, opp, freq, + scaling_down); + } else { + /* Only frequency scaling */ + ret = _generic_set_opp_clk_only(dev, opp_table->clk, freq); + } + + if (ret) + return ret; + + /* Scaling down? 
Configure required OPPs after frequency */ + if (scaling_down) { + ret = _set_opp_bw(opp_table, opp, dev); + if (ret) { + dev_err(dev, "Failed to set bw: %d\n", ret); + return ret; + } + + ret = _set_required_opps(dev, opp_table, opp, false); + if (ret) { + dev_err(dev, "Failed to set required opps: %d\n", ret); + return ret; + } + } + + opp_table->enabled = true; + dev_pm_opp_put(old_opp); + + /* Make sure current_opp doesn't get freed */ + dev_pm_opp_get(opp); + opp_table->current_opp = opp; + + return ret; +} + /** * dev_pm_opp_set_rate() - Configure new OPP based on frequency * @dev: device for which we do this operation @@ -914,118 +1079,85 @@ static int _opp_set_rate_zero(struct device *dev, struct opp_table *opp_table) int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq) { struct opp_table *opp_table; - unsigned long freq, old_freq, temp_freq; - struct dev_pm_opp *old_opp, *opp; - struct clk *clk; + unsigned long freq = 0, temp_freq; + struct dev_pm_opp *opp = NULL; int ret; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) { - dev_err(dev, "%s: device opp doesn't exist\n", __func__); + dev_err(dev, "%s: device's opp table doesn't exist\n", __func__); return PTR_ERR(opp_table); } - if (unlikely(!target_freq)) { - ret = _opp_set_rate_zero(dev, opp_table); - goto put_opp_table; - } - - clk = opp_table->clk; - if (IS_ERR(clk)) { - dev_err(dev, "%s: No clock available for the device\n", - __func__); - ret = PTR_ERR(clk); - goto put_opp_table; - } - - freq = clk_round_rate(clk, target_freq); - if ((long)freq <= 0) - freq = target_freq; - - old_freq = clk_get_rate(clk); - - /* Return early if nothing to do */ - if (opp_table->enabled && old_freq == freq) { - dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n", - __func__, freq); - ret = 0; - goto put_opp_table; - } - - /* - * For IO devices which require an OPP on some platforms/SoCs - * while just needing to scale the clock on some others - * we look for empty OPP tables with just a clock handle and - * scale only the clk. This makes dev_pm_opp_set_rate() - * equivalent to a clk_set_rate() - */ - if (!_get_opp_count(opp_table)) { - ret = _generic_set_opp_clk_only(dev, clk, freq); - goto put_opp_table; - } + if (target_freq) { + /* + * For IO devices which require an OPP on some platforms/SoCs + * while just needing to scale the clock on some others + * we look for empty OPP tables with just a clock handle and + * scale only the clk. This makes dev_pm_opp_set_rate() + * equivalent to a clk_set_rate() + */ + if (!_get_opp_count(opp_table)) { + ret = _generic_set_opp_clk_only(dev, opp_table->clk, target_freq); + goto put_opp_table; + } - temp_freq = old_freq; - old_opp = _find_freq_ceil(opp_table, &temp_freq); - if (IS_ERR(old_opp)) { - dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n", - __func__, old_freq, PTR_ERR(old_opp)); - } + freq = clk_round_rate(opp_table->clk, target_freq); + if ((long)freq <= 0) + freq = target_freq; - temp_freq = freq; - opp = _find_freq_ceil(opp_table, &temp_freq); - if (IS_ERR(opp)) { - ret = PTR_ERR(opp); - dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", - __func__, freq, ret); - goto put_old_opp; + /* + * The clock driver may support finer resolution of the + * frequencies than the OPP table, don't update the frequency we + * pass to clk_set_rate() here. 
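As a worked example of the comment above (numbers invented): with OPPs at 100 and 200 MHz, a dev_pm_opp_set_rate(dev, 150000000) call may have clk_round_rate() return 150 MHz if the clock supports it. The ceil lookup then selects the 200 MHz OPP, so voltage (and bandwidth, required OPPs, etc.) are programmed for up to 200 MHz, while clk_set_rate() is still passed the finer-grained 150 MHz rather than the OPP's own 200 MHz.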
+ */ + temp_freq = freq; + opp = _find_freq_ceil(opp_table, &temp_freq); + if (IS_ERR(opp)) { + ret = PTR_ERR(opp); + dev_err(dev, "%s: failed to find OPP for freq %lu (%d)\n", + __func__, freq, ret); + goto put_opp_table; + } } - dev_dbg(dev, "%s: switching OPP: %lu Hz --> %lu Hz\n", __func__, - old_freq, freq); + ret = _set_opp(dev, opp_table, opp, freq); - /* Scaling up? Configure required OPPs before frequency */ - if (freq >= old_freq) { - ret = _set_required_opps(dev, opp_table, opp, true); - if (ret) - goto put_opp; - } - - if (opp_table->set_opp) { - ret = _set_opp_custom(opp_table, dev, old_freq, freq, - IS_ERR(old_opp) ? NULL : old_opp->supplies, - opp->supplies); - } else if (opp_table->regulators) { - ret = _generic_set_opp_regulator(opp_table, dev, old_freq, freq, - IS_ERR(old_opp) ? NULL : old_opp->supplies, - opp->supplies); - } else { - /* Only frequency scaling */ - ret = _generic_set_opp_clk_only(dev, clk, freq); - } + if (target_freq) + dev_pm_opp_put(opp); +put_opp_table: + dev_pm_opp_put_opp_table(opp_table); + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); - /* Scaling down? Configure required OPPs after frequency */ - if (!ret && freq < old_freq) { - ret = _set_required_opps(dev, opp_table, opp, false); - if (ret) - dev_err(dev, "Failed to set required opps: %d\n", ret); - } +/** + * dev_pm_opp_set_opp() - Configure device for OPP + * @dev: device for which we do this operation + * @opp: OPP to set to + * + * This configures the device based on the properties of the OPP passed to this + * routine. + * + * Return: 0 on success, a negative error number otherwise. + */ +int dev_pm_opp_set_opp(struct device *dev, struct dev_pm_opp *opp) +{ + struct opp_table *opp_table; + int ret; - if (!ret) { - ret = _set_opp_bw(opp_table, opp, dev, false); - if (!ret) - opp_table->enabled = true; + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) { + dev_err(dev, "%s: device opp doesn't exist\n", __func__); + return PTR_ERR(opp_table); } -put_opp: - dev_pm_opp_put(opp); -put_old_opp: - if (!IS_ERR(old_opp)) - dev_pm_opp_put(old_opp); -put_opp_table: + ret = _set_opp(dev, opp_table, opp, opp ? 
opp->rate : 0); dev_pm_opp_put_opp_table(opp_table); + return ret; } -EXPORT_SYMBOL_GPL(dev_pm_opp_set_rate); +EXPORT_SYMBOL_GPL(dev_pm_opp_set_opp); /* OPP-dev Helpers */ static void _remove_opp_dev(struct opp_device *opp_dev, @@ -1075,6 +1207,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) mutex_init(&opp_table->lock); mutex_init(&opp_table->genpd_virt_dev_lock); INIT_LIST_HEAD(&opp_table->dev_list); + INIT_LIST_HEAD(&opp_table->lazy); /* Mark regulator count uninitialized */ opp_table->regulator_count = -1; @@ -1087,21 +1220,11 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) _of_init_opp_table(opp_table, dev, index); - /* Find clk for the device */ - opp_table->clk = clk_get(dev, NULL); - if (IS_ERR(opp_table->clk)) { - ret = PTR_ERR(opp_table->clk); - if (ret == -EPROBE_DEFER) - goto remove_opp_dev; - - dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); - } - /* Find interconnect path(s) for the device */ ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); if (ret) { if (ret == -EPROBE_DEFER) - goto put_clk; + goto remove_opp_dev; dev_warn(dev, "%s: Error finding interconnect paths: %d\n", __func__, ret); @@ -1113,9 +1236,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; -put_clk: - if (!IS_ERR(opp_table->clk)) - clk_put(opp_table->clk); remove_opp_dev: _remove_opp_dev(opp_dev, opp_table); err: @@ -1128,6 +1248,37 @@ void _get_opp_table_kref(struct opp_table *opp_table) kref_get(&opp_table->kref); } +static struct opp_table *_update_opp_table_clk(struct device *dev, + struct opp_table *opp_table, + bool getclk) +{ + int ret; + + /* + * Return early if we don't need to get clk or we have already tried it + * earlier. + */ + if (!getclk || IS_ERR(opp_table) || opp_table->clk) + return opp_table; + + /* Find clk for the device */ + opp_table->clk = clk_get(dev, NULL); + + ret = PTR_ERR_OR_ZERO(opp_table->clk); + if (!ret) + return opp_table; + + if (ret == -ENOENT) { + dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret); + return opp_table; + } + + dev_pm_opp_put_opp_table(opp_table); + dev_err_probe(dev, ret, "Couldn't find clock\n"); + + return ERR_PTR(ret); +} + /* * We need to make sure that the OPP table for a device doesn't get added twice, * if this routine gets called in parallel with the same device pointer. @@ -1143,7 +1294,8 @@ void _get_opp_table_kref(struct opp_table *opp_table) * uses the opp_tables_busy flag to indicate if another creator is in the middle * of adding an OPP table and others should wait for it to finish. 
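The new export above is what the a6xx and tegra30 hunks earlier in this patch switch to. A minimal usage sketch (hypothetical consumer; the exact-match lookup is illustrative only):

#include <linux/pm_opp.h>

static int example_apply_opp(struct device *dev, unsigned long freq)
{
	struct dev_pm_opp *opp;
	int ret;

	opp = dev_pm_opp_find_freq_exact(dev, freq, true);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	/* Programs clock, regulators, bandwidth and required OPPs as set up */
	ret = dev_pm_opp_set_opp(dev, opp);
	dev_pm_opp_put(opp);

	return ret;
}

/* Passing NULL drops all votes, as the a6xx suspend path above does */
static void example_drop_votes(struct device *dev)
{
	dev_pm_opp_set_opp(dev, NULL);
}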
*/ -struct opp_table *_add_opp_table_indexed(struct device *dev, int index) +struct opp_table *_add_opp_table_indexed(struct device *dev, int index, + bool getclk) { struct opp_table *opp_table; @@ -1190,12 +1342,12 @@ again: unlock: mutex_unlock(&opp_table_lock); - return opp_table; + return _update_opp_table_clk(dev, opp_table, getclk); } -struct opp_table *_add_opp_table(struct device *dev) +static struct opp_table *_add_opp_table(struct device *dev, bool getclk) { - return _add_opp_table_indexed(dev, 0); + return _add_opp_table_indexed(dev, 0, getclk); } struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) @@ -1214,6 +1366,9 @@ static void _opp_table_kref_release(struct kref *kref) list_del(&opp_table->node); mutex_unlock(&opp_table_lock); + if (opp_table->current_opp) + dev_pm_opp_put(opp_table->current_opp); + _of_clear_opp_table(opp_table); /* Release clk */ @@ -1508,6 +1663,21 @@ static int _opp_is_duplicate(struct device *dev, struct dev_pm_opp *new_opp, return 0; } +void _required_opps_available(struct dev_pm_opp *opp, int count) +{ + int i; + + for (i = 0; i < count; i++) { + if (opp->required_opps[i]->available) + continue; + + opp->available = false; + pr_warn("%s: OPP not supported by required OPP %pOF (%lu)\n", + __func__, opp->required_opps[i]->np, opp->rate); + return; + } +} + /* * Returns: * 0: On success. And appropriate error message for duplicate OPPs. @@ -1527,12 +1697,10 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, mutex_lock(&opp_table->lock); head = &opp_table->opp_list; - if (likely(!rate_not_available)) { - ret = _opp_is_duplicate(dev, new_opp, opp_table, &head); - if (ret) { - mutex_unlock(&opp_table->lock); - return ret; - } + ret = _opp_is_duplicate(dev, new_opp, opp_table, &head); + if (ret) { + mutex_unlock(&opp_table->lock); + return ret; } list_add(&new_opp->node, head); @@ -1549,6 +1717,12 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, __func__, new_opp->rate); } + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(opp_table)) + return 0; + + _required_opps_available(new_opp, opp_table->required_opp_count); + return 0; } @@ -1631,7 +1805,7 @@ struct opp_table *dev_pm_opp_set_supported_hw(struct device *dev, { struct opp_table *opp_table; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -1693,7 +1867,7 @@ struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { struct opp_table *opp_table; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -1737,38 +1911,6 @@ void dev_pm_opp_put_prop_name(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_put_prop_name); -static int _allocate_set_opp_data(struct opp_table *opp_table) -{ - struct dev_pm_set_opp_data *data; - int len, count = opp_table->regulator_count; - - if (WARN_ON(!opp_table->regulators)) - return -EINVAL; - - /* space for set_opp_data */ - len = sizeof(*data); - - /* space for old_opp.supplies and new_opp.supplies */ - len += 2 * sizeof(struct dev_pm_opp_supply) * count; - - data = kzalloc(len, GFP_KERNEL); - if (!data) - return -ENOMEM; - - data->old_opp.supplies = (void *)(data + 1); - data->new_opp.supplies = data->old_opp.supplies + count; - - opp_table->set_opp_data = data; - - return 0; -} - -static void _free_set_opp_data(struct opp_table *opp_table) -{ - kfree(opp_table->set_opp_data); - opp_table->set_opp_data = NULL; -} - /** * 
dev_pm_opp_set_regulators() - Set regulator names for the device * @dev: Device for which regulator name is being set. @@ -1785,11 +1927,12 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, const char * const names[], unsigned int count) { + struct dev_pm_opp_supply *supplies; struct opp_table *opp_table; struct regulator *reg; int ret, i; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -1826,10 +1969,19 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev, opp_table->regulator_count = count; - /* Allocate block only once to pass to set_opp() routines */ - ret = _allocate_set_opp_data(opp_table); - if (ret) + supplies = kmalloc_array(count * 2, sizeof(*supplies), GFP_KERNEL); + if (!supplies) { + ret = -ENOMEM; goto free_regulators; + } + + mutex_lock(&opp_table->lock); + opp_table->sod_supplies = supplies; + if (opp_table->set_opp_data) { + opp_table->set_opp_data->old_opp.supplies = supplies; + opp_table->set_opp_data->new_opp.supplies = supplies + count; + } + mutex_unlock(&opp_table->lock); return opp_table; @@ -1872,7 +2024,15 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) for (i = opp_table->regulator_count - 1; i >= 0; i--) regulator_put(opp_table->regulators[i]); - _free_set_opp_data(opp_table); + mutex_lock(&opp_table->lock); + if (opp_table->set_opp_data) { + opp_table->set_opp_data->old_opp.supplies = NULL; + opp_table->set_opp_data->new_opp.supplies = NULL; + } + + kfree(opp_table->sod_supplies); + opp_table->sod_supplies = NULL; + mutex_unlock(&opp_table->lock); kfree(opp_table->regulators); opp_table->regulators = NULL; @@ -1900,7 +2060,7 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) struct opp_table *opp_table; int ret; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -1910,9 +2070,11 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char *name) goto err; } - /* Already have default clk set, free it */ - if (!IS_ERR(opp_table->clk)) - clk_put(opp_table->clk); + /* clk shouldn't be initialized at this point */ + if (WARN_ON(opp_table->clk)) { + ret = -EBUSY; + goto err; + } /* Find clk for the device */ opp_table->clk = clk_get(dev, name); @@ -1966,12 +2128,13 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_put_clkname); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)) { + struct dev_pm_set_opp_data *data; struct opp_table *opp_table; if (!set_opp) return ERR_PTR(-EINVAL); - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -1982,8 +2145,23 @@ struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, } /* Another CPU that shares the OPP table has set the helper ? 
*/ - if (!opp_table->set_opp) - opp_table->set_opp = set_opp; + if (opp_table->set_opp) + return opp_table; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return ERR_PTR(-ENOMEM); + + mutex_lock(&opp_table->lock); + opp_table->set_opp_data = data; + if (opp_table->sod_supplies) { + data->old_opp.supplies = opp_table->sod_supplies; + data->new_opp.supplies = opp_table->sod_supplies + + opp_table->regulator_count; + } + mutex_unlock(&opp_table->lock); + + opp_table->set_opp = set_opp; return opp_table; } @@ -2005,10 +2183,50 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) WARN_ON(!list_empty(&opp_table->opp_list)); opp_table->set_opp = NULL; + + mutex_lock(&opp_table->lock); + kfree(opp_table->set_opp_data); + opp_table->set_opp_data = NULL; + mutex_unlock(&opp_table->lock); + dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_unregister_set_opp_helper); +static void devm_pm_opp_unregister_set_opp_helper(void *data) +{ + dev_pm_opp_unregister_set_opp_helper(data); +} + +/** + * devm_pm_opp_register_set_opp_helper() - Register custom set OPP helper + * @dev: Device for which the helper is getting registered. + * @set_opp: Custom set OPP helper. + * + * This is a resource-managed version of dev_pm_opp_register_set_opp_helper(). + * + * Return: pointer to 'struct opp_table' on success and errno otherwise. + */ +struct opp_table * +devm_pm_opp_register_set_opp_helper(struct device *dev, + int (*set_opp)(struct dev_pm_set_opp_data *data)) +{ + struct opp_table *opp_table; + int err; + + opp_table = dev_pm_opp_register_set_opp_helper(dev, set_opp); + if (IS_ERR(opp_table)) + return opp_table; + + err = devm_add_action_or_reset(dev, devm_pm_opp_unregister_set_opp_helper, + opp_table); + if (err) + return ERR_PTR(err); + + return opp_table; +} +EXPORT_SYMBOL_GPL(devm_pm_opp_register_set_opp_helper); + static void _opp_detach_genpd(struct opp_table *opp_table) { int index; @@ -2058,7 +2276,7 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, int index = 0, ret = -EINVAL; const char **name = names; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, false); if (IS_ERR(opp_table)) return opp_table; @@ -2144,6 +2362,97 @@ void dev_pm_opp_detach_genpd(struct opp_table *opp_table) } EXPORT_SYMBOL_GPL(dev_pm_opp_detach_genpd); +static void devm_pm_opp_detach_genpd(void *data) +{ + dev_pm_opp_detach_genpd(data); +} + +/** + * devm_pm_opp_attach_genpd - Attach genpd(s) for the device and save virtual + * device pointer + * @dev: Consumer device for which the genpd is getting attached. + * @names: Null terminated array of pointers containing names of genpd to attach. + * @virt_devs: Pointer to return the array of virtual devices. + * + * This is a resource-managed version of dev_pm_opp_attach_genpd(). + * + * Return: pointer to 'struct opp_table' on success and errno otherwise. + */ +struct opp_table * +devm_pm_opp_attach_genpd(struct device *dev, const char **names, + struct device ***virt_devs) +{ + struct opp_table *opp_table; + int err; + + opp_table = dev_pm_opp_attach_genpd(dev, names, virt_devs); + if (IS_ERR(opp_table)) + return opp_table; + + err = devm_add_action_or_reset(dev, devm_pm_opp_detach_genpd, + opp_table); + if (err) + return ERR_PTR(err); + + return opp_table; +} +EXPORT_SYMBOL_GPL(devm_pm_opp_attach_genpd); + +/** + * dev_pm_opp_xlate_required_opp() - Find required OPP for @src_table OPP. + * @src_table: OPP table which has @dst_table as one of its required OPP table.
+ * @dst_table: Required OPP table of the @src_table. + * @src_opp: OPP from the @src_table. + * + * This function returns the OPP (present in @dst_table) pointed out by the + * "required-opps" property of the @src_opp (present in @src_table). + * + * The callers are required to call dev_pm_opp_put() for the returned OPP after + * use. + * + * Return: pointer to 'struct dev_pm_opp' on success and errno otherwise. + */ +struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, + struct opp_table *dst_table, + struct dev_pm_opp *src_opp) +{ + struct dev_pm_opp *opp, *dest_opp = ERR_PTR(-ENODEV); + int i; + + if (!src_table || !dst_table || !src_opp || + !src_table->required_opp_tables) + return ERR_PTR(-EINVAL); + + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(src_table)) + return ERR_PTR(-EBUSY); + + for (i = 0; i < src_table->required_opp_count; i++) { + if (src_table->required_opp_tables[i] == dst_table) { + mutex_lock(&src_table->lock); + + list_for_each_entry(opp, &src_table->opp_list, node) { + if (opp == src_opp) { + dest_opp = opp->required_opps[i]; + dev_pm_opp_get(dest_opp); + break; + } + } + + mutex_unlock(&src_table->lock); + break; + } + } + + if (IS_ERR(dest_opp)) { + pr_err("%s: Couldn't find matching OPP (%p: %p)\n", __func__, + src_table, dst_table); + } + + return dest_opp; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_xlate_required_opp); + /** * dev_pm_opp_xlate_performance_state() - Find required OPP's pstate for src_table. * @src_table: OPP table which has dst_table as one of its required OPP table. @@ -2172,9 +2481,13 @@ int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, * and so none of them have the "required-opps" property set. Return the * pstate of the src_table as it is in such cases. */ - if (!src_table->required_opp_count) + if (!src_table || !src_table->required_opp_count) return pstate; + /* required-opps not fully initialized yet */ + if (lazy_linking_pending(src_table)) + return -EBUSY; + for (i = 0; i < src_table->required_opp_count; i++) { if (src_table->required_opp_tables[i]->np == dst_table->np) break; @@ -2226,7 +2539,7 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt) struct opp_table *opp_table; int ret; - opp_table = _add_opp_table(dev); + opp_table = _add_opp_table(dev, true); if (IS_ERR(opp_table)) return PTR_ERR(opp_table); @@ -2504,3 +2817,44 @@ void dev_pm_opp_remove_table(struct device *dev) dev_pm_opp_put_opp_table(opp_table); } EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table); + +/** + * dev_pm_opp_sync_regulators() - Sync state of voltage regulators + * @dev: device for which we do this operation + * + * Sync voltage state of the OPP table regulators. + * + * Return: 0 on success or a negative error value.
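The translation helper added above is what the devfreq passive-governor hunk at the top of this patch builds on. A condensed sketch of that pattern (names hypothetical; the two tables are assumed to be linked through "required-opps"):

#include <linux/pm_opp.h>

static int example_xlate_freq(struct opp_table *src, struct opp_table *dst,
			      struct dev_pm_opp *src_opp, unsigned long *freq)
{
	struct dev_pm_opp *opp;

	/* Follow the "required-opps" link from src_opp into dst */
	opp = dev_pm_opp_xlate_required_opp(src, dst, src_opp);
	if (IS_ERR(opp))
		return PTR_ERR(opp);

	*freq = dev_pm_opp_get_freq(opp);
	dev_pm_opp_put(opp);

	return 0;
}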
+ */ +int dev_pm_opp_sync_regulators(struct device *dev) +{ + struct opp_table *opp_table; + struct regulator *reg; + int i, ret = 0; + + /* Device may not have OPP table */ + opp_table = _find_opp_table(dev); + if (IS_ERR(opp_table)) + return 0; + + /* Regulator may not be required for the device */ + if (unlikely(!opp_table->regulators)) + goto put_table; + + /* Nothing to sync if voltage wasn't changed */ + if (!opp_table->enabled) + goto put_table; + + for (i = 0; i < opp_table->regulator_count; i++) { + reg = opp_table->regulators[i]; + ret = regulator_sync_voltage(reg); + if (ret) + break; + } +put_table: + /* Drop reference taken by _find_opp_table() */ + dev_pm_opp_put_opp_table(opp_table); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_sync_regulators); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 03cb387236c4..f480c10e6314 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -144,7 +144,7 @@ static void _opp_table_free_required_tables(struct opp_table *opp_table) for (i = 0; i < opp_table->required_opp_count; i++) { if (IS_ERR_OR_NULL(required_opp_tables[i])) - break; + continue; dev_pm_opp_put_opp_table(required_opp_tables[i]); } @@ -153,6 +153,7 @@ static void _opp_table_free_required_tables(struct opp_table *opp_table) opp_table->required_opp_count = 0; opp_table->required_opp_tables = NULL; + list_del(&opp_table->lazy); } /* @@ -165,6 +166,7 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, { struct opp_table **required_opp_tables; struct device_node *required_np, *np; + bool lazy = false; int count, i; /* Traversing the first OPP node is all we need */ @@ -195,8 +197,10 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, required_opp_tables[i] = _find_table_of_opp_np(required_np); of_node_put(required_np); - if (IS_ERR(required_opp_tables[i])) - goto free_required_tables; + if (IS_ERR(required_opp_tables[i])) { + lazy = true; + continue; + } /* * We only support genpd's OPPs in the "required-opps" for now, @@ -210,6 +214,10 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table, } } + /* Let's do the linking later on */ + if (lazy) + list_add(&opp_table->lazy, &lazy_opp_tables); + goto put_np; free_required_tables: @@ -278,14 +286,14 @@ void _of_opp_free_required_opps(struct opp_table *opp_table, for (i = 0; i < opp_table->required_opp_count; i++) { if (!required_opps[i]) - break; + continue; /* Put the reference back */ dev_pm_opp_put(required_opps[i]); } - kfree(required_opps); opp->required_opps = NULL; + kfree(required_opps); } /* Populate all required OPPs which are part of "required-opps" list */ @@ -309,6 +317,10 @@ static int _of_opp_alloc_required_opps(struct opp_table *opp_table, for (i = 0; i < count; i++) { required_table = opp_table->required_opp_tables[i]; + /* Required table not added yet, we will link later */ + if (IS_ERR_OR_NULL(required_table)) + continue; + np = of_parse_required_opp(opp->np, i); if (unlikely(!np)) { ret = -ENODEV; @@ -334,6 +346,104 @@ free_required_opps: return ret; } +/* Link required OPPs for an individual OPP */ +static int lazy_link_required_opps(struct opp_table *opp_table, + struct opp_table *new_table, int index) +{ + struct device_node *required_np; + struct dev_pm_opp *opp; + + list_for_each_entry(opp, &opp_table->opp_list, node) { + required_np = of_parse_required_opp(opp->np, index); + if (unlikely(!required_np)) + return -ENODEV; + + opp->required_opps[index] = _find_opp_of_np(new_table, required_np); + of_node_put(required_np); + + if 
(!opp->required_opps[index]) { + pr_err("%s: Unable to find required OPP node: %pOF (%d)\n", + __func__, opp->np, index); + return -ENODEV; + } + } + + return 0; +} + +/* Link required OPPs for all OPPs of the newly added OPP table */ +static void lazy_link_required_opp_table(struct opp_table *new_table) +{ + struct opp_table *opp_table, *temp, **required_opp_tables; + struct device_node *required_np, *opp_np, *required_table_np; + struct dev_pm_opp *opp; + int i, ret; + + /* + * We only support genpd's OPPs in the "required-opps" for now, + * as we don't know much about other cases. + */ + if (!new_table->is_genpd) + return; + + mutex_lock(&opp_table_lock); + + list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) { + bool lazy = false; + + /* opp_np can't be invalid here */ + opp_np = of_get_next_available_child(opp_table->np, NULL); + + for (i = 0; i < opp_table->required_opp_count; i++) { + required_opp_tables = opp_table->required_opp_tables; + + /* Required opp-table is already parsed */ + if (!IS_ERR(required_opp_tables[i])) + continue; + + /* required_np can't be invalid here */ + required_np = of_parse_required_opp(opp_np, i); + required_table_np = of_get_parent(required_np); + + of_node_put(required_table_np); + of_node_put(required_np); + + /* + * Newly added table isn't the required opp-table for + * opp_table. + */ + if (required_table_np != new_table->np) { + lazy = true; + continue; + } + + required_opp_tables[i] = new_table; + _get_opp_table_kref(new_table); + + /* Link OPPs now */ + ret = lazy_link_required_opps(opp_table, new_table, i); + if (ret) { + /* The OPPs will be marked unusable */ + lazy = false; + break; + } + } + + of_node_put(opp_np); + + /* All required opp-tables found, remove from lazy list */ + if (!lazy) { + list_del(&opp_table->lazy); + INIT_LIST_HEAD(&opp_table->lazy); + + list_for_each_entry(opp, &opp_table->opp_list, node) + _required_opps_available(opp, opp_table->required_opp_count); + } + } + + mutex_unlock(&opp_table_lock); +} + static int _bandwidth_supported(struct device *dev, struct opp_table *opp_table) { struct device_node *np, *opp_np; @@ -755,7 +865,6 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, struct device *dev, struct device_node *np) { struct dev_pm_opp *new_opp; - u64 rate = 0; u32 val; int ret; bool rate_not_available = false; @@ -772,7 +881,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, /* Check if the OPP supports hardware's hierarchy of versions or not */ if (!_opp_is_supported(dev, opp_table, np)) { - dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); + dev_dbg(dev, "OPP not supported by hardware: %lu\n", + new_opp->rate); goto free_opp; } @@ -822,10 +932,11 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, if (new_opp->clock_latency_ns > opp_table->clock_latency_ns_max) opp_table->clock_latency_ns_max = new_opp->clock_latency_ns; - pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n", + pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu level:%u\n", __func__, new_opp->turbo, new_opp->rate, new_opp->supplies[0].u_volt, new_opp->supplies[0].u_volt_min, - new_opp->supplies[0].u_volt_max, new_opp->clock_latency_ns); + new_opp->supplies[0].u_volt_max, new_opp->clock_latency_ns, + new_opp->level); /* * Notify the changes in the availability of the operable @@ -888,6 +999,8 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) } } + 
lazy_link_required_opp_table(opp_table); + return 0; remove_static_opp: @@ -956,29 +1069,23 @@ remove_static_opp: return ret; } -/** - * dev_pm_opp_of_add_table() - Initialize opp table from device tree - * @dev: device pointer used to lookup OPP table. - * - * Register the initial OPP table with the OPP library for given device. - * - * Return: - * 0 On success OR - * Duplicate OPPs (both freq and volt are same) and opp->available - * -EEXIST Freq are same and volt are different OR - * Duplicate OPPs (both freq and volt are same) and !opp->available - * -ENOMEM Memory allocation failure - * -ENODEV when 'operating-points' property is not found or is invalid data - * in device node. - * -ENODATA when empty 'operating-points' property is found - * -EINVAL when invalid entries are found in opp-v2 table - */ -int dev_pm_opp_of_add_table(struct device *dev) +static int _of_add_table_indexed(struct device *dev, int index, bool getclk) { struct opp_table *opp_table; - int ret; + int ret, count; - opp_table = _add_opp_table_indexed(dev, 0); + if (index) { + /* + * If only one phandle is present, then the same OPP table + * applies for all index requests. + */ + count = of_count_phandle_with_args(dev->of_node, + "operating-points-v2", NULL); + if (count == 1) + index = 0; + } + + opp_table = _add_opp_table_indexed(dev, index, getclk); if (IS_ERR(opp_table)) return PTR_ERR(opp_table); @@ -996,15 +1103,12 @@ int dev_pm_opp_of_add_table(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); /** - * dev_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree + * dev_pm_opp_of_add_table() - Initialize opp table from device tree * @dev: device pointer used to lookup OPP table. - * @index: Index number. * - * Register the initial OPP table with the OPP library for given device only - * using the "operating-points-v2" property. + * Register the initial OPP table with the OPP library for given device. * * Return: * 0 On success OR @@ -1017,34 +1121,46 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); * -ENODATA when empty 'operating-points' property is found * -EINVAL when invalid entries are found in opp-v2 table */ -int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) +int dev_pm_opp_of_add_table(struct device *dev) { - struct opp_table *opp_table; - int ret, count; - - if (index) { - /* - * If only one phandle is present, then the same OPP table - * applies for all index requests. - */ - count = of_count_phandle_with_args(dev->of_node, - "operating-points-v2", NULL); - if (count == 1) - index = 0; - } - - opp_table = _add_opp_table_indexed(dev, index); - if (IS_ERR(opp_table)) - return PTR_ERR(opp_table); - - ret = _of_add_opp_table_v2(dev, opp_table); - if (ret) - dev_pm_opp_put_opp_table(opp_table); + return _of_add_table_indexed(dev, 0, true); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table); - return ret; +/** + * dev_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree + * @dev: device pointer used to lookup OPP table. + * @index: Index number. + * + * Register the initial OPP table with the OPP library for given device only + * using the "operating-points-v2" property. + * + * Return: Refer to dev_pm_opp_of_add_table() for return values. 
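For reference, the index parameter matters for nodes carrying more than one table phandle. A sketch (the DT fragment and names are hypothetical):

/* DT sketch: operating-points-v2 = <&opp_table0>, <&opp_table1>; */
#include <linux/pm_opp.h>

static int example_add_second_table(struct device *dev)
{
	/*
	 * Index 1 picks the second phandle; with a single phandle the
	 * code above silently falls back to index 0.
	 */
	return dev_pm_opp_of_add_table_indexed(dev, 1);
}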
+ */ +int dev_pm_opp_of_add_table_indexed(struct device *dev, int index) +{ + return _of_add_table_indexed(dev, index, true); } EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed); +/** + * dev_pm_opp_of_add_table_noclk() - Initialize indexed opp table from device + * tree without getting clk for device. + * @dev: device pointer used to lookup OPP table. + * @index: Index number. + * + * Register the initial OPP table with the OPP library for given device only + * using the "operating-points-v2" property. Do not try to get the clk for the + * device. + * + * Return: Refer to dev_pm_opp_of_add_table() for return values. + */ +int dev_pm_opp_of_add_table_noclk(struct device *dev, int index) +{ + return _of_add_table_indexed(dev, index, false); +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_noclk); + /* CPU device specific helpers */ /** diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 4ced7ffa8158..9b9daf83b074 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -26,7 +26,7 @@ struct regulator; /* Lock to allow exclusive modification to the device and opp lists */ extern struct mutex opp_table_lock; -extern struct list_head opp_tables; +extern struct list_head opp_tables, lazy_opp_tables; /* * Internal data structure organization with the OPP layer library is as @@ -135,6 +135,7 @@ enum opp_table_access { * @clock_latency_ns_max: Max clock latency in nanoseconds. * @parsed_static_opps: Count of devices for which OPPs are initialized from DT. * @shared_opp: OPP is shared between multiple devices. + * @current_opp: Currently configured OPP for the table. * @suspend_opp: Pointer to OPP to be used during device suspend. * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers. * @genpd_virt_devs: List of virtual devices for multiple genpd support. @@ -155,6 +156,7 @@ enum opp_table_access { * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. * @set_opp: Platform specific set_opp callback + * @sod_supplies: Set opp data supplies * @set_opp_data: Data to be passed to set_opp callback * @dentry: debugfs dentry pointer of the real device directory (not links). * @dentry_name: Name of the real dentry. @@ -166,7 +168,7 @@ enum opp_table_access { * meant for book keeping and private to OPP library. 
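The _noclk variant added just above is the one the tegra30-devfreq hunk earlier in this patch switches to; a sketch of the intended pattern (driver-managed clock, names hypothetical):

#include <linux/clk.h>
#include <linux/pm_opp.h>

static int example_init_opps(struct device *dev, struct clk *own_clk)
{
	int err;

	/* Parse the table but keep the OPP core away from our clock */
	err = dev_pm_opp_of_add_table_noclk(dev, 0);
	if (err)
		return err;

	/* Rates keep being programmed by the driver itself */
	return clk_prepare_enable(own_clk);
}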
*/ struct opp_table { - struct list_head node; + struct list_head node, lazy; struct blocking_notifier_head head; struct list_head dev_list; @@ -182,6 +184,7 @@ struct opp_table { unsigned int parsed_static_opps; enum opp_table_access shared_opp; + struct dev_pm_opp *current_opp; struct dev_pm_opp *suspend_opp; struct mutex genpd_virt_dev_lock; @@ -202,6 +205,7 @@ struct opp_table { bool is_genpd; int (*set_opp)(struct dev_pm_set_opp_data *data); + struct dev_pm_opp_supply *sod_supplies; struct dev_pm_set_opp_data *set_opp_data; #ifdef CONFIG_DEBUG_FS @@ -223,9 +227,14 @@ int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2); int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available); int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic); void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu); -struct opp_table *_add_opp_table(struct device *dev); -struct opp_table *_add_opp_table_indexed(struct device *dev, int index); +struct opp_table *_add_opp_table_indexed(struct device *dev, int index, bool getclk); void _put_opp_list_kref(struct opp_table *opp_table); +void _required_opps_available(struct dev_pm_opp *opp, int count); + +static inline bool lazy_linking_pending(struct opp_table *opp_table) +{ + return unlikely(!list_empty(&opp_table->lazy)); +} #ifdef CONFIG_OF void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index); diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig index bc228725346b..20b4325c6161 100644 --- a/drivers/powercap/Kconfig +++ b/drivers/powercap/Kconfig @@ -43,4 +43,17 @@ config IDLE_INJECT CPUs for power capping. Idle period can be injected synchronously on a set of specified CPUs or alternatively on a per CPU basis. + +config DTPM + bool "Power capping for Dynamic Thermal Power Management" + help + This enables support for the power capping for the dynamic + thermal power management userspace engine. + +config DTPM_CPU + bool "Add CPU power capping based on the energy model" + depends on DTPM && ENERGY_MODEL + help + This enables support for CPU power limitation based on + energy model. endif diff --git a/drivers/powercap/Makefile b/drivers/powercap/Makefile index 7255c94ec61c..fabcf388a8d3 100644 --- a/drivers/powercap/Makefile +++ b/drivers/powercap/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_DTPM) += dtpm.o +obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o obj-$(CONFIG_POWERCAP) += powercap_sys.o obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c new file mode 100644 index 000000000000..5a51cd34a7e8 --- /dev/null +++ b/drivers/powercap/dtpm.c @@ -0,0 +1,480 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2020 Linaro Limited + * + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> + * + * The powercap based Dynamic Thermal Power Management framework + * provides to the userspace a consistent API to set the power limit + * on some devices. + * + * DTPM defines the functions to create a tree of constraints. Each + * parent node is a virtual description of the aggregation of the + * children. It propagates the constraints set at its level to its + * children and collect the children power information. 
The leaves of + * the tree are the real devices which have the ability to get their + * current power consumption and set their power limit. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/dtpm.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/powercap.h> +#include <linux/slab.h> +#include <linux/mutex.h> + +#define DTPM_POWER_LIMIT_FLAG 0 + +static const char *constraint_name[] = { + "Instantaneous", +}; + +static DEFINE_MUTEX(dtpm_lock); +static struct powercap_control_type *pct; +static struct dtpm *root; + +static int get_time_window_us(struct powercap_zone *pcz, int cid, u64 *window) +{ + return -ENOSYS; +} + +static int set_time_window_us(struct powercap_zone *pcz, int cid, u64 window) +{ + return -ENOSYS; +} + +static int get_max_power_range_uw(struct powercap_zone *pcz, u64 *max_power_uw) +{ + struct dtpm *dtpm = to_dtpm(pcz); + + mutex_lock(&dtpm_lock); + *max_power_uw = dtpm->power_max - dtpm->power_min; + mutex_unlock(&dtpm_lock); + + return 0; +} + +static int __get_power_uw(struct dtpm *dtpm, u64 *power_uw) +{ + struct dtpm *child; + u64 power; + int ret = 0; + + if (dtpm->ops) { + *power_uw = dtpm->ops->get_power_uw(dtpm); + return 0; + } + + *power_uw = 0; + + list_for_each_entry(child, &dtpm->children, sibling) { + ret = __get_power_uw(child, &power); + if (ret) + break; + *power_uw += power; + } + + return ret; +} + +static int get_power_uw(struct powercap_zone *pcz, u64 *power_uw) +{ + struct dtpm *dtpm = to_dtpm(pcz); + int ret; + + mutex_lock(&dtpm_lock); + ret = __get_power_uw(dtpm, power_uw); + mutex_unlock(&dtpm_lock); + + return ret; +} + +static void __dtpm_rebalance_weight(struct dtpm *dtpm) +{ + struct dtpm *child; + + list_for_each_entry(child, &dtpm->children, sibling) { + + pr_debug("Setting weight '%d' for '%s'\n", + child->weight, child->zone.name); + + child->weight = DIV64_U64_ROUND_CLOSEST( + child->power_max * 1024, dtpm->power_max); + + __dtpm_rebalance_weight(child); + } +} + +static void __dtpm_sub_power(struct dtpm *dtpm) +{ + struct dtpm *parent = dtpm->parent; + + while (parent) { + parent->power_min -= dtpm->power_min; + parent->power_max -= dtpm->power_max; + parent->power_limit -= dtpm->power_limit; + parent = parent->parent; + } + + __dtpm_rebalance_weight(root); +} + +static void __dtpm_add_power(struct dtpm *dtpm) +{ + struct dtpm *parent = dtpm->parent; + + while (parent) { + parent->power_min += dtpm->power_min; + parent->power_max += dtpm->power_max; + parent->power_limit += dtpm->power_limit; + parent = parent->parent; + } + + __dtpm_rebalance_weight(root); +} + +/** + * dtpm_update_power - Update the power on the dtpm + * @dtpm: a pointer to a dtpm structure to update + * @power_min: a u64 representing the new power_min value + * @power_max: a u64 representing the new power_max value + * + * Function to update the power values of the dtpm node specified in + * parameter. These new values will be propagated to the tree. 
+ * + * Return: zero on success, -EINVAL if the values are inconsistent + */ +int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max) +{ + int ret = 0; + + mutex_lock(&dtpm_lock); + + if (power_min == dtpm->power_min && power_max == dtpm->power_max) + goto unlock; + + if (power_max < power_min) { + ret = -EINVAL; + goto unlock; + } + + __dtpm_sub_power(dtpm); + + dtpm->power_min = power_min; + dtpm->power_max = power_max; + if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags)) + dtpm->power_limit = power_max; + + __dtpm_add_power(dtpm); + +unlock: + mutex_unlock(&dtpm_lock); + + return ret; +} + +/** + * dtpm_release_zone - Cleanup when the node is released + * @pcz: a pointer to a powercap_zone structure + * + * Do some housecleaning and update the weight on the tree. The + * release will be denied if the node has children. This function must + * be called by the specific release callback of the different + * backends. + * + * Return: 0 on success, -EBUSY if there are children + */ +int dtpm_release_zone(struct powercap_zone *pcz) +{ + struct dtpm *dtpm = to_dtpm(pcz); + struct dtpm *parent = dtpm->parent; + + mutex_lock(&dtpm_lock); + + if (!list_empty(&dtpm->children)) { + mutex_unlock(&dtpm_lock); + return -EBUSY; + } + + if (parent) + list_del(&dtpm->sibling); + + __dtpm_sub_power(dtpm); + + mutex_unlock(&dtpm_lock); + + if (dtpm->ops) + dtpm->ops->release(dtpm); + + kfree(dtpm); + + return 0; +} + +static int __get_power_limit_uw(struct dtpm *dtpm, int cid, u64 *power_limit) +{ + *power_limit = dtpm->power_limit; + return 0; +} + +static int get_power_limit_uw(struct powercap_zone *pcz, + int cid, u64 *power_limit) +{ + struct dtpm *dtpm = to_dtpm(pcz); + int ret; + + mutex_lock(&dtpm_lock); + ret = __get_power_limit_uw(dtpm, cid, power_limit); + mutex_unlock(&dtpm_lock); + + return ret; +} + +/* + * Set the power limit on the nodes, the power limit is distributed + * given the weight of the children. + * + * The dtpm node lock must be held when calling this function. + */ +static int __set_power_limit_uw(struct dtpm *dtpm, int cid, u64 power_limit) +{ + struct dtpm *child; + int ret = 0; + u64 power; + + /* + * A max power limitation means we remove the power limit, + * otherwise we set a constraint and flag the dtpm node. + */ + if (power_limit == dtpm->power_max) { + clear_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags); + } else { + set_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags); + } + + pr_debug("Setting power limit for '%s': %llu uW\n", + dtpm->zone.name, power_limit); + + /* + * Only leaves of the dtpm tree has ops to get/set the power + */ + if (dtpm->ops) { + dtpm->power_limit = dtpm->ops->set_power_uw(dtpm, power_limit); + } else { + dtpm->power_limit = 0; + + list_for_each_entry(child, &dtpm->children, sibling) { + + /* + * Integer division rounding will inevitably + * lead to a different min or max value when + * set several times. In order to restore the + * initial value, we force the child's min or + * max power every time if the constraint is + * at the boundaries. 
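A worked example of the weighting above (numbers invented): with two children capped at 2 W and 1 W under a 3 W parent, __dtpm_rebalance_weight() assigns weights of 2000000 * 1024 / 3000000 ≈ 682 and 1000000 * 1024 / 3000000 ≈ 341. A 1.5 W limit on the parent is then split as 1500000 * 682 / 1024 ≈ 999 mW and 1500000 * 341 / 1024 ≈ 499 mW, and each child's accepted value is read back so that the parent's power_limit reflects what was actually applied.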
+ */ + if (power_limit == dtpm->power_max) { + power = child->power_max; + } else if (power_limit == dtpm->power_min) { + power = child->power_min; + } else { + power = DIV_ROUND_CLOSEST_ULL( + power_limit * child->weight, 1024); + } + + pr_debug("Setting power limit for '%s': %llu uW\n", + child->zone.name, power); + + ret = __set_power_limit_uw(child, cid, power); + if (!ret) + ret = __get_power_limit_uw(child, cid, &power); + + if (ret) + break; + + dtpm->power_limit += power; + } + } + + return ret; +} + +static int set_power_limit_uw(struct powercap_zone *pcz, + int cid, u64 power_limit) +{ + struct dtpm *dtpm = to_dtpm(pcz); + int ret; + + mutex_lock(&dtpm_lock); + + /* + * Don't allow values outside of the power range previously + * set when initializing the power numbers. + */ + power_limit = clamp_val(power_limit, dtpm->power_min, dtpm->power_max); + + ret = __set_power_limit_uw(dtpm, cid, power_limit); + + pr_debug("%s: power limit: %llu uW, power max: %llu uW\n", + dtpm->zone.name, dtpm->power_limit, dtpm->power_max); + + mutex_unlock(&dtpm_lock); + + return ret; +} + +static const char *get_constraint_name(struct powercap_zone *pcz, int cid) +{ + return constraint_name[cid]; +} + +static int get_max_power_uw(struct powercap_zone *pcz, int id, u64 *max_power) +{ + struct dtpm *dtpm = to_dtpm(pcz); + + mutex_lock(&dtpm_lock); + *max_power = dtpm->power_max; + mutex_unlock(&dtpm_lock); + + return 0; +} + +static struct powercap_zone_constraint_ops constraint_ops = { + .set_power_limit_uw = set_power_limit_uw, + .get_power_limit_uw = get_power_limit_uw, + .set_time_window_us = set_time_window_us, + .get_time_window_us = get_time_window_us, + .get_max_power_uw = get_max_power_uw, + .get_name = get_constraint_name, +}; + +static struct powercap_zone_ops zone_ops = { + .get_max_power_range_uw = get_max_power_range_uw, + .get_power_uw = get_power_uw, + .release = dtpm_release_zone, +}; + +/** + * dtpm_alloc - Allocate and initialize a dtpm struct + * @name: a string specifying the name of the node + * + * Return: a struct dtpm pointer, NULL in case of error + */ +struct dtpm *dtpm_alloc(struct dtpm_ops *ops) +{ + struct dtpm *dtpm; + + dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL); + if (dtpm) { + INIT_LIST_HEAD(&dtpm->children); + INIT_LIST_HEAD(&dtpm->sibling); + dtpm->weight = 1024; + dtpm->ops = ops; + } + + return dtpm; +} + +/** + * dtpm_unregister - Unregister a dtpm node from the hierarchy tree + * @dtpm: a pointer to a dtpm structure corresponding to the node to be removed + * + * Call the underlying powercap unregister function. That will call + * the release callback of the powercap zone. + */ +void dtpm_unregister(struct dtpm *dtpm) +{ + powercap_unregister_zone(pct, &dtpm->zone); + + pr_info("Unregistered dtpm node '%s'\n", dtpm->zone.name); +} + +/** + * dtpm_register - Register a dtpm node in the hierarchy tree + * @name: a string specifying the name of the node + * @dtpm: a pointer to a dtpm structure corresponding to the new node + * @parent: a pointer to a dtpm structure corresponding to the parent node + * + * Create a dtpm node in the tree. If no parent is specified, the node + * is the root node of the hierarchy. If the root node already exists, + * then the registration will fail. The powercap controller must be + * initialized before calling this function. + * + * The dtpm structure must be initialized with the power numbers + * before calling this function. 
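To make the registration rules concrete, a hypothetical platform could build a two-level tree like this (a sketch only; names, power numbers and error unwinding are illustrative):

#include <linux/dtpm.h>

static int example_build_tree(struct dtpm_ops *leaf_ops)
{
	struct dtpm *pkg, *leaf;
	int ret;

	pkg = dtpm_alloc(NULL);			/* aggregation-only node */
	if (!pkg)
		return -ENOMEM;

	ret = dtpm_register("pkg", pkg, NULL);	/* becomes the root */
	if (ret)
		return ret;

	leaf = dtpm_alloc(leaf_ops);		/* real device, full ops */
	if (!leaf)
		return -ENOMEM;

	ret = dtpm_register("dev0", leaf, pkg);
	if (ret)
		return ret;

	/* Publish the leaf's power range: 100 mW - 2 W */
	return dtpm_update_power(leaf, 100000, 2000000);
}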
+ * + * Return: zero on success, a negative value in case of error: + * -EAGAIN: the function is called before the framework is initialized. + * -EBUSY: the root node is already inserted + * -EINVAL: * there is no root node yet and @parent is specified + * * not all ops are defined + * * parent has ops which are reserved for leaves + * Other negative values are reported back from the powercap framework + */ +int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) +{ + struct powercap_zone *pcz; + + if (!pct) + return -EAGAIN; + + if (root && !parent) + return -EBUSY; + + if (!root && parent) + return -EINVAL; + + if (parent && parent->ops) + return -EINVAL; + + if (!dtpm) + return -EINVAL; + + if (dtpm->ops && !(dtpm->ops->set_power_uw && + dtpm->ops->get_power_uw && + dtpm->ops->release)) + return -EINVAL; + + pcz = powercap_register_zone(&dtpm->zone, pct, name, + parent ? &parent->zone : NULL, + &zone_ops, MAX_DTPM_CONSTRAINTS, + &constraint_ops); + if (IS_ERR(pcz)) + return PTR_ERR(pcz); + + mutex_lock(&dtpm_lock); + + if (parent) { + list_add_tail(&dtpm->sibling, &parent->children); + dtpm->parent = parent; + } else { + root = dtpm; + } + + __dtpm_add_power(dtpm); + + pr_info("Registered dtpm node '%s' / %llu-%llu uW\n", + dtpm->zone.name, dtpm->power_min, dtpm->power_max); + + mutex_unlock(&dtpm_lock); + + return 0; +} + +static int __init dtpm_init(void) +{ + struct dtpm_descr **dtpm_descr; + + pct = powercap_register_control_type(NULL, "dtpm", NULL); + if (IS_ERR(pct)) { + pr_err("Failed to register control type\n"); + return PTR_ERR(pct); + } + + for_each_dtpm_table(dtpm_descr) + (*dtpm_descr)->init(*dtpm_descr); + + return 0; +} +late_initcall(dtpm_init); diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c new file mode 100644 index 000000000000..51c366938acd --- /dev/null +++ b/drivers/powercap/dtpm_cpu.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2020 Linaro Limited + * + * Author: Daniel Lezcano <daniel.lezcano@linaro.org> + * + * The DTPM CPU is based on the energy model. It hooks the CPU in the + * DTPM tree which in turn updates the power number by propagating the + * power number from the CPU energy model information to the parents. + * + * The association between the power and the performance state allows + * setting the power of the CPU at the OPP granularity. + * + * The CPU hotplug is supported and the power numbers will be updated + * if a CPU is hot plugged / unplugged. + */ +#include <linux/cpumask.h> +#include <linux/cpufreq.h> +#include <linux/cpuhotplug.h> +#include <linux/dtpm.h> +#include <linux/energy_model.h> +#include <linux/pm_qos.h> +#include <linux/slab.h> +#include <linux/units.h> + +static struct dtpm *__parent; + +static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu); + +struct dtpm_cpu { + struct freq_qos_request qos_req; + int cpu; +}; + +/* + * When a new CPU is inserted at hotplug or boot time, add the power + * contribution and update the dtpm tree. + */ +static int power_add(struct dtpm *dtpm, struct em_perf_domain *em) +{ + u64 power_min, power_max; + + power_min = em->table[0].power; + power_min *= MICROWATT_PER_MILLIWATT; + power_min += dtpm->power_min; + + power_max = em->table[em->nr_perf_states - 1].power; + power_max *= MICROWATT_PER_MILLIWATT; + power_max += dtpm->power_max; + + return dtpm_update_power(dtpm, power_min, power_max); +} + +/* + * When a CPU is unplugged, remove its power contribution from the + * dtpm tree.
+ */ +static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em) +{ + u64 power_min, power_max; + + power_min = em->table[0].power; + power_min *= MICROWATT_PER_MILLIWATT; + power_min = dtpm->power_min - power_min; + + power_max = em->table[em->nr_perf_states - 1].power; + power_max *= MICROWATT_PER_MILLIWATT; + power_max = dtpm->power_max - power_max; + + return dtpm_update_power(dtpm, power_min, power_max); +} + +static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) +{ + struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct em_perf_domain *pd; + struct cpumask cpus; + unsigned long freq; + u64 power; + int i, nr_cpus; + + pd = em_cpu_get(dtpm_cpu->cpu); + + cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); + + nr_cpus = cpumask_weight(&cpus); + + for (i = 0; i < pd->nr_perf_states; i++) { + + power = pd->table[i].power * MICROWATT_PER_MILLIWATT * nr_cpus; + + if (power > power_limit) + break; + } + + freq = pd->table[i - 1].frequency; + + freq_qos_update_request(&dtpm_cpu->qos_req, freq); + + power_limit = pd->table[i - 1].power * + MICROWATT_PER_MILLIWATT * nr_cpus; + + return power_limit; +} + +static u64 get_pd_power_uw(struct dtpm *dtpm) +{ + struct dtpm_cpu *dtpm_cpu = dtpm->private; + struct em_perf_domain *pd; + struct cpumask cpus; + unsigned long freq; + int i, nr_cpus; + + pd = em_cpu_get(dtpm_cpu->cpu); + freq = cpufreq_quick_get(dtpm_cpu->cpu); + cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); + nr_cpus = cpumask_weight(&cpus); + + for (i = 0; i < pd->nr_perf_states; i++) { + + if (pd->table[i].frequency < freq) + continue; + + return pd->table[i].power * + MICROWATT_PER_MILLIWATT * nr_cpus; + } + + return 0; +} + +static void pd_release(struct dtpm *dtpm) +{ + struct dtpm_cpu *dtpm_cpu = dtpm->private; + + if (freq_qos_request_active(&dtpm_cpu->qos_req)) + freq_qos_remove_request(&dtpm_cpu->qos_req); + + kfree(dtpm_cpu); +} + +static struct dtpm_ops dtpm_ops = { + .set_power_uw = set_pd_power_limit, + .get_power_uw = get_pd_power_uw, + .release = pd_release, +}; + +static int cpuhp_dtpm_cpu_offline(unsigned int cpu) +{ + struct cpufreq_policy *policy; + struct em_perf_domain *pd; + struct dtpm *dtpm; + + policy = cpufreq_cpu_get(cpu); + + if (!policy) + return 0; + + pd = em_cpu_get(cpu); + if (!pd) + return -EINVAL; + + dtpm = per_cpu(dtpm_per_cpu, cpu); + + power_sub(dtpm, pd); + + if (cpumask_weight(policy->cpus) != 1) + return 0; + + for_each_cpu(cpu, policy->related_cpus) + per_cpu(dtpm_per_cpu, cpu) = NULL; + + dtpm_unregister(dtpm); + + return 0; +} + +static int cpuhp_dtpm_cpu_online(unsigned int cpu) +{ + struct dtpm *dtpm; + struct dtpm_cpu *dtpm_cpu; + struct cpufreq_policy *policy; + struct em_perf_domain *pd; + char name[CPUFREQ_NAME_LEN]; + int ret = -ENOMEM; + + policy = cpufreq_cpu_get(cpu); + + if (!policy) + return 0; + + pd = em_cpu_get(cpu); + if (!pd) + return -EINVAL; + + dtpm = per_cpu(dtpm_per_cpu, cpu); + if (dtpm) + return power_add(dtpm, pd); + + dtpm = dtpm_alloc(&dtpm_ops); + if (!dtpm) + return -EINVAL; + + dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); + if (!dtpm_cpu) + goto out_kfree_dtpm; + + dtpm->private = dtpm_cpu; + dtpm_cpu->cpu = cpu; + + for_each_cpu(cpu, policy->related_cpus) + per_cpu(dtpm_per_cpu, cpu) = dtpm; + + sprintf(name, "cpu%d", dtpm_cpu->cpu); + + ret = dtpm_register(name, dtpm, __parent); + if (ret) + goto out_kfree_dtpm_cpu; + + ret = power_add(dtpm, pd); + if (ret) + goto out_dtpm_unregister; + + ret = freq_qos_add_request(&policy->constraints, + &dtpm_cpu->qos_req, 
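+				   /*
+				    * A max-frequency request, initialized
+				    * at the highest OPP so that no limit
+				    * is enforced until a power limit is
+				    * actually set:
+				    */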
+				   FREQ_QOS_MAX,
+				   pd->table[pd->nr_perf_states - 1].frequency);
+	if (ret)
+		goto out_power_sub;
+
+	return 0;
+
+out_power_sub:
+	power_sub(dtpm, pd);
+
+out_dtpm_unregister:
+	dtpm_unregister(dtpm);
+	dtpm_cpu = NULL;
+	dtpm = NULL;
+
+out_kfree_dtpm_cpu:
+	for_each_cpu(cpu, policy->related_cpus)
+		per_cpu(dtpm_per_cpu, cpu) = NULL;
+	kfree(dtpm_cpu);
+
+out_kfree_dtpm:
+	kfree(dtpm);
+	return ret;
+}
+
+int dtpm_register_cpu(struct dtpm *parent)
+{
+	__parent = parent;
+
+	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
+				 "dtpm_cpu:online",
+				 cpuhp_dtpm_cpu_online,
+				 cpuhp_dtpm_cpu_offline);
+}
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index c9e57237d778..fdda2a737186 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -547,7 +547,7 @@ static void rapl_init_domains(struct rapl_package *rp)
 
 		if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
 			snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
-				cpu_data(rp->lead_cpu).phys_proc_id);
+				topology_physical_package_id(rp->lead_cpu));
 		} else
 			snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
 				rapl_domain_names[i]);
@@ -1049,6 +1049,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,		&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,		&rapl_defaults_core),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&rapl_defaults_core),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&rapl_defaults_core),
 
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&rapl_defaults_spr_server),
 	X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD,		&rapl_defaults_core),
@@ -1309,7 +1310,6 @@ struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
 {
 	int id = topology_logical_die_id(cpu);
 	struct rapl_package *rp;
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	int ret;
 
 	if (!rapl_defaults)
@@ -1326,10 +1326,11 @@ struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
 
 	if (topology_max_die_per_package() > 1)
 		snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
-			 "package-%d-die-%d", c->phys_proc_id, c->cpu_die_id);
+			 "package-%d-die-%d",
+			 topology_physical_package_id(cpu), topology_die_id(cpu));
 	else
 		snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
-			 c->phys_proc_id);
+			 topology_physical_package_id(cpu));
 
 	/* check if the package contains valid domains */
 	if (rapl_detect_domains(rp, cpu) ||
 	    rapl_defaults->check_unit(rp, cpu)) {
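
For reference, a minimal standalone userspace sketch (not part of the patch; the names and numbers are purely illustrative, and DIV_ROUND_CLOSEST_ULL is approximated for unsigned values) of the weighted split that __set_power_limit_uw() applies to its children: each child is granted power_limit * weight / 1024, rounded to the closest integer, except at the range boundaries where the child's power_min/power_max is forced to avoid rounding drift.

	/* Hypothetical demonstration of the child->weight split. */
	#include <stdio.h>
	#include <stdint.h>

	/* Userspace approximation of the kernel's DIV_ROUND_CLOSEST_ULL() */
	#define DIV_ROUND_CLOSEST_ULL(x, d)	(((x) + (d) / 2) / (d))

	int main(void)
	{
		uint64_t limit = 3000000;		/* parent limit: 3 W, in uW */
		uint64_t weight[] = { 512, 512 };	/* assumed to sum to 1024 */
		unsigned int i;

		for (i = 0; i < 2; i++)
			printf("child%u gets %llu uW\n", i,
			       (unsigned long long)DIV_ROUND_CLOSEST_ULL(
						limit * weight[i], 1024));

		return 0;
	}

With these example weights, a 3 W parent limit splits into 1500000 uW per child; a child with the default weight of 1024 would receive the full parent limit.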