diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_pmu.c | 136 |
1 files changed, 73 insertions, 63 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 2814218c5ba1..f3ef6700a5f2 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -144,61 +144,40 @@ static inline s64 ktime_since(const ktime_t kt) return ktime_to_ns(ktime_sub(ktime_get(), kt)); } -static u64 __pmu_estimate_rc6(struct i915_pmu *pmu) -{ - u64 val; - - /* - * We think we are runtime suspended. - * - * Report the delta from when the device was suspended to now, - * on top of the last known real value, as the approximated RC6 - * counter value. - */ - val = ktime_since(pmu->sleep_last); - val += pmu->sample[__I915_SAMPLE_RC6].cur; - - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; - - return val; -} - -static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val) -{ - /* - * If we are coming back from being runtime suspended we must - * be careful not to report a larger value than returned - * previously. - */ - if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { - pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = val; - } else { - val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur; - } - - return val; -} - static u64 get_rc6(struct intel_gt *gt) { struct drm_i915_private *i915 = gt->i915; struct i915_pmu *pmu = &i915->pmu; unsigned long flags; + bool awake = false; u64 val; - val = 0; if (intel_gt_pm_get_if_awake(gt)) { val = __get_rc6(gt); intel_gt_pm_put_async(gt); + awake = true; } spin_lock_irqsave(&pmu->lock, flags); - if (val) - val = __pmu_update_rc6(pmu, val); + if (awake) { + pmu->sample[__I915_SAMPLE_RC6].cur = val; + } else { + /* + * We think we are runtime suspended. + * + * Report the delta from when the device was suspended to now, + * on top of the last known real value, as the approximated RC6 + * counter value. + */ + val = ktime_since(pmu->sleep_last); + val += pmu->sample[__I915_SAMPLE_RC6].cur; + } + + if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur) + val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur; else - val = __pmu_estimate_rc6(pmu); + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val; spin_unlock_irqrestore(&pmu->lock, flags); @@ -210,20 +189,11 @@ static void park_rc6(struct drm_i915_private *i915) struct i915_pmu *pmu = &i915->pmu; if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) - __pmu_update_rc6(pmu, __get_rc6(&i915->gt)); + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); pmu->sleep_last = ktime_get(); } -static void unpark_rc6(struct drm_i915_private *i915) -{ - struct i915_pmu *pmu = &i915->pmu; - - /* Estimate how long we slept and accumulate that into rc6 counters */ - if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) - __pmu_estimate_rc6(pmu); -} - #else static u64 get_rc6(struct intel_gt *gt) @@ -232,7 +202,6 @@ static u64 get_rc6(struct intel_gt *gt) } static void park_rc6(struct drm_i915_private *i915) {} -static void unpark_rc6(struct drm_i915_private *i915) {} #endif @@ -281,8 +250,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915) */ __i915_pmu_maybe_start_timer(pmu); - unpark_rc6(i915); - spin_unlock_irq(&pmu->lock); } @@ -292,6 +259,16 @@ add_sample(struct i915_pmu_sample *sample, u32 val) sample->cur += val; } +static bool exclusive_mmio_access(const struct drm_i915_private *i915) +{ + /* + * We have to avoid concurrent mmio cache line access on gen7 or + * risk a machine hang. For a fun history lesson dig out the old + * userspace intel_gpu_top and run it on Ivybridge or Haswell! + */ + return IS_GEN(i915, 7); +} + static void engines_sample(struct intel_gt *gt, unsigned int period_ns) { @@ -302,8 +279,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0) return; + if (!intel_gt_pm_is_awake(gt)) + return; + for_each_engine(engine, gt, id) { struct intel_engine_pmu *pmu = &engine->pmu; + spinlock_t *mmio_lock; unsigned long flags; bool busy; u32 val; @@ -311,7 +292,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) if (!intel_engine_pm_get_if_awake(engine)) continue; - spin_lock_irqsave(&engine->uncore->lock, flags); + mmio_lock = NULL; + if (exclusive_mmio_access(i915)) + mmio_lock = &engine->uncore->lock; + + if (unlikely(mmio_lock)) + spin_lock_irqsave(mmio_lock, flags); val = ENGINE_READ_FW(engine, RING_CTL); if (val == 0) /* powerwell off => engine idle */ @@ -342,7 +328,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns); skip: - spin_unlock_irqrestore(&engine->uncore->lock, flags); + if (unlikely(mmio_lock)) + spin_unlock_irqrestore(mmio_lock, flags); intel_engine_pm_put_async(engine); } } @@ -353,6 +340,13 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) sample->cur += mul_u32_u32(val, mul); } +static bool frequency_sampling_enabled(struct i915_pmu *pmu) +{ + return pmu->enable & + (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); +} + static void frequency_sample(struct intel_gt *gt, unsigned int period_ns) { @@ -361,19 +355,33 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) struct i915_pmu *pmu = &i915->pmu; struct intel_rps *rps = >->rps; + if (!frequency_sampling_enabled(pmu)) + return; + + /* Report 0/0 (actual/requested) frequency while parked. */ + if (!intel_gt_pm_get_if_awake(gt)) + return; + if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; - val = rps->cur_freq; - if (intel_gt_pm_get_if_awake(gt)) { - val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); - val = intel_get_cagf(rps, val); - intel_gt_pm_put_async(gt); - } + /* + * We take a quick peek here without using forcewake + * so that we don't perturb the system under observation + * (forcewake => !rc6 => increased power use). We expect + * that if the read fails because it is outside of the + * mmio power well, then it will return 0 -- in which + * case we assume the system is running at the intended + * frequency. Fortunately, the read should rarely fail! + */ + val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1); + if (val) + val = intel_rps_get_cagf(rps, val); + else + val = rps->cur_freq; add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], - intel_gpu_freq(rps, val), - period_ns / 1000); + intel_gpu_freq(rps, val), period_ns / 1000); } if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { @@ -381,6 +389,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); } + + intel_gt_pm_put_async(gt); } static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer) |