summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_pmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_pmu.c')
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c136
1 files changed, 73 insertions, 63 deletions
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2814218c5ba1..f3ef6700a5f2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -144,61 +144,40 @@ static inline s64 ktime_since(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get(), kt));
}
-static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
-{
- u64 val;
-
- /*
- * We think we are runtime suspended.
- *
- * Report the delta from when the device was suspended to now,
- * on top of the last known real value, as the approximated RC6
- * counter value.
- */
- val = ktime_since(pmu->sleep_last);
- val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
- pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-
- return val;
-}
-
-static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
-{
- /*
- * If we are coming back from being runtime suspended we must
- * be careful not to report a larger value than returned
- * previously.
- */
- if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
- pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
- pmu->sample[__I915_SAMPLE_RC6].cur = val;
- } else {
- val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
- }
-
- return val;
-}
-
static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
+ bool awake = false;
u64 val;
- val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
intel_gt_pm_put_async(gt);
+ awake = true;
}
spin_lock_irqsave(&pmu->lock, flags);
- if (val)
- val = __pmu_update_rc6(pmu, val);
+ if (awake) {
+ pmu->sample[__I915_SAMPLE_RC6].cur = val;
+ } else {
+ /*
+ * We think we are runtime suspended.
+ *
+ * Report the delta from when the device was suspended to now,
+ * on top of the last known real value, as the approximated RC6
+ * counter value.
+ */
+ val = ktime_since(pmu->sleep_last);
+ val += pmu->sample[__I915_SAMPLE_RC6].cur;
+ }
+
+ if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+ val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
- val = __pmu_estimate_rc6(pmu);
+ pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
spin_unlock_irqrestore(&pmu->lock, flags);
@@ -210,20 +189,11 @@ static void park_rc6(struct drm_i915_private *i915)
struct i915_pmu *pmu = &i915->pmu;
if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
- __pmu_update_rc6(pmu, __get_rc6(&i915->gt));
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
pmu->sleep_last = ktime_get();
}
-static void unpark_rc6(struct drm_i915_private *i915)
-{
- struct i915_pmu *pmu = &i915->pmu;
-
- /* Estimate how long we slept and accumulate that into rc6 counters */
- if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
- __pmu_estimate_rc6(pmu);
-}
-
#else
static u64 get_rc6(struct intel_gt *gt)
@@ -232,7 +202,6 @@ static u64 get_rc6(struct intel_gt *gt)
}
static void park_rc6(struct drm_i915_private *i915) {}
-static void unpark_rc6(struct drm_i915_private *i915) {}
#endif
@@ -281,8 +250,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
*/
__i915_pmu_maybe_start_timer(pmu);
- unpark_rc6(i915);
-
spin_unlock_irq(&pmu->lock);
}
@@ -292,6 +259,16 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
}
+static bool exclusive_mmio_access(const struct drm_i915_private *i915)
+{
+ /*
+ * We have to avoid concurrent mmio cache line access on gen7 or
+ * risk a machine hang. For a fun history lesson dig out the old
+ * userspace intel_gpu_top and run it on Ivybridge or Haswell!
+ */
+ return IS_GEN(i915, 7);
+}
+
static void
engines_sample(struct intel_gt *gt, unsigned int period_ns)
{
@@ -302,8 +279,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
if ((i915->pmu.enable & ENGINE_SAMPLE_MASK) == 0)
return;
+ if (!intel_gt_pm_is_awake(gt))
+ return;
+
for_each_engine(engine, gt, id) {
struct intel_engine_pmu *pmu = &engine->pmu;
+ spinlock_t *mmio_lock;
unsigned long flags;
bool busy;
u32 val;
@@ -311,7 +292,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
if (!intel_engine_pm_get_if_awake(engine))
continue;
- spin_lock_irqsave(&engine->uncore->lock, flags);
+ mmio_lock = NULL;
+ if (exclusive_mmio_access(i915))
+ mmio_lock = &engine->uncore->lock;
+
+ if (unlikely(mmio_lock))
+ spin_lock_irqsave(mmio_lock, flags);
val = ENGINE_READ_FW(engine, RING_CTL);
if (val == 0) /* powerwell off => engine idle */
@@ -342,7 +328,8 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
add_sample(&pmu->sample[I915_SAMPLE_BUSY], period_ns);
skip:
- spin_unlock_irqrestore(&engine->uncore->lock, flags);
+ if (unlikely(mmio_lock))
+ spin_unlock_irqrestore(mmio_lock, flags);
intel_engine_pm_put_async(engine);
}
}
@@ -353,6 +340,13 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
sample->cur += mul_u32_u32(val, mul);
}
+static bool frequency_sampling_enabled(struct i915_pmu *pmu)
+{
+ return pmu->enable &
+ (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) |
+ config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY));
+}
+
static void
frequency_sample(struct intel_gt *gt, unsigned int period_ns)
{
@@ -361,19 +355,33 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
struct i915_pmu *pmu = &i915->pmu;
struct intel_rps *rps = &gt->rps;
+ if (!frequency_sampling_enabled(pmu))
+ return;
+
+ /* Report 0/0 (actual/requested) frequency while parked. */
+ if (!intel_gt_pm_get_if_awake(gt))
+ return;
+
if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) {
u32 val;
- val = rps->cur_freq;
- if (intel_gt_pm_get_if_awake(gt)) {
- val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
- val = intel_get_cagf(rps, val);
- intel_gt_pm_put_async(gt);
- }
+ /*
+ * We take a quick peek here without using forcewake
+ * so that we don't perturb the system under observation
+ * (forcewake => !rc6 => increased power use). We expect
+ * that if the read fails because it is outside of the
+ * mmio power well, then it will return 0 -- in which
+ * case we assume the system is running at the intended
+ * frequency. Fortunately, the read should rarely fail!
+ */
+ val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
+ if (val)
+ val = intel_rps_get_cagf(rps, val);
+ else
+ val = rps->cur_freq;
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
- intel_gpu_freq(rps, val),
- period_ns / 1000);
+ intel_gpu_freq(rps, val), period_ns / 1000);
}
if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) {
@@ -381,6 +389,8 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
intel_gpu_freq(rps, rps->cur_freq),
period_ns / 1000);
}
+
+ intel_gt_pm_put_async(gt);
}
static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)