summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_pm.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2018-07-31 15:26:29 +0200
committerRodrigo Vivi <rodrigo.vivi@intel.com>2018-08-06 20:18:01 +0200
commit027063b1606fea6df15c270e5f2a072d1dfa8fef (patch)
tree07a2162d9f6e09fedd267f7e5c92c57666852e3b /drivers/gpu/drm/i915/intel_pm.c
parentdrm/i915: Fix psr sink status report. (diff)
downloadlinux-027063b1606fea6df15c270e5f2a072d1dfa8fef.tar.xz
linux-027063b1606fea6df15c270e5f2a072d1dfa8fef.zip
drm/i915: Interactive RPS mode
RPS provides a feedback loop where we use the load during the previous evaluation interval to decide whether to up or down clock the GPU frequency. Our responsiveness is split into 3 regimes, a high and low plateau with the intent to keep the gpu clocked high to cover occasional stalls under high load, and low despite occasional glitches under steady low load, and inbetween. However, we run into situations like kodi where we want to stay at low power (video decoding is done efficiently inside the fixed function HW and doesn't need high clocks even for high bitrate streams), but just occasionally the pipeline is more complex than a video decode and we need a smidgen of extra GPU power to present on time. In the high power regime, we sample at sub frame intervals with a bias to upclocking, and conversely at low power we sample over a few frames worth to provide what we consider to be the right levels of responsiveness respectively. At low power, we more or less expect to be kicked out to high power at the start of a busy sequence by waitboosting. Prior to commit e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request") whenever we missed the frame or stalled, we would immediate go full throttle and upclock the GPU to max. But in commit e9af4ea2b9e7, we relaxed the waitboosting to only apply if the pipeline was deep to avoid over-committing resources for a near miss. Sadly though, a near miss is still a miss, and perceptible as jitter in the frame delivery. To try and prevent the near miss before having to resort to boosting after the fact, we use the pageflip queue as an indication that we are in an "interactive" regime and so should sample the load more frequently to provide power before the frame misses it vblank. This will make us more favorable to providing a small power increase (one or two bins) as required rather than going all the way to maximum and then having to work back down again. (We still keep the waitboosting mechanism around just in case a dramatic change in system load requires urgent uplocking, faster than we can provide in a few evaluation intervals.) v2: Reduce rps_set_interactive to a boolean parameter to avoid the confusion of what if they wanted a new power mode after pinning to a different mode (which to choose?) v3: Only reprogram RPS while the GT is awake, it will be set when we wake the GT, and while off warns about being used outside of rpm. v4: Fix deferred application of interactive mode v5: s/state/interactive/ v6: Group the mutex with its principle in a substruct Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107111 Fixes: e9af4ea2b9e7 ("drm/i915: Avoid waitboosting on the active request") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180731132629.3381-1-chris@chris-wilson.co.uk (cherry picked from commit 60548c554be2830d29d2533dad0ac8133347ee51) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_pm.c')
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c101
1 files changed, 67 insertions, 34 deletions
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7312ecb73415..43ae9de12ba3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6264,42 +6264,15 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
return limits;
}
-static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
- int new_power;
u32 threshold_up = 0, threshold_down = 0; /* in % */
u32 ei_up = 0, ei_down = 0;
- new_power = rps->power;
- switch (rps->power) {
- case LOW_POWER:
- if (val > rps->efficient_freq + 1 &&
- val > rps->cur_freq)
- new_power = BETWEEN;
- break;
-
- case BETWEEN:
- if (val <= rps->efficient_freq &&
- val < rps->cur_freq)
- new_power = LOW_POWER;
- else if (val >= rps->rp0_freq &&
- val > rps->cur_freq)
- new_power = HIGH_POWER;
- break;
+ lockdep_assert_held(&rps->power.mutex);
- case HIGH_POWER:
- if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
- val < rps->cur_freq)
- new_power = BETWEEN;
- break;
- }
- /* Max/min bins are special */
- if (val <= rps->min_freq_softlimit)
- new_power = LOW_POWER;
- if (val >= rps->max_freq_softlimit)
- new_power = HIGH_POWER;
- if (new_power == rps->power)
+ if (new_power == rps->power.mode)
return;
/* Note the units here are not exactly 1us, but 1280ns. */
@@ -6362,12 +6335,71 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
GEN6_RP_DOWN_IDLE_AVG);
skip_hw_write:
- rps->power = new_power;
- rps->up_threshold = threshold_up;
- rps->down_threshold = threshold_down;
+ rps->power.mode = new_power;
+ rps->power.up_threshold = threshold_up;
+ rps->power.down_threshold = threshold_down;
+}
+
+static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ int new_power;
+
+ new_power = rps->power.mode;
+ switch (rps->power.mode) {
+ case LOW_POWER:
+ if (val > rps->efficient_freq + 1 &&
+ val > rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+
+ case BETWEEN:
+ if (val <= rps->efficient_freq &&
+ val < rps->cur_freq)
+ new_power = LOW_POWER;
+ else if (val >= rps->rp0_freq &&
+ val > rps->cur_freq)
+ new_power = HIGH_POWER;
+ break;
+
+ case HIGH_POWER:
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+ val < rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+ }
+ /* Max/min bins are special */
+ if (val <= rps->min_freq_softlimit)
+ new_power = LOW_POWER;
+ if (val >= rps->max_freq_softlimit)
+ new_power = HIGH_POWER;
+
+ mutex_lock(&rps->power.mutex);
+ if (rps->power.interactive)
+ new_power = HIGH_POWER;
+ rps_set_power(dev_priv, new_power);
+ mutex_unlock(&rps->power.mutex);
rps->last_adj = 0;
}
+void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
+{
+ struct intel_rps *rps = &i915->gt_pm.rps;
+
+ if (INTEL_GEN(i915) < 6)
+ return;
+
+ mutex_lock(&rps->power.mutex);
+ if (interactive) {
+ if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
+ rps_set_power(i915, HIGH_POWER);
+ } else {
+ GEM_BUG_ON(!rps->power.interactive);
+ rps->power.interactive--;
+ }
+ mutex_unlock(&rps->power.mutex);
+}
+
static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -6780,7 +6812,7 @@ static void reset_rps(struct drm_i915_private *dev_priv,
u8 freq = rps->cur_freq;
/* force a reset */
- rps->power = -1;
+ rps->power.mode = -1;
rps->cur_freq = -1;
if (set(dev_priv, freq))
@@ -9604,6 +9636,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
void intel_pm_setup(struct drm_i915_private *dev_priv)
{
mutex_init(&dev_priv->pcu_lock);
+ mutex_init(&dev_priv->gt_pm.rps.power.mutex);
atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);