diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-03-29 15:45:37 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2023-03-29 15:45:38 +0200 |
commit | 929ae7c2e3adbbb2c2bddcd16854a6b11b56e95a (patch) | |
tree | f023b52847e7f71ad12eedb157f0d7aa77753b92 /drivers | |
parent | Merge tag 'exynos-drm-next-for-v6.4' of git://git.kernel.org/pub/scm/linux/ke... (diff) | |
parent | drm/atomic-helper: Set fence deadline for vblank (diff) | |
download | linux-929ae7c2e3adbbb2c2bddcd16854a6b11b56e95a.tar.xz linux-929ae7c2e3adbbb2c2bddcd16854a6b11b56e95a.zip |
Merge tag 'dma-fence-deadline' of https://gitlab.freedesktop.org/drm/msm into drm-next
This series adds a deadline hint to fences, so realtime deadlines
such as vblank can be communicated to the fence signaller for power/
frequency management decisions.
This is partially inspired by a trick i915 does, but implemented
via dma-fence for a couple of reasons:
1) To continue to be able to use the atomic helpers
2) To support cases where display and gpu are different drivers
See https://patchwork.freedesktop.org/series/93035/
This does not yet add any UAPI, although this will be needed in
a number of cases:
1) Workloads "ping-ponging" between CPU and GPU, where we don't
want the GPU freq governor to interpret time stalled waiting
for GPU as "idle" time
2) Cases where the compositor is waiting for fences to be signaled
before issuing the atomic ioctl, for example to maintain 60fps
cursor updates even when the GPU is not able to maintain that
framerate.
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGt5nDQpa6J86V1oFKPA30YcJzPhAVpmF7N1K1g2N3c=Zg@mail.gmail.com
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/dma-buf/dma-fence-array.c | 11 | ||||
-rw-r--r-- | drivers/dma-buf/dma-fence-chain.c | 12 | ||||
-rw-r--r-- | drivers/dma-buf/dma-fence.c | 59 | ||||
-rw-r--r-- | drivers/dma-buf/dma-resv.c | 22 | ||||
-rw-r--r-- | drivers/gpu/drm/drm_atomic_helper.c | 37 | ||||
-rw-r--r-- | drivers/gpu/drm/drm_vblank.c | 53 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_fence.c | 46 | ||||
-rw-r--r-- | drivers/gpu/drm/scheduler/sched_main.c | 2 |
8 files changed, 232 insertions, 10 deletions
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 5c8a7084577b..9b3ce8948351 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -123,12 +123,23 @@ static void dma_fence_array_release(struct dma_fence *fence) dma_fence_free(fence); } +static void dma_fence_array_set_deadline(struct dma_fence *fence, + ktime_t deadline) +{ + struct dma_fence_array *array = to_dma_fence_array(fence); + unsigned i; + + for (i = 0; i < array->num_fences; ++i) + dma_fence_set_deadline(array->fences[i], deadline); +} + const struct dma_fence_ops dma_fence_array_ops = { .get_driver_name = dma_fence_array_get_driver_name, .get_timeline_name = dma_fence_array_get_timeline_name, .enable_signaling = dma_fence_array_enable_signaling, .signaled = dma_fence_array_signaled, .release = dma_fence_array_release, + .set_deadline = dma_fence_array_set_deadline, }; EXPORT_SYMBOL(dma_fence_array_ops); diff --git a/drivers/dma-buf/dma-fence-chain.c b/drivers/dma-buf/dma-fence-chain.c index a0d920576ba6..9663ba1bb6ac 100644 --- a/drivers/dma-buf/dma-fence-chain.c +++ b/drivers/dma-buf/dma-fence-chain.c @@ -206,6 +206,17 @@ static void dma_fence_chain_release(struct dma_fence *fence) dma_fence_free(fence); } + +static void dma_fence_chain_set_deadline(struct dma_fence *fence, + ktime_t deadline) +{ + dma_fence_chain_for_each(fence, fence) { + struct dma_fence *f = dma_fence_chain_contained(fence); + + dma_fence_set_deadline(f, deadline); + } +} + const struct dma_fence_ops dma_fence_chain_ops = { .use_64bit_seqno = true, .get_driver_name = dma_fence_chain_get_driver_name, @@ -213,6 +224,7 @@ const struct dma_fence_ops dma_fence_chain_ops = { .enable_signaling = dma_fence_chain_enable_signaling, .signaled = dma_fence_chain_signaled, .release = dma_fence_chain_release, + .set_deadline = dma_fence_chain_set_deadline, }; EXPORT_SYMBOL(dma_fence_chain_ops); diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 0de0482cd36e..f177c56269bb 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -913,6 +913,65 @@ err_free_cb: EXPORT_SYMBOL(dma_fence_wait_any_timeout); /** + * DOC: deadline hints + * + * In an ideal world, it would be possible to pipeline a workload sufficiently + * that a utilization based device frequency governor could arrive at a minimum + * frequency that meets the requirements of the use-case, in order to minimize + * power consumption. But in the real world there are many workloads which + * defy this ideal. For example, but not limited to: + * + * * Workloads that ping-pong between device and CPU, with alternating periods + * of CPU waiting for device, and device waiting on CPU. This can result in + * devfreq and cpufreq seeing idle time in their respective domains and in + * result reduce frequency. + * + * * Workloads that interact with a periodic time based deadline, such as double + * buffered GPU rendering vs vblank sync'd page flipping. In this scenario, + * missing a vblank deadline results in an *increase* in idle time on the GPU + * (since it has to wait an additional vblank period), sending a signal to + * the GPU's devfreq to reduce frequency, when in fact the opposite is what is + * needed. + * + * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline. + * The deadline hint provides a way for the waiting driver, or userspace, to + * convey an appropriate sense of urgency to the signaling driver. + * + * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace + * facing APIs). The time could either be some point in the future (such as + * the vblank based deadline for page-flipping, or the start of a compositor's + * composition cycle), or the current time to indicate an immediate deadline + * hint (Ie. forward progress cannot be made until this fence is signaled). + * + * Multiple deadlines may be set on a given fence, even in parallel. See the + * documentation for &dma_fence_ops.set_deadline. + * + * The deadline hint is just that, a hint. The driver that created the fence + * may react by increasing frequency, making different scheduling choices, etc. + * Or doing nothing at all. + */ + +/** + * dma_fence_set_deadline - set desired fence-wait deadline hint + * @fence: the fence that is to be waited on + * @deadline: the time by which the waiter hopes for the fence to be + * signaled + * + * Give the fence signaler a hint about an upcoming deadline, such as + * vblank, by which point the waiter would prefer the fence to be + * signaled by. This is intended to give feedback to the fence signaler + * to aid in power management decisions, such as boosting GPU frequency + * if a periodic vblank deadline is approaching but the fence is not + * yet signaled.. + */ +void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) +{ + if (fence->ops->set_deadline && !dma_fence_is_signaled(fence)) + fence->ops->set_deadline(fence, deadline); +} +EXPORT_SYMBOL(dma_fence_set_deadline); + +/** * dma_fence_describe - Dump fence describtion into seq_file * @fence: the 6fence to describe * @seq: the seq_file to put the textual description into diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index 1c76aed8e262..2a594b754af1 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -684,6 +684,28 @@ long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, } EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); +/** + * dma_resv_set_deadline - Set a deadline on reservation's objects fences + * @obj: the reservation object + * @usage: controls which fences to include, see enum dma_resv_usage. + * @deadline: the requested deadline (MONOTONIC) + * + * May be called without holding the dma_resv lock. Sets @deadline on + * all fences filtered by @usage. + */ +void dma_resv_set_deadline(struct dma_resv *obj, enum dma_resv_usage usage, + ktime_t deadline) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + dma_resv_iter_begin(&cursor, obj, usage); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + dma_fence_set_deadline(fence, deadline); + } + dma_resv_iter_end(&cursor); +} +EXPORT_SYMBOL_GPL(dma_resv_set_deadline); /** * dma_resv_test_signaled - Test if a reservation object's fences have been diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 8606876f7233..d4d2a2ce40f8 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1511,6 +1511,41 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev, } EXPORT_SYMBOL(drm_atomic_helper_commit_modeset_enables); +/* + * For atomic updates which touch just a single CRTC, calculate the time of the + * next vblank, and inform all the fences of the deadline. + */ +static void set_fence_deadline(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *new_crtc_state; + struct drm_plane *plane; + struct drm_plane_state *new_plane_state; + ktime_t vbltime = 0; + int i; + + for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) { + ktime_t v; + + if (drm_crtc_next_vblank_start(crtc, &v)) + continue; + + if (!vbltime || ktime_before(v, vbltime)) + vbltime = v; + } + + /* If no CRTCs updated, then nothing to do: */ + if (!vbltime) + return; + + for_each_new_plane_in_state (state, plane, new_plane_state, i) { + if (!new_plane_state->fence) + continue; + dma_fence_set_deadline(new_plane_state->fence, vbltime); + } +} + /** * drm_atomic_helper_wait_for_fences - wait for fences stashed in plane state * @dev: DRM device @@ -1540,6 +1575,8 @@ int drm_atomic_helper_wait_for_fences(struct drm_device *dev, struct drm_plane_state *new_plane_state; int i, ret; + set_fence_deadline(dev, state); + for_each_new_plane_in_state(state, plane, new_plane_state, i) { if (!new_plane_state->fence) continue; diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index 2ff31717a3de..299fa2a19a90 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -844,10 +844,9 @@ bool drm_crtc_vblank_helper_get_vblank_timestamp(struct drm_crtc *crtc, EXPORT_SYMBOL(drm_crtc_vblank_helper_get_vblank_timestamp); /** - * drm_get_last_vbltimestamp - retrieve raw timestamp for the most recent - * vblank interval - * @dev: DRM device - * @pipe: index of CRTC whose vblank timestamp to retrieve + * drm_crtc_get_last_vbltimestamp - retrieve raw timestamp for the most + * recent vblank interval + * @crtc: CRTC whose vblank timestamp to retrieve * @tvblank: Pointer to target time which should receive the timestamp * @in_vblank_irq: * True when called from drm_crtc_handle_vblank(). Some drivers @@ -865,10 +864,9 @@ EXPORT_SYMBOL(drm_crtc_vblank_helper_get_vblank_timestamp); * True if timestamp is considered to be very precise, false otherwise. */ static bool -drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, - ktime_t *tvblank, bool in_vblank_irq) +drm_crtc_get_last_vbltimestamp(struct drm_crtc *crtc, ktime_t *tvblank, + bool in_vblank_irq) { - struct drm_crtc *crtc = drm_crtc_from_index(dev, pipe); bool ret = false; /* Define requested maximum error on timestamps (nanoseconds). */ @@ -876,8 +874,6 @@ drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, /* Query driver if possible and precision timestamping enabled. */ if (crtc && crtc->funcs->get_vblank_timestamp && max_error > 0) { - struct drm_crtc *crtc = drm_crtc_from_index(dev, pipe); - ret = crtc->funcs->get_vblank_timestamp(crtc, &max_error, tvblank, in_vblank_irq); } @@ -891,6 +887,15 @@ drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, return ret; } +static bool +drm_get_last_vbltimestamp(struct drm_device *dev, unsigned int pipe, + ktime_t *tvblank, bool in_vblank_irq) +{ + struct drm_crtc *crtc = drm_crtc_from_index(dev, pipe); + + return drm_crtc_get_last_vbltimestamp(crtc, tvblank, in_vblank_irq); +} + /** * drm_crtc_vblank_count - retrieve "cooked" vblank counter value * @crtc: which counter to retrieve @@ -980,6 +985,36 @@ u64 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc, } EXPORT_SYMBOL(drm_crtc_vblank_count_and_time); +/** + * drm_crtc_next_vblank_start - calculate the time of the next vblank + * @crtc: the crtc for which to calculate next vblank time + * @vblanktime: pointer to time to receive the next vblank timestamp. + * + * Calculate the expected time of the start of the next vblank period, + * based on time of previous vblank and frame duration + */ +int drm_crtc_next_vblank_start(struct drm_crtc *crtc, ktime_t *vblanktime) +{ + unsigned int pipe = drm_crtc_index(crtc); + struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe]; + struct drm_display_mode *mode = &vblank->hwmode; + u64 vblank_start; + + if (!vblank->framedur_ns || !vblank->linedur_ns) + return -EINVAL; + + if (!drm_crtc_get_last_vbltimestamp(crtc, vblanktime, false)) + return -EINVAL; + + vblank_start = DIV_ROUND_DOWN_ULL( + (u64)vblank->framedur_ns * mode->crtc_vblank_start, + mode->crtc_vtotal); + *vblanktime = ktime_add(*vblanktime, ns_to_ktime(vblank_start)); + + return 0; +} +EXPORT_SYMBOL(drm_crtc_next_vblank_start); + static void send_vblank_event(struct drm_device *dev, struct drm_pending_vblank_event *e, u64 seq, ktime_t now) diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index 7fd869520ef2..fe9c6468e440 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -123,6 +123,37 @@ static void drm_sched_fence_release_finished(struct dma_fence *f) dma_fence_put(&fence->scheduled); } +static void drm_sched_fence_set_deadline_finished(struct dma_fence *f, + ktime_t deadline) +{ + struct drm_sched_fence *fence = to_drm_sched_fence(f); + struct dma_fence *parent; + unsigned long flags; + + spin_lock_irqsave(&fence->lock, flags); + + /* If we already have an earlier deadline, keep it: */ + if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags) && + ktime_before(fence->deadline, deadline)) { + spin_unlock_irqrestore(&fence->lock, flags); + return; + } + + fence->deadline = deadline; + set_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, &f->flags); + + spin_unlock_irqrestore(&fence->lock, flags); + + /* + * smp_load_aquire() to ensure that if we are racing another + * thread calling drm_sched_fence_set_parent(), that we see + * the parent set before it calls test_bit(HAS_DEADLINE_BIT) + */ + parent = smp_load_acquire(&fence->parent); + if (parent) + dma_fence_set_deadline(parent, deadline); +} + static const struct dma_fence_ops drm_sched_fence_ops_scheduled = { .get_driver_name = drm_sched_fence_get_driver_name, .get_timeline_name = drm_sched_fence_get_timeline_name, @@ -133,6 +164,7 @@ static const struct dma_fence_ops drm_sched_fence_ops_finished = { .get_driver_name = drm_sched_fence_get_driver_name, .get_timeline_name = drm_sched_fence_get_timeline_name, .release = drm_sched_fence_release_finished, + .set_deadline = drm_sched_fence_set_deadline_finished, }; struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) @@ -147,6 +179,20 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) } EXPORT_SYMBOL(to_drm_sched_fence); +void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence, + struct dma_fence *fence) +{ + /* + * smp_store_release() to ensure another thread racing us + * in drm_sched_fence_set_deadline_finished() sees the + * fence's parent set before test_bit() + */ + smp_store_release(&s_fence->parent, dma_fence_get(fence)); + if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT, + &s_fence->finished.flags)) + dma_fence_set_deadline(fence, s_fence->deadline); +} + struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity, void *owner) { diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 214364fccb71..250c46cd9c34 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1048,7 +1048,7 @@ static int drm_sched_main(void *param) drm_sched_fence_scheduled(s_fence); if (!IS_ERR_OR_NULL(fence)) { - s_fence->parent = dma_fence_get(fence); + drm_sched_fence_set_parent(s_fence, fence); /* Drop for original kref_init of the fence */ dma_fence_put(fence); |