diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2019-03-25 11:05:11 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2019-03-25 11:05:12 +0100 |
commit | 0bec6219e5a0cf2dd17716949a7592807e10f3d7 (patch) | |
tree | 3eabbc70c5d9c053fbdc269bc09bf622b6ad1400 /drivers/gpu/drm/vc4 | |
parent | Merge tag 'du-next-20190318' of git://linuxtv.org/pinchartl/media into drm-next (diff) | |
parent | drm/fourcc: Fix conflicting Y41x definitions (diff) | |
download | linux-0bec6219e5a0cf2dd17716949a7592807e10f3d7.tar.xz linux-0bec6219e5a0cf2dd17716949a7592807e10f3d7.zip |
Merge tag 'drm-misc-next-2019-03-21' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for 5.2:
UAPI Changes:
- Add Colorspace connector property (Uma)
- fourcc: Several new YUV formats from ARM (Brian & Ayan)
- fourcc: Fix merge conflicts between new formats above and Swati's that
went in via topic/hdr-formats-2019-03-07 branch (Maarten)
Cross-subsystem Changes:
- Typed component support via topic/component-typed-2019-02-11 (Maxime/Daniel)
Core Changes:
- Improve component helper documentation (Daniel)
- Avoid calling drm_dev_unregister() twice on unplugged devices (Noralf)
- Add device managed (devm) drm_device init function (Noralf)
- Graduate TINYDRM_MODE to DRM_SIMPLE_MODE in core (Noralf)
- Move MIPI/DSI rate control params computation into core from i915 (David)
- Add support for shmem backed gem objects (Noralf)
Driver Changes:
- various: Use of_node_name_eq for node name comparisons (Rob Herring)
- sun4i: Add DSI burst mode support (Konstantin)
- panel: Add Ronbo RB070D30 MIPI/DSI panel support (Konstantin)
- virtio: A few prime improvements (Gerd)
- tinydrm: Remove tinydrm_device (Noralf)
- vc4: Add load tracker to driver to detect underflow in atomic check (Boris)
- vboxvideo: Move it out of staging \o/ (Hans)
- v3d: Add support for V3D v4.2 (Eric)
Cc: Konstantin Sudakov <k.sudakov@integrasources.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Cc: Noralf Trønnes <noralf@tronnes.org>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Francis <David.Francis@amd.com>
Cc: Boris Brezillon <boris.brezillon@bootlin.com>
Cc: Eric Anholt <eric@anholt.net>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Brian Starkey <brian.starkey@arm.com>
Cc: Ayan Kumar Halder <ayan.halder@arm.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
From: Sean Paul <sean@poorly.run>
Link: https://patchwork.freedesktop.org/patch/msgid/20190321170805.GA50145@art_vandelay
Diffstat (limited to 'drivers/gpu/drm/vc4')
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_bo.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_crtc.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_debugfs.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 18 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_hvs.c | 95 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_kms.c | 122 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 59 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_regs.h | 51 | ||||
-rw-r--r-- | drivers/gpu/drm/vc4/vc4_txp.c | 3 |
11 files changed, 344 insertions, 71 deletions
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 8dcce7182bb7..92e3f98d8478 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -201,8 +201,6 @@ static void vc4_bo_destroy(struct vc4_bo *bo) bo->validated_shader = NULL; } - reservation_object_fini(&bo->_resv); - drm_gem_cma_free_object(obj); } @@ -427,8 +425,6 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++; vc4->bo_labels[VC4_BO_TYPE_KERNEL].size_allocated += size; mutex_unlock(&vc4->bo_lock); - bo->resv = &bo->_resv; - reservation_object_init(bo->resv); return &bo->base.base; } @@ -684,13 +680,6 @@ static void vc4_bo_cache_time_timer(struct timer_list *t) schedule_work(&vc4->bo_cache.time_work); } -struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) -{ - struct vc4_bo *bo = to_vc4_bo(obj); - - return bo->resv; -} - struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { @@ -822,14 +811,12 @@ vc4_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt) { struct drm_gem_object *obj; - struct vc4_bo *bo; obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); if (IS_ERR(obj)) return obj; - bo = to_vc4_bo(obj); - bo->resv = attach->dmabuf->resv; + obj->resv = attach->dmabuf->resv; return obj; } diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 730008d3da76..64c964b7c577 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -834,6 +834,14 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; drm_crtc_vblank_put(crtc); + + /* Wait for the page flip to unmask the underrun to ensure that + * the display list was updated by the hardware. Before that + * happens, the HVS will be using the previous display list with + * the CRTC and encoder already reconfigured, leading to + * underruns. This can be seen when reconfiguring the CRTC. + */ + vc4_hvs_unmask_underrun(dev, vc4_crtc->channel); } spin_unlock_irqrestore(&dev->event_lock, flags); } diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c index 7a0003de71ab..59cdad89f844 100644 --- a/drivers/gpu/drm/vc4/vc4_debugfs.c +++ b/drivers/gpu/drm/vc4/vc4_debugfs.c @@ -23,6 +23,7 @@ static const struct drm_info_list vc4_debugfs_list[] = { {"vec_regs", vc4_vec_debugfs_regs, 0}, {"txp_regs", vc4_txp_debugfs_regs, 0}, {"hvs_regs", vc4_hvs_debugfs_regs, 0}, + {"hvs_underrun", vc4_hvs_debugfs_underrun, 0}, {"crtc0_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)0}, {"crtc1_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)1}, {"crtc2_regs", vc4_crtc_debugfs_regs, 0, (void *)(uintptr_t)2}, @@ -35,6 +36,15 @@ static const struct drm_info_list vc4_debugfs_list[] = { int vc4_debugfs_init(struct drm_minor *minor) { + struct vc4_dev *vc4 = to_vc4_dev(minor->dev); + struct dentry *dentry; + + dentry = debugfs_create_bool("hvs_load_tracker", S_IRUGO | S_IWUSR, + minor->debugfs_root, + &vc4->load_tracker_enabled); + if (!dentry) + return -ENOMEM; + return drm_debugfs_create_files(vc4_debugfs_list, VC4_DEBUGFS_ENTRIES, minor->debugfs_root, minor); } diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 5fcd2f0da7f7..4daf44fd4548 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -200,7 +200,6 @@ static struct drm_driver vc4_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, .gem_prime_export = vc4_prime_export, - .gem_prime_res_obj = vc4_prime_res_obj, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, .gem_prime_import_sg_table = vc4_prime_import_sg_table, .gem_prime_vmap = vc4_prime_vmap, @@ -287,7 +286,7 @@ static int vc4_drm_bind(struct device *dev) vc4_kms_load(drm); - drm_fbdev_generic_setup(drm, 32); + drm_fbdev_generic_setup(drm, 16); return 0; @@ -312,6 +311,7 @@ static void vc4_drm_unbind(struct device *dev) drm_mode_config_cleanup(drm); + drm_atomic_private_obj_fini(&vc4->load_tracker); drm_atomic_private_obj_fini(&vc4->ctm_manager); drm_dev_put(drm); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 2c635f001c71..7a3c093e7443 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -7,7 +7,6 @@ */ #include <linux/mm_types.h> -#include <linux/reservation.h> #include <drm/drmP.h> #include <drm/drm_util.h> #include <drm/drm_encoder.h> @@ -185,10 +184,20 @@ struct vc4_dev { /* Bitmask of the current bin_alloc used for overflow memory. */ uint32_t bin_alloc_overflow; + /* Incremented when an underrun error happened after an atomic commit. + * This is particularly useful to detect when a specific modeset is too + * demanding in term of memory or HVS bandwidth which is hard to guess + * at atomic check time. + */ + atomic_t underrun; + struct work_struct overflow_mem_work; int power_refcount; + /* Set to true when the load tracker is active. */ + bool load_tracker_enabled; + /* Mutex controlling the power refcount. */ struct mutex power_lock; @@ -201,6 +210,7 @@ struct vc4_dev { struct drm_modeset_lock ctm_state_lock; struct drm_private_obj ctm_manager; + struct drm_private_obj load_tracker; }; static inline struct vc4_dev * @@ -240,10 +250,6 @@ struct vc4_bo { */ struct vc4_validated_shader_info *validated_shader; - /* normally (resv == &_resv) except for imported bo's */ - struct reservation_object *resv; - struct reservation_object _resv; - /* One of enum vc4_kernel_bo_type, or VC4_BO_TYPE_COUNT + i * for user-allocated labels. */ @@ -376,6 +382,16 @@ struct vc4_plane_state { * when async update is not possible. */ bool dlist_initialized; + + /* Load of this plane on the HVS block. The load is expressed in HVS + * cycles/sec. + */ + u64 hvs_load; + + /* Memory bandwidth needed for this plane. This is expressed in + * bytes/sec. + */ + u64 membus_load; }; static inline struct vc4_plane_state * @@ -685,7 +701,6 @@ int vc4_label_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); vm_fault_t vc4_fault(struct vm_fault *vmf); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); -struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, @@ -773,6 +788,9 @@ void vc4_irq_reset(struct drm_device *dev); extern struct platform_driver vc4_hvs_driver; void vc4_hvs_dump_state(struct drm_device *dev); int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused); +int vc4_hvs_debugfs_underrun(struct seq_file *m, void *unused); +void vc4_hvs_unmask_underrun(struct drm_device *dev, int channel); +void vc4_hvs_mask_underrun(struct drm_device *dev, int channel); /* vc4_kms.c */ int vc4_kms_load(struct drm_device *dev); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index aea2b8dfec17..5ee5bf7fedf7 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -536,7 +536,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno; - reservation_object_add_shared_fence(bo->resv, exec->fence); + reservation_object_add_shared_fence(bo->base.base.resv, exec->fence); } list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -547,7 +547,7 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno; - reservation_object_add_excl_fence(bo->resv, exec->fence); + reservation_object_add_excl_fence(bo->base.base.resv, exec->fence); } } @@ -559,7 +559,7 @@ vc4_unlock_bo_reservations(struct drm_device *dev, int i; for (i = 0; i < exec->bo_count; i++) { - struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + struct drm_gem_object *bo = &exec->bo[i]->base; ww_mutex_unlock(&bo->resv->lock); } @@ -581,13 +581,13 @@ vc4_lock_bo_reservations(struct drm_device *dev, { int contended_lock = -1; int i, ret; - struct vc4_bo *bo; + struct drm_gem_object *bo; ww_acquire_init(acquire_ctx, &reservation_ww_class); retry: if (contended_lock != -1) { - bo = to_vc4_bo(&exec->bo[contended_lock]->base); + bo = &exec->bo[contended_lock]->base; ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, acquire_ctx); if (ret) { @@ -600,19 +600,19 @@ retry: if (i == contended_lock) continue; - bo = to_vc4_bo(&exec->bo[i]->base); + bo = &exec->bo[i]->base; ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); if (ret) { int j; for (j = 0; j < i; j++) { - bo = to_vc4_bo(&exec->bo[j]->base); + bo = &exec->bo[j]->base; ww_mutex_unlock(&bo->resv->lock); } if (contended_lock != -1 && contended_lock >= i) { - bo = to_vc4_bo(&exec->bo[contended_lock]->base); + bo = &exec->bo[contended_lock]->base; ww_mutex_unlock(&bo->resv->lock); } @@ -633,7 +633,7 @@ retry: * before we commit the CL to the hardware. */ for (i = 0; i < exec->bo_count; i++) { - bo = to_vc4_bo(&exec->bo[i]->base); + bo = &exec->bo[i]->base; ret = reservation_object_reserve_shared(bo->resv, 1); if (ret) { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 5d8c749c9749..918e71256ecc 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -22,6 +22,7 @@ * each CRTC. */ +#include <drm/drm_atomic_helper.h> #include <linux/component.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -102,6 +103,18 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused) return 0; } + +int vc4_hvs_debugfs_underrun(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_printer p = drm_seq_file_printer(m); + + drm_printf(&p, "%d\n", atomic_read(&vc4->underrun)); + + return 0; +} #endif /* The filter kernel is composed of dwords each containing 3 9-bit @@ -166,6 +179,67 @@ static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, return 0; } +void vc4_hvs_mask_underrun(struct drm_device *dev, int channel) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + u32 dispctrl = HVS_READ(SCALER_DISPCTRL); + + dispctrl &= ~SCALER_DISPCTRL_DSPEISLUR(channel); + + HVS_WRITE(SCALER_DISPCTRL, dispctrl); +} + +void vc4_hvs_unmask_underrun(struct drm_device *dev, int channel) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + u32 dispctrl = HVS_READ(SCALER_DISPCTRL); + + dispctrl |= SCALER_DISPCTRL_DSPEISLUR(channel); + + HVS_WRITE(SCALER_DISPSTAT, + SCALER_DISPSTAT_EUFLOW(channel)); + HVS_WRITE(SCALER_DISPCTRL, dispctrl); +} + +static void vc4_hvs_report_underrun(struct drm_device *dev) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + + atomic_inc(&vc4->underrun); + DRM_DEV_ERROR(dev->dev, "HVS underrun\n"); +} + +static irqreturn_t vc4_hvs_irq_handler(int irq, void *data) +{ + struct drm_device *dev = data; + struct vc4_dev *vc4 = to_vc4_dev(dev); + irqreturn_t irqret = IRQ_NONE; + int channel; + u32 control; + u32 status; + + status = HVS_READ(SCALER_DISPSTAT); + control = HVS_READ(SCALER_DISPCTRL); + + for (channel = 0; channel < SCALER_CHANNELS_COUNT; channel++) { + /* Interrupt masking is not always honored, so check it here. */ + if (status & SCALER_DISPSTAT_EUFLOW(channel) && + control & SCALER_DISPCTRL_DSPEISLUR(channel)) { + vc4_hvs_mask_underrun(dev, channel); + vc4_hvs_report_underrun(dev); + + irqret = IRQ_HANDLED; + } + } + + /* Clear every per-channel interrupt flag. */ + HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_IRQMASK(0) | + SCALER_DISPSTAT_IRQMASK(1) | + SCALER_DISPSTAT_IRQMASK(2)); + + return irqret; +} + static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -219,15 +293,36 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) dispctrl = HVS_READ(SCALER_DISPCTRL); dispctrl |= SCALER_DISPCTRL_ENABLE; + dispctrl |= SCALER_DISPCTRL_DISPEIRQ(0) | + SCALER_DISPCTRL_DISPEIRQ(1) | + SCALER_DISPCTRL_DISPEIRQ(2); /* Set DSP3 (PV1) to use HVS channel 2, which would otherwise * be unused. */ dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK; + dispctrl &= ~(SCALER_DISPCTRL_DMAEIRQ | + SCALER_DISPCTRL_SLVWREIRQ | + SCALER_DISPCTRL_SLVRDEIRQ | + SCALER_DISPCTRL_DSPEIEOF(0) | + SCALER_DISPCTRL_DSPEIEOF(1) | + SCALER_DISPCTRL_DSPEIEOF(2) | + SCALER_DISPCTRL_DSPEIEOLN(0) | + SCALER_DISPCTRL_DSPEIEOLN(1) | + SCALER_DISPCTRL_DSPEIEOLN(2) | + SCALER_DISPCTRL_DSPEISLUR(0) | + SCALER_DISPCTRL_DSPEISLUR(1) | + SCALER_DISPCTRL_DSPEISLUR(2) | + SCALER_DISPCTRL_SCLEIRQ); dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX); HVS_WRITE(SCALER_DISPCTRL, dispctrl); + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_hvs_irq_handler, 0, "vc4 hvs", drm); + if (ret) + return ret; + return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 91b8c72ff361..5160cad25fce 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -34,6 +34,18 @@ static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv) return container_of(priv, struct vc4_ctm_state, base); } +struct vc4_load_tracker_state { + struct drm_private_state base; + u64 hvs_load; + u64 membus_load; +}; + +static struct vc4_load_tracker_state * +to_vc4_load_tracker_state(struct drm_private_state *priv) +{ + return container_of(priv, struct vc4_load_tracker_state, base); +} + static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state, struct drm_private_obj *manager) { @@ -138,6 +150,16 @@ vc4_atomic_complete_commit(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc; + int i; + + for (i = 0; i < dev->mode_config.num_crtc; i++) { + if (!state->crtcs[i].ptr || !state->crtcs[i].commit) + continue; + + vc4_crtc = to_vc4_crtc(state->crtcs[i].ptr); + vc4_hvs_mask_underrun(dev, vc4_crtc->channel); + } drm_atomic_helper_wait_for_fences(dev, state, false); @@ -385,6 +407,85 @@ vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) return 0; } +static int vc4_load_tracker_atomic_check(struct drm_atomic_state *state) +{ + struct drm_plane_state *old_plane_state, *new_plane_state; + struct vc4_dev *vc4 = to_vc4_dev(state->dev); + struct vc4_load_tracker_state *load_state; + struct drm_private_state *priv_state; + struct drm_plane *plane; + int i; + + priv_state = drm_atomic_get_private_obj_state(state, + &vc4->load_tracker); + if (IS_ERR(priv_state)) + return PTR_ERR(priv_state); + + load_state = to_vc4_load_tracker_state(priv_state); + for_each_oldnew_plane_in_state(state, plane, old_plane_state, + new_plane_state, i) { + struct vc4_plane_state *vc4_plane_state; + + if (old_plane_state->fb && old_plane_state->crtc) { + vc4_plane_state = to_vc4_plane_state(old_plane_state); + load_state->membus_load -= vc4_plane_state->membus_load; + load_state->hvs_load -= vc4_plane_state->hvs_load; + } + + if (new_plane_state->fb && new_plane_state->crtc) { + vc4_plane_state = to_vc4_plane_state(new_plane_state); + load_state->membus_load += vc4_plane_state->membus_load; + load_state->hvs_load += vc4_plane_state->hvs_load; + } + } + + /* Don't check the load when the tracker is disabled. */ + if (!vc4->load_tracker_enabled) + return 0; + + /* The absolute limit is 2Gbyte/sec, but let's take a margin to let + * the system work when other blocks are accessing the memory. + */ + if (load_state->membus_load > SZ_1G + SZ_512M) + return -ENOSPC; + + /* HVS clock is supposed to run @ 250Mhz, let's take a margin and + * consider the maximum number of cycles is 240M. + */ + if (load_state->hvs_load > 240000000ULL) + return -ENOSPC; + + return 0; +} + +static struct drm_private_state * +vc4_load_tracker_duplicate_state(struct drm_private_obj *obj) +{ + struct vc4_load_tracker_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + return &state->base; +} + +static void vc4_load_tracker_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct vc4_load_tracker_state *load_state; + + load_state = to_vc4_load_tracker_state(state); + kfree(load_state); +} + +static const struct drm_private_state_funcs vc4_load_tracker_state_funcs = { + .atomic_duplicate_state = vc4_load_tracker_duplicate_state, + .atomic_destroy_state = vc4_load_tracker_destroy_state, +}; + static int vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) { @@ -394,7 +495,11 @@ vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) if (ret < 0) return ret; - return drm_atomic_helper_check(dev, state); + ret = drm_atomic_helper_check(dev, state); + if (ret) + return ret; + + return vc4_load_tracker_atomic_check(state); } static const struct drm_mode_config_funcs vc4_mode_funcs = { @@ -407,8 +512,14 @@ int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_ctm_state *ctm_state; + struct vc4_load_tracker_state *load_state; int ret; + /* Start with the load tracker enabled. Can be disabled through the + * debugfs load_tracker file. + */ + vc4->load_tracker_enabled = true; + sema_init(&vc4->async_modeset, 1); /* Set support for vblank irq fast disable, before drm_vblank_init() */ @@ -436,6 +547,15 @@ int vc4_kms_load(struct drm_device *dev) drm_atomic_private_obj_init(dev, &vc4->ctm_manager, &ctm_state->base, &vc4_ctm_state_funcs); + load_state = kzalloc(sizeof(*load_state), GFP_KERNEL); + if (!load_state) { + drm_atomic_private_obj_fini(&vc4->ctm_manager); + return -ENOMEM; + } + + drm_atomic_private_obj_init(dev, &vc4->load_tracker, &load_state->base, + &vc4_load_tracker_state_funcs); + drm_mode_config_reset(dev); drm_kms_helper_poll_init(dev); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d098337c10e9..4d918d3e4858 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -488,6 +488,61 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state, } } +static void vc4_plane_calc_load(struct drm_plane_state *state) +{ + unsigned int hvs_load_shift, vrefresh, i; + struct drm_framebuffer *fb = state->fb; + struct vc4_plane_state *vc4_state; + struct drm_crtc_state *crtc_state; + unsigned int vscale_factor; + + vc4_state = to_vc4_plane_state(state); + crtc_state = drm_atomic_get_existing_crtc_state(state->state, + state->crtc); + vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); + + /* The HVS is able to process 2 pixels/cycle when scaling the source, + * 4 pixels/cycle otherwise. + * Alpha blending step seems to be pipelined and it's always operating + * at 4 pixels/cycle, so the limiting aspect here seems to be the + * scaler block. + * HVS load is expressed in clk-cycles/sec (AKA Hz). + */ + if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || + vc4_state->x_scaling[1] != VC4_SCALING_NONE || + vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) + hvs_load_shift = 1; + else + hvs_load_shift = 2; + + vc4_state->membus_load = 0; + vc4_state->hvs_load = 0; + for (i = 0; i < fb->format->num_planes; i++) { + /* Even if the bandwidth/plane required for a single frame is + * + * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh + * + * when downscaling, we have to read more pixels per line in + * the time frame reserved for a single line, so the bandwidth + * demand can be punctually higher. To account for that, we + * calculate the down-scaling factor and multiply the plane + * load by this number. We're likely over-estimating the read + * demand, but that's better than under-estimating it. + */ + vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], + vc4_state->crtc_h); + vc4_state->membus_load += vc4_state->src_w[i] * + vc4_state->src_h[i] * vscale_factor * + fb->format->cpp[i]; + vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; + } + + vc4_state->hvs_load *= vrefresh; + vc4_state->hvs_load >>= hvs_load_shift; + vc4_state->membus_load *= vrefresh; +} + static int vc4_plane_allocate_lbm(struct drm_plane_state *state) { struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); @@ -875,6 +930,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, */ vc4_state->dlist_initialized = 1; + vc4_plane_calc_load(state); + return 0; } @@ -1082,7 +1139,7 @@ static int vc4_prepare_fb(struct drm_plane *plane, bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); - fence = reservation_object_get_excl_rcu(bo->resv); + fence = reservation_object_get_excl_rcu(bo->base.base.resv); drm_atomic_set_fence_for_plane(state, fence); if (plane->state->fb == state->fb) diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 931088014272..c0c5fadaf7e3 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -212,11 +212,11 @@ #define PV_HACT_ACT 0x30 +#define SCALER_CHANNELS_COUNT 3 + #define SCALER_DISPCTRL 0x00000000 /* Global register for clock gating the HVS */ # define SCALER_DISPCTRL_ENABLE BIT(31) -# define SCALER_DISPCTRL_DSP2EISLUR BIT(15) -# define SCALER_DISPCTRL_DSP1EISLUR BIT(14) # define SCALER_DISPCTRL_DSP3_MUX_MASK VC4_MASK(19, 18) # define SCALER_DISPCTRL_DSP3_MUX_SHIFT 18 @@ -224,45 +224,25 @@ * SCALER_DISPSTAT_IRQDISP0. Note that short frame contributions are * always enabled. */ -# define SCALER_DISPCTRL_DSP0EISLUR BIT(13) -# define SCALER_DISPCTRL_DSP2EIEOLN BIT(12) -# define SCALER_DISPCTRL_DSP2EIEOF BIT(11) -# define SCALER_DISPCTRL_DSP1EIEOLN BIT(10) -# define SCALER_DISPCTRL_DSP1EIEOF BIT(9) +# define SCALER_DISPCTRL_DSPEISLUR(x) BIT(13 + (x)) /* Enables Display 0 end-of-line-N contribution to * SCALER_DISPSTAT_IRQDISP0 */ -# define SCALER_DISPCTRL_DSP0EIEOLN BIT(8) +# define SCALER_DISPCTRL_DSPEIEOLN(x) BIT(8 + ((x) * 2)) /* Enables Display 0 EOF contribution to SCALER_DISPSTAT_IRQDISP0 */ -# define SCALER_DISPCTRL_DSP0EIEOF BIT(7) +# define SCALER_DISPCTRL_DSPEIEOF(x) BIT(7 + ((x) * 2)) # define SCALER_DISPCTRL_SLVRDEIRQ BIT(6) # define SCALER_DISPCTRL_SLVWREIRQ BIT(5) # define SCALER_DISPCTRL_DMAEIRQ BIT(4) -# define SCALER_DISPCTRL_DISP2EIRQ BIT(3) -# define SCALER_DISPCTRL_DISP1EIRQ BIT(2) /* Enables interrupt generation on the enabled EOF/EOLN/EISLUR * bits and short frames.. */ -# define SCALER_DISPCTRL_DISP0EIRQ BIT(1) +# define SCALER_DISPCTRL_DISPEIRQ(x) BIT(1 + (x)) /* Enables interrupt generation on scaler profiler interrupt. */ # define SCALER_DISPCTRL_SCLEIRQ BIT(0) #define SCALER_DISPSTAT 0x00000004 -# define SCALER_DISPSTAT_COBLOW2 BIT(29) -# define SCALER_DISPSTAT_EOLN2 BIT(28) -# define SCALER_DISPSTAT_ESFRAME2 BIT(27) -# define SCALER_DISPSTAT_ESLINE2 BIT(26) -# define SCALER_DISPSTAT_EUFLOW2 BIT(25) -# define SCALER_DISPSTAT_EOF2 BIT(24) - -# define SCALER_DISPSTAT_COBLOW1 BIT(21) -# define SCALER_DISPSTAT_EOLN1 BIT(20) -# define SCALER_DISPSTAT_ESFRAME1 BIT(19) -# define SCALER_DISPSTAT_ESLINE1 BIT(18) -# define SCALER_DISPSTAT_EUFLOW1 BIT(17) -# define SCALER_DISPSTAT_EOF1 BIT(16) - # define SCALER_DISPSTAT_RESP_MASK VC4_MASK(15, 14) # define SCALER_DISPSTAT_RESP_SHIFT 14 # define SCALER_DISPSTAT_RESP_OKAY 0 @@ -270,23 +250,26 @@ # define SCALER_DISPSTAT_RESP_SLVERR 2 # define SCALER_DISPSTAT_RESP_DECERR 3 -# define SCALER_DISPSTAT_COBLOW0 BIT(13) +# define SCALER_DISPSTAT_COBLOW(x) BIT(13 + ((x) * 8)) /* Set when the DISPEOLN line is done compositing. */ -# define SCALER_DISPSTAT_EOLN0 BIT(12) +# define SCALER_DISPSTAT_EOLN(x) BIT(12 + ((x) * 8)) /* Set when VSTART is seen but there are still pixels in the current * output line. */ -# define SCALER_DISPSTAT_ESFRAME0 BIT(11) +# define SCALER_DISPSTAT_ESFRAME(x) BIT(11 + ((x) * 8)) /* Set when HSTART is seen but there are still pixels in the current * output line. */ -# define SCALER_DISPSTAT_ESLINE0 BIT(10) +# define SCALER_DISPSTAT_ESLINE(x) BIT(10 + ((x) * 8)) /* Set when the the downstream tries to read from the display FIFO * while it's empty. */ -# define SCALER_DISPSTAT_EUFLOW0 BIT(9) +# define SCALER_DISPSTAT_EUFLOW(x) BIT(9 + ((x) * 8)) /* Set when the display mode changes from RUN to EOF */ -# define SCALER_DISPSTAT_EOF0 BIT(8) +# define SCALER_DISPSTAT_EOF(x) BIT(8 + ((x) * 8)) + +# define SCALER_DISPSTAT_IRQMASK(x) VC4_MASK(13 + ((x) * 8), \ + 8 + ((x) * 8)) /* Set on AXI invalid DMA ID error. */ # define SCALER_DISPSTAT_DMA_ERROR BIT(7) @@ -298,12 +281,10 @@ * SCALER_DISPSTAT_RESP_ERROR is not SCALER_DISPSTAT_RESP_OKAY. */ # define SCALER_DISPSTAT_IRQDMA BIT(4) -# define SCALER_DISPSTAT_IRQDISP2 BIT(3) -# define SCALER_DISPSTAT_IRQDISP1 BIT(2) /* Set when any of the EOF/EOLN/ESFRAME/ESLINE bits are set and their * corresponding interrupt bit is enabled in DISPCTRL. */ -# define SCALER_DISPSTAT_IRQDISP0 BIT(1) +# define SCALER_DISPSTAT_IRQDISP(x) BIT(1 + (x)) /* On read, the profiler interrupt. On write, clear *all* interrupt bits. */ # define SCALER_DISPSTAT_IRQSCL BIT(0) diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 5dabd91f2d7e..cc2888dd7171 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -249,7 +249,6 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, struct drm_connector_state *conn_state) { struct drm_crtc_state *crtc_state; - struct drm_gem_cma_object *gem; struct drm_framebuffer *fb; int i; @@ -275,8 +274,6 @@ static int vc4_txp_connector_atomic_check(struct drm_connector *conn, if (i == ARRAY_SIZE(drm_fmts)) return -EINVAL; - gem = drm_fb_cma_get_gem_obj(fb, 0); - /* Pitch must be aligned on 16 bytes. */ if (fb->pitches[0] & GENMASK(3, 0)) return -EINVAL; |