diff options
100 files changed, 3709 insertions, 1571 deletions
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst new file mode 100644 index 000000000000..e52d0ce186fe --- /dev/null +++ b/Documentation/gpu/amdgpu.rst @@ -0,0 +1,117 @@ +========================= + drm/amdgpu AMDgpu driver +========================= + +The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core +Next (GCN) architecture. + +Core Driver Infrastructure +========================== + +This section covers core driver infrastructure. + +.. _amdgpu_memory_domains: + +Memory Domains +-------------- + +.. kernel-doc:: include/uapi/drm/amdgpu_drm.h + :doc: memory domains + +Buffer Objects +-------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c + :doc: amdgpu_object + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c + :internal: + +PRIME Buffer Sharing +-------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c + :doc: PRIME Buffer Sharing + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c + :internal: + +MMU Notifier +------------ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c + :doc: MMU Notifier + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c + :internal: + +AMDGPU Virtual Memory +--------------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c + :doc: GPUVM + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c + :internal: + +Interrupt Handling +------------------ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c + :doc: Interrupt Handling + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c + :internal: + +GPU Power/Thermal Controls and Monitoring +========================================= + +This section covers hwmon and power/thermal controls. + +HWMON Interfaces +---------------- + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: hwmon + +GPU sysfs Power State Interfaces +-------------------------------- + +GPU power controls are exposed via sysfs files. 
+ +power_dpm_state +~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: power_dpm_state + +power_dpm_force_performance_level +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: power_dpm_force_performance_level + +pp_table +~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: pp_table + +pp_od_clk_voltage +~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: pp_od_clk_voltage + +pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie + +pp_power_profile_mode +~~~~~~~~~~~~~~~~~~~~~ + +.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c + :doc: pp_power_profile_mode + diff --git a/Documentation/gpu/drivers.rst b/Documentation/gpu/drivers.rst index f982558fc25d..65be325bf282 100644 --- a/Documentation/gpu/drivers.rst +++ b/Documentation/gpu/drivers.rst @@ -4,6 +4,7 @@ GPU Driver Documentation .. toctree:: + amdgpu i915 meson pl111 diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index b08e9dcd9177..21b6b72a9ba8 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -395,6 +395,8 @@ VMA Offset Manager .. kernel-doc:: drivers/gpu/drm/drm_vma_manager.c :export: +.. _prime_buffer_sharing: + PRIME Buffer Sharing ==================== @@ -496,3 +498,21 @@ DRM Sync Objects .. kernel-doc:: drivers/gpu/drm/drm_syncobj.c :export: + +GPU Scheduler +============= + +Overview +-------- + +.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c + :doc: Overview + +Scheduler Function References +----------------------------- + +.. kernel-doc:: include/drm/gpu_scheduler.h + :internal: + +.. 
kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c + :export: diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 06192698bd96..5b393622f592 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -136,6 +136,7 @@ #define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02 #define GENERIC_OBJECT_ID_MXM_OPM 0x03 #define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin +#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05 /****************************************************/ /* Graphics Object ENUM ID Definition */ @@ -714,6 +715,13 @@ GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT) +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\ + GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT) + +#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\ + GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\ + GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT) /****************************************************/ /* Object Cap definition - Shared with BIOS */ /****************************************************/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a59c07590cee..71b9b861f60e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -968,6 +968,8 @@ struct amdgpu_gfx { struct amdgpu_irq_src eop_irq; struct amdgpu_irq_src priv_reg_irq; struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src cp_ecc_error_irq; + struct amdgpu_irq_src sq_irq; /* gfx status */ uint32_t gfx_current_status; /* ce ram size*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 
82312a7bc6ad..7a625f3989a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -31,6 +31,7 @@ #include <drm/drm_syncobj.h> #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_gmc.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, @@ -302,7 +303,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); /* Do the same for visible VRAM if half of it is free */ - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) { + if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) { u64 total_vis_vram = adev->gmc.visible_vram_size; u64 used_vis_vram = amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); @@ -359,7 +360,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, * to move it. Don't move anything if the threshold is zero. */ if (p->bytes_moved < p->bytes_moved_threshold) { - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) { /* And don't move a CPU_ACCESS_REQUIRED BO to limited * visible VRAM if we've depleted our allowance to do @@ -381,7 +382,7 @@ retry: r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && amdgpu_bo_in_cpu_visible_vram(bo)) p->bytes_moved_vis += ctx.bytes_moved; @@ -434,8 +435,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, /* Good we can try to move this BO somewhere else */ update_bytes_moved_vis = - adev->gmc.visible_vram_size < adev->gmc.real_vram_size && - amdgpu_bo_in_cpu_visible_vram(bo); + !amdgpu_gmc_vram_full_visible(&adev->gmc) && + amdgpu_bo_in_cpu_visible_vram(bo); amdgpu_ttm_placement_from_domain(bo, other); r = ttm_bo_validate(&bo->tbo, 
&bo->placement, &ctx); p->bytes_moved += ctx.bytes_moved; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index c5bb36275e93..64b3a1ed04dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -449,26 +449,28 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; + long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; idp = &mgr->ctx_handles; + mutex_lock(&mgr->lock); idr_for_each_entry(idp, ctx, id) { - if (!ctx->adev) + if (!ctx->adev) { + mutex_unlock(&mgr->lock); return; + } for (i = 0; i < ctx->adev->num_rings; i++) { if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) continue; - if (kref_read(&ctx->refcount) == 1) - drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, - &ctx->rings[i].entity); - else - DRM_ERROR("ctx %p is still alive\n", ctx); + max_wait = drm_sched_entity_do_release(&ctx->adev->rings[i]->sched, + &ctx->rings[i].entity, max_wait); } } + mutex_unlock(&mgr->lock); } void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3317d1536f4f..cd8177967e67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -25,6 +25,7 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/power_supply.h> #include <linux/kthread.h> #include <linux/console.h> #include <linux/slab.h> @@ -675,17 +676,15 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev, } /** - * amdgpu_device_gart_location - try to find GTT location + * amdgpu_device_gart_location - try to find GART location * * @adev: amdgpu device structure holding all necessary informations * @mc: memory controller structure holding memory informations * - * Function will place try to place GTT before or after VRAM. + * Function will place try to place GART before or after VRAM. 
* - * If GTT size is bigger than space left then we ajust GTT size. + * If GART size is bigger than space left then we ajust GART size. * Thus function will never fails. - * - * FIXME: when reducing GTT size align new size on power of 2. */ void amdgpu_device_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) @@ -698,13 +697,13 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, size_bf = mc->vram_start; if (size_bf > size_af) { if (mc->gart_size > size_bf) { - dev_warn(adev->dev, "limiting GTT\n"); + dev_warn(adev->dev, "limiting GART\n"); mc->gart_size = size_bf; } mc->gart_start = 0; } else { if (mc->gart_size > size_af) { - dev_warn(adev->dev, "limiting GTT\n"); + dev_warn(adev->dev, "limiting GART\n"); mc->gart_size = size_af; } /* VCE doesn't like it when BOs cross a 4GB segment, so align @@ -713,7 +712,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev, mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL); } mc->gart_end = mc->gart_start + mc->gart_size - 1; - dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n", + dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n", mc->gart_size >> 20, mc->gart_start, mc->gart_end); } @@ -1926,7 +1925,7 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_SMC, - AMD_CG_STATE_UNGATE); + AMD_PG_STATE_UNGATE); /* ungate SMC block first */ r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, @@ -2293,6 +2292,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_device_ip_late_init_func_handler); + adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; + /* Registers mapping */ /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index dd6203a0a6b7..9acfbee91c40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -402,7 +402,6 @@ struct amdgpu_dpm { u32 tdp_adjustment; u16 load_line_slope; bool power_control; - bool ac_power; /* special states active */ bool thermal_active; bool uvd_active; @@ -439,6 +438,7 @@ struct amdgpu_pm { struct amd_pp_display_configuration pm_display_cfg;/* set by dc */ uint32_t smu_prv_buffer_size; struct amdgpu_bo *smu_prv_buffer; + bool ac_power; }; #define R600_SSTU_DFLT 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b0bf2f24da48..a549483032b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -855,9 +855,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .runtime_idle = amdgpu_pmops_runtime_idle, }; +static int amdgpu_flush(struct file *f, fl_owner_t id) +{ + struct drm_file *file_priv = f->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); + + return 0; +} + + static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, + .flush = amdgpu_flush, .release = drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = amdgpu_mmap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 5fb156a01774..89743cdc1c2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -510,7 +510,6 @@ out: * @adev: amdgpu_device pointer * @vm: vm to update * @bo_va: bo_va to update - * @list: validation list * @operation: map, unmap or clear * * Update the bo_va directly after setting its address. 
Errors are not @@ -519,7 +518,6 @@ out: static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_va *bo_va, - struct list_head *list, uint32_t operation) { int r; @@ -612,7 +610,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = false; + tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); list_add(&tv.head, &list); } else { gobj = NULL; @@ -673,7 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list, + amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); error_backoff: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 893c2490b783..6cb4948233cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -109,4 +109,19 @@ struct amdgpu_gmc { const struct amdgpu_gmc_funcs *gmc_funcs; }; +/** + * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR + * + * @gmc: amdgpu_gmc pointer + * + * Returns: + * True if full VRAM is visible through the BAR + */ +static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) +{ + WARN_ON(gmc->real_vram_size < gmc->visible_vram_size); + + return (gmc->real_vram_size == gmc->visible_vram_size); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index f70eeed9ed76..31f8170313b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -353,7 +353,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) ring->funcs->type == AMDGPU_RING_TYPE_VCE || ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC || ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + 
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) tmo = tmo_mm; else tmo = tmo_gfx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 3a5ca462abf0..1abf5b5bac9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -25,6 +25,23 @@ * Alex Deucher * Jerome Glisse */ + +/** + * DOC: Interrupt Handling + * + * Interrupts generated within GPU hardware raise interrupt requests that are + * passed to amdgpu IRQ handler which is responsible for detecting source and + * type of the interrupt and dispatching matching handlers. If handling an + * interrupt requires calling kernel functions that may sleep processing is + * dispatched to work handlers. + * + * If MSI functionality is not disabled by module parameter then MSI + * support will be enabled. + * + * For GPU interrupt sources that may be driven by another driver, IRQ domain + * support is used (with mapping between virtual and hardware IRQs). + */ + #include <linux/irq.h> #include <drm/drmP.h> #include <drm/drm_crtc_helper.h> @@ -43,19 +60,21 @@ #define AMDGPU_WAIT_IDLE_TIMEOUT 200 -/* - * Handle hotplug events outside the interrupt handler proper. - */ /** - * amdgpu_hotplug_work_func - display hotplug work handler + * amdgpu_hotplug_work_func - work handler for display hotplug event * - * @work: work struct + * @work: work struct pointer * - * This is the hot plug event work handler (all asics). - * The work gets scheduled from the irq handler if there - * was a hot plug interrupt. It walks the connector table - * and calls the hotplug handler for each one, then sends - * a drm hotplug event to alert userspace. + * This is the hotplug event work handler (all ASICs). + * The work gets scheduled from the IRQ handler if there + * was a hotplug interrupt. It walks through the connector table + * and calls hotplug handler for each connector. 
After this, it sends + * a DRM hotplug event to alert userspace. + * + * This design approach is required in order to defer hotplug event handling + * from the IRQ handler to a work handler because hotplug handler has to use + * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may + * sleep). */ static void amdgpu_hotplug_work_func(struct work_struct *work) { @@ -74,13 +93,12 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) } /** - * amdgpu_irq_reset_work_func - execute gpu reset + * amdgpu_irq_reset_work_func - execute GPU reset * - * @work: work struct + * @work: work struct pointer * - * Execute scheduled gpu reset (cayman+). - * This function is called when the irq handler - * thinks we need a gpu reset. + * Execute scheduled GPU reset (Cayman+). + * This function is called when the IRQ handler thinks we need a GPU reset. */ static void amdgpu_irq_reset_work_func(struct work_struct *work) { @@ -91,7 +109,13 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, false); } -/* Disable *all* interrupts */ +/** + * amdgpu_irq_disable_all - disable *all* interrupts + * + * @adev: amdgpu device pointer + * + * Disable all types of interrupts from all sources. + */ void amdgpu_irq_disable_all(struct amdgpu_device *adev) { unsigned long irqflags; @@ -123,11 +147,15 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) } /** - * amdgpu_irq_handler - irq handler + * amdgpu_irq_handler - IRQ handler + * + * @irq: IRQ number (unused) + * @arg: pointer to DRM device * - * @int irq, void *arg: args + * IRQ handler for amdgpu driver (all ASICs). * - * This is the irq handler for the amdgpu driver (all asics). 
+ * Returns: + * result of handling the IRQ, as defined by &irqreturn_t */ irqreturn_t amdgpu_irq_handler(int irq, void *arg) { @@ -142,18 +170,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) } /** - * amdgpu_msi_ok - asic specific msi checks + * amdgpu_msi_ok - check whether MSI functionality is enabled * - * @adev: amdgpu device pointer + * @adev: amdgpu device pointer (unused) + * + * Checks whether MSI functionality has been disabled via module parameter + * (all ASICs). * - * Handles asic specific MSI checks to determine if - * MSIs should be enabled on a particular chip (all asics). - * Returns true if MSIs should be enabled, false if MSIs - * should not be enabled. + * Returns: + * *true* if MSIs are allowed to be enabled or *false* otherwise */ static bool amdgpu_msi_ok(struct amdgpu_device *adev) { - /* force MSI on */ if (amdgpu_msi == 1) return true; else if (amdgpu_msi == 0) @@ -163,12 +191,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev) } /** - * amdgpu_irq_init - init driver interrupt info + * amdgpu_irq_init - initialize interrupt handling * * @adev: amdgpu device pointer * - * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics). - * Returns 0 for success, error for failure. + * Sets up work functions for hotplug and reset interrupts, enables MSI + * functionality, initializes vblank, hotplug and reset interrupt handling. 
+ * + * Returns: + * 0 on success or error code on failure */ int amdgpu_irq_init(struct amdgpu_device *adev) { @@ -176,7 +207,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) spin_lock_init(&adev->irq.lock); - /* enable msi */ + /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; if (amdgpu_msi_ok(adev)) { @@ -189,7 +220,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) - /* Disable vblank irqs aggressively for power-saving */ + /* Disable vblank IRQs aggressively for power-saving */ /* XXX: can this be enabled for DC? */ adev->ddev->vblank_disable_immediate = true; @@ -197,7 +228,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (r) return r; - /* pre DCE11 */ + /* Pre-DCE11 */ INIT_WORK(&adev->hotplug_work, amdgpu_hotplug_work_func); } @@ -220,11 +251,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev) } /** - * amdgpu_irq_fini - tear down driver interrupt info + * amdgpu_irq_fini - shut down interrupt handling * * @adev: amdgpu device pointer * - * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics). + * Tears down work functions for hotplug and reset interrupts, disables MSI + * functionality, shuts down vblank, hotplug and reset interrupt handling, + * turns off interrupts from all sources (all ASICs). */ void amdgpu_irq_fini(struct amdgpu_device *adev) { @@ -264,12 +297,17 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) } /** - * amdgpu_irq_add_id - register irq source + * amdgpu_irq_add_id - register IRQ source * * @adev: amdgpu device pointer - * @src_id: source id for this source - * @source: irq source + * @client_id: client id + * @src_id: source id + * @source: IRQ source pointer + * + * Registers IRQ source on a client. 
* + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, @@ -312,12 +350,12 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, } /** - * amdgpu_irq_dispatch - dispatch irq to IP blocks + * amdgpu_irq_dispatch - dispatch IRQ to IP blocks * * @adev: amdgpu device pointer - * @entry: interrupt vector + * @entry: interrupt vector pointer * - * Dispatches the irq to the different IP blocks + * Dispatches IRQ to IP blocks. */ void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) @@ -361,13 +399,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } /** - * amdgpu_irq_update - update hw interrupt state + * amdgpu_irq_update - update hardware interrupt state * * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to update + * @src: interrupt source pointer + * @type: type of interrupt * - * Updates the interrupt state for a specific src (all asics). + * Updates interrupt state for the specific source (all ASICs). */ int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -378,7 +416,7 @@ int amdgpu_irq_update(struct amdgpu_device *adev, spin_lock_irqsave(&adev->irq.lock, irqflags); - /* we need to determine after taking the lock, otherwise + /* We need to determine after taking the lock, otherwise we might disable just enabled interrupts again */ if (amdgpu_irq_enabled(adev, src, type)) state = AMDGPU_IRQ_STATE_ENABLE; @@ -390,6 +428,14 @@ int amdgpu_irq_update(struct amdgpu_device *adev, return r; } +/** + * amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources + * + * @adev: amdgpu device pointer + * + * Updates state of all types of interrupts on all sources on resume after + * reset. 
+ */ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; @@ -413,10 +459,13 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) * amdgpu_irq_get - enable interrupt * * @adev: amdgpu device pointer - * @src: interrupt src you want to enable - * @type: type of interrupt you want to enable + * @src: interrupt source pointer + * @type: type of interrupt * - * Enables the interrupt type for a specific src (all asics). + * Enables specified type of interrupt on the specified source (all ASICs). + * + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -440,10 +489,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, * amdgpu_irq_put - disable interrupt * * @adev: amdgpu device pointer - * @src: interrupt src you want to disable - * @type: type of interrupt you want to disable + * @src: interrupt source pointer + * @type: type of interrupt + * + * Disables specified type of interrupt on the specified source (all ASICs). * - * Disables the interrupt type for a specific src (all asics). + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -464,12 +516,17 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, } /** - * amdgpu_irq_enabled - test if irq is enabled or not + * amdgpu_irq_enabled - check whether interrupt is enabled or not * * @adev: amdgpu device pointer - * @idx: interrupt src you want to test + * @src: interrupt source pointer + * @type: type of interrupt * - * Tests if the given interrupt source is enabled or not + * Checks whether the given type of interrupt is enabled on the given source. 
+ * + * Returns: + * *true* if interrupt is enabled, *false* if interrupt is disabled or on + * invalid parameters */ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type) @@ -486,7 +543,7 @@ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src, return !!atomic_read(&src->enabled_types[type]); } -/* gen irq */ +/* XXX: Generic IRQ handling */ static void amdgpu_irq_mask(struct irq_data *irqd) { /* XXX */ @@ -497,12 +554,26 @@ static void amdgpu_irq_unmask(struct irq_data *irqd) /* XXX */ } +/* amdgpu hardware interrupt chip descriptor */ static struct irq_chip amdgpu_irq_chip = { .name = "amdgpu-ih", .irq_mask = amdgpu_irq_mask, .irq_unmask = amdgpu_irq_unmask, }; +/** + * amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers + * + * @d: amdgpu IRQ domain pointer (unused) + * @irq: virtual IRQ number + * @hwirq: hardware irq number + * + * Current implementation assigns simple interrupt handler to the given virtual + * IRQ. + * + * Returns: + * 0 on success or error code otherwise + */ static int amdgpu_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { @@ -514,17 +585,21 @@ static int amdgpu_irqdomain_map(struct irq_domain *d, return 0; } +/* Implementation of methods for amdgpu IRQ domain */ static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = { .map = amdgpu_irqdomain_map, }; /** - * amdgpu_irq_add_domain - create a linear irq domain + * amdgpu_irq_add_domain - create a linear IRQ domain * * @adev: amdgpu device pointer * - * Create an irq domain for GPU interrupt sources + * Creates an IRQ domain for GPU interrupt sources * that may be driven by another driver (e.g., ACP). 
+ * + * Returns: + * 0 on success or error code otherwise */ int amdgpu_irq_add_domain(struct amdgpu_device *adev) { @@ -539,11 +614,11 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev) } /** - * amdgpu_irq_remove_domain - remove the irq domain + * amdgpu_irq_remove_domain - remove the IRQ domain * * @adev: amdgpu device pointer * - * Remove the irq domain for GPU interrupt sources + * Removes the IRQ domain for GPU interrupt sources * that may be driven by another driver (e.g., ACP). */ void amdgpu_irq_remove_domain(struct amdgpu_device *adev) @@ -555,16 +630,17 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev) } /** - * amdgpu_irq_create_mapping - create a mapping between a domain irq and a - * Linux irq + * amdgpu_irq_create_mapping - create mapping between domain and Linux IRQs * * @adev: amdgpu device pointer * @src_id: IH source id * - * Create a mapping between a domain irq (GPU IH src id) and a Linux irq + * Creates mapping between a domain IRQ (GPU IH src id) and a Linux IRQ. * Use this for components that generate a GPU interrupt, but are driven * by a different driver (e.g., ACP). - * Returns the Linux irq. + * + * Returns: + * Linux IRQ */ unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 91517b166a3b..2060f208e60b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -329,35 +329,35 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) ring_mask |= ((adev->gfx.gfx_ring[i].ready ? 
1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 8; + ib_start_alignment = 32; + ib_size_alignment = 32; break; case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 8; + ib_start_alignment = 32; + ib_size_alignment = 32; break; case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_UVD: type = AMD_IP_BLOCK_TYPE_UVD; for (i = 0; i < adev->uvd.num_uvd_inst; i++) ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 16; + ib_start_alignment = 64; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 4; ib_size_alignment = 1; break; case AMDGPU_HW_IP_UVD_ENC: @@ -367,22 +367,28 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ring_mask |= ((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) << (j + i * adev->uvd.num_enc_rings)); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; - ib_size_alignment = 1; + ib_start_alignment = 64; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_enc_rings; i++) ring_mask |= ((adev->vcn.ring_enc[i].ready ? 
1 : 0) << i); - ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_start_alignment = 64; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_VCN_JPEG: + type = AMD_IP_BLOCK_TYPE_VCN; + ring_mask = adev->vcn.ring_jpeg.ready ? 1 : 0; + ib_start_alignment = 16; + ib_size_alignment = 16; + break; default: return -EINVAL; } @@ -427,6 +433,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file break; case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: + case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; break; default: @@ -930,7 +937,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, return; pm_runtime_get_sync(dev->dev); - amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr); if (adev->asic_type != CHIP_RAVEN) { amdgpu_uvd_free_handles(adev, file_priv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 83e344fbb50a..72a3e8c68876 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -28,6 +28,21 @@ * Christian König <christian.koenig@amd.com> */ +/** + * DOC: MMU Notifier + * + * For coherent userptr handling registers an MMU notifier to inform the driver + * about updates on the page tables of a process. + * + * When somebody tries to invalidate the page tables we block the update until + * all operations on the pages in question are completed, then those pages are + * marked as accessed and also dirty if it wasn't a read only access. + * + * New command submissions using the userptrs in question are delayed until all + * page table invalidation are completed and we once more see a coherent process + * address space. 
+ */ + #include <linux/firmware.h> #include <linux/module.h> #include <linux/mmu_notifier.h> @@ -38,6 +53,21 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +/** + * struct amdgpu_mn + * + * @adev: amdgpu device pointer + * @mm: process address space + * @mn: MMU notifier structure + * @work: destruction work item + * @node: hash table node to find structure by adev and mn + * @lock: rw semaphore protecting the notifier nodes + * @objects: interval tree containing amdgpu_mn_nodes + * @read_lock: mutex for recursive locking of @lock + * @recursion: depth of recursion + * + * Data for each amdgpu device and process address space. + */ struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; @@ -58,13 +88,21 @@ struct amdgpu_mn { atomic_t recursion; }; +/** + * struct amdgpu_mn_node + * + * @it: interval node defining start-last of the affected address range + * @bos: list of all BOs in the affected address range + * + * Manages all BOs which are affected by a certain range of address space. 
+ */ struct amdgpu_mn_node { struct interval_tree_node it; struct list_head bos; }; /** - * amdgpu_mn_destroy - destroy the rmn + * amdgpu_mn_destroy - destroy the MMU notifier * * @work: previously sheduled work item * @@ -72,47 +110,50 @@ struct amdgpu_mn_node { */ static void amdgpu_mn_destroy(struct work_struct *work) { - struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work); - struct amdgpu_device *adev = rmn->adev; + struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work); + struct amdgpu_device *adev = amn->adev; struct amdgpu_mn_node *node, *next_node; struct amdgpu_bo *bo, *next_bo; mutex_lock(&adev->mn_lock); - down_write(&rmn->lock); - hash_del(&rmn->node); + down_write(&amn->lock); + hash_del(&amn->node); rbtree_postorder_for_each_entry_safe(node, next_node, - &rmn->objects.rb_root, it.rb) { + &amn->objects.rb_root, it.rb) { list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); } kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); - kfree(rmn); + mmu_notifier_unregister_no_release(&amn->mn, amn->mm); + kfree(amn); } /** * amdgpu_mn_release - callback to notify about mm destruction * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * * Shedule a work item to lazy destroy our notifier. 
*/ static void amdgpu_mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); - INIT_WORK(&rmn->work, amdgpu_mn_destroy); - schedule_work(&rmn->work); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); + + INIT_WORK(&amn->work, amdgpu_mn_destroy); + schedule_work(&amn->work); } /** - * amdgpu_mn_lock - take the write side lock for this mn + * amdgpu_mn_lock - take the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_lock(struct amdgpu_mn *mn) { @@ -121,7 +162,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_unlock - drop the write side lock for this mn + * amdgpu_mn_unlock - drop the write side lock for this notifier + * + * @mn: our notifier */ void amdgpu_mn_unlock(struct amdgpu_mn *mn) { @@ -130,40 +173,38 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn) } /** - * amdgpu_mn_read_lock - take the rmn read lock - * - * @rmn: our notifier + * amdgpu_mn_read_lock - take the read side lock for this notifier * - * Take the rmn read side lock. + * @amn: our notifier */ -static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_lock(struct amdgpu_mn *amn) { - mutex_lock(&rmn->read_lock); - if (atomic_inc_return(&rmn->recursion) == 1) - down_read_non_owner(&rmn->lock); - mutex_unlock(&rmn->read_lock); + mutex_lock(&amn->read_lock); + if (atomic_inc_return(&amn->recursion) == 1) + down_read_non_owner(&amn->lock); + mutex_unlock(&amn->read_lock); } /** - * amdgpu_mn_read_unlock - drop the rmn read lock - * - * @rmn: our notifier + * amdgpu_mn_read_unlock - drop the read side lock for this notifier * - * Drop the rmn read side lock. 
+ * @amn: our notifier */ -static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn) +static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn) { - if (atomic_dec_return(&rmn->recursion) == 0) - up_read_non_owner(&rmn->lock); + if (atomic_dec_return(&amn->recursion) == 0) + up_read_non_owner(&amn->lock); } /** * amdgpu_mn_invalidate_node - unmap all BOs of a node * * @node: the node with the BOs to unmap + * @start: start of address range affected + * @end: end of address range affected * - * We block for all BOs and unmap them by move them - * into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. */ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, unsigned long start, @@ -190,27 +231,27 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * - * We block for all BOs between start and end to be idle and - * unmap them by move them into system domain again. + * Block for operations on BOs to finish and mark pages as accessed and + * potentially dirty. 
*/ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(rmn); + amdgpu_mn_read_lock(amn); - it = interval_tree_iter_first(&rmn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; @@ -238,15 +279,15 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; /* notification is exclusive, but interval is inclusive */ end -= 1; - amdgpu_mn_read_lock(rmn); + amdgpu_mn_read_lock(amn); - it = interval_tree_iter_first(&rmn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; @@ -268,7 +309,7 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier - * @mn: the mm this callback is about + * @mm: the mm this callback is about * @start: start of updated range * @end: end of updated range * @@ -279,9 +320,9 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); - amdgpu_mn_read_unlock(rmn); + amdgpu_mn_read_unlock(amn); } static const struct mmu_notifier_ops amdgpu_mn_ops[] = { @@ -315,7 +356,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum 
amdgpu_mn_type type) { struct mm_struct *mm = current->mm; - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; unsigned long key = AMDGPU_MN_KEY(mm, type); int r; @@ -325,41 +366,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, key) - if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) + hash_for_each_possible(adev->mn_hash, amn, node, key) + if (AMDGPU_MN_KEY(amn->mm, amn->type) == key) goto release_locks; - rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); - if (!rmn) { - rmn = ERR_PTR(-ENOMEM); + amn = kzalloc(sizeof(*amn), GFP_KERNEL); + if (!amn) { + amn = ERR_PTR(-ENOMEM); goto release_locks; } - rmn->adev = adev; - rmn->mm = mm; - init_rwsem(&rmn->lock); - rmn->type = type; - rmn->mn.ops = &amdgpu_mn_ops[type]; - rmn->objects = RB_ROOT_CACHED; - mutex_init(&rmn->read_lock); - atomic_set(&rmn->recursion, 0); + amn->adev = adev; + amn->mm = mm; + init_rwsem(&amn->lock); + amn->type = type; + amn->mn.ops = &amdgpu_mn_ops[type]; + amn->objects = RB_ROOT_CACHED; + mutex_init(&amn->read_lock); + atomic_set(&amn->recursion, 0); - r = __mmu_notifier_register(&rmn->mn, mm); + r = __mmu_notifier_register(&amn->mn, mm); if (r) - goto free_rmn; + goto free_amn; - hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); + hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - return rmn; + return amn; -free_rmn: +free_amn: up_write(&mm->mmap_sem); mutex_unlock(&adev->mn_lock); - kfree(rmn); + kfree(amn); return ERR_PTR(r); } @@ -379,14 +420,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); enum amdgpu_mn_type type = bo->kfd_bo ? 
AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev, type); - if (IS_ERR(rmn)) - return PTR_ERR(rmn); + amn = amdgpu_mn_get(adev, type); + if (IS_ERR(amn)) + return PTR_ERR(amn); new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); if (!new_node) @@ -394,12 +435,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) INIT_LIST_HEAD(&bos); - down_write(&rmn->lock); + down_write(&amn->lock); - while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { + while ((it = interval_tree_iter_first(&amn->objects, addr, end))) { kfree(node); node = container_of(it, struct amdgpu_mn_node, it); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); addr = min(it->start, addr); end = max(it->last, end); list_splice(&node->bos, &bos); @@ -410,7 +451,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) else kfree(new_node); - bo->mn = rmn; + bo->mn = amn; node->it.start = addr; node->it.last = end; @@ -418,9 +459,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&bos, &node->bos); list_add(&bo->mn_list, &node->bos); - interval_tree_insert(&node->it, &rmn->objects); + interval_tree_insert(&node->it, &amn->objects); - up_write(&rmn->lock); + up_write(&amn->lock); return 0; } @@ -435,18 +476,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) void amdgpu_mn_unregister(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_mn *rmn; + struct amdgpu_mn *amn; struct list_head *head; mutex_lock(&adev->mn_lock); - rmn = bo->mn; - if (rmn == NULL) { + amn = bo->mn; + if (amn == NULL) { mutex_unlock(&adev->mn_lock); return; } - down_write(&rmn->lock); + down_write(&amn->lock); /* save the next list entry for later */ head = bo->mn_list.next; @@ -456,12 
+497,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) if (list_empty(head)) { struct amdgpu_mn_node *node; + node = container_of(head, struct amdgpu_mn_node, bos); - interval_tree_remove(&node->it, &rmn->objects); + interval_tree_remove(&node->it, &amn->objects); kfree(node); } - up_write(&rmn->lock); + up_write(&amn->lock); mutex_unlock(&adev->mn_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5e4e1bd90383..f5b0b180a6cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -38,6 +38,19 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +/** + * DOC: amdgpu_object + * + * This defines the interfaces to operate on an &amdgpu_bo buffer object which + * represents memory used by driver (VRAM, system memory, etc.). The driver + * provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces + * to create/destroy/set buffer object which are then managed by the kernel TTM + * memory manager. + * The interfaces are also used internally by kernel clients, including gfx, + * uvd, etc. for kernel managed allocations used by the GPU. + * + */ + static bool amdgpu_need_backup(struct amdgpu_device *adev) { if (adev->flags & AMD_IS_APU) @@ -73,6 +86,16 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) kfree(bo); } +/** + * amdgpu_ttm_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo + * @bo: buffer object to be checked + * + * Uses destroy function associated with the object to determine if this is + * an &amdgpu_bo. + * + * Returns: + * true if the object belongs to &amdgpu_bo, false if not. 
+ */ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) { if (bo->destroy == &amdgpu_ttm_bo_destroy) @@ -80,6 +103,14 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo) return false; } +/** + * amdgpu_ttm_placement_from_domain - set buffer's placement + * @abo: &amdgpu_bo buffer object whose placement is to be set + * @domain: requested domain + * + * Sets buffer's placement according to requested domain and the buffer's + * flags. + */ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) { struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -184,7 +215,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain) * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise. */ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, unsigned long size, int align, @@ -261,7 +293,8 @@ error_free: * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * - * Returns 0 on success, negative error code otherwise. + * Returns: + * 0 on success, negative error code otherwise. */ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, @@ -285,6 +318,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, * amdgpu_bo_free_kernel - free BO for kernel use * * @bo: amdgpu BO to free + * @gpu_addr: pointer to where the BO's GPU memory space address was stored + * @cpu_addr: pointer to where the BO's CPU memory space address was stored * * unmaps and unpin a BO for kernel internal use. 
*/ @@ -428,7 +463,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, if (unlikely(r != 0)) return r; - if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && bo->tbo.mem.mem_type == TTM_PL_VRAM && bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, @@ -498,6 +533,20 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, return r; } +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object; and if requested, also creates a + * shadow object. + * Shadow object is used to backup the original buffer object, and is always + * in GTT. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) @@ -527,6 +576,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; } +/** + * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be backed up + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies an &amdgpu_bo buffer object to its shadow object. + * Not used for now. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, @@ -559,6 +623,18 @@ err: return r; } +/** + * amdgpu_bo_validate - validate an &amdgpu_bo buffer object + * @bo: pointer to the buffer object + * + * Sets placement according to domain; and changes placement and caching + * policy of the buffer object according to the placement. + * This is used for validating shadow bos. It calls ttm_bo_validate() to + * make sure the buffer is resident where it needs to be. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_validate(struct amdgpu_bo *bo) { struct ttm_operation_ctx ctx = { false, false }; @@ -581,6 +657,22 @@ retry: return r; } +/** + * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @ring: amdgpu_ring for the engine handling the buffer operations + * @bo: &amdgpu_bo buffer to be restored + * @resv: reservation object with embedded fence + * @fence: dma_fence associated with the operation + * @direct: whether to submit the job directly + * + * Copies a buffer object's shadow content back to the object. + * This is used for recovering a buffer from its shadow in case of a gpu + * reset where vram context may be lost. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, struct amdgpu_ring *ring, struct amdgpu_bo *bo, @@ -613,6 +705,17 @@ err: return r; } +/** + * amdgpu_bo_kmap - map an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be mapped + * @ptr: kernel virtual address to be returned + * + * Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls + * amdgpu_bo_kptr() to get the kernel virtual address. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) { void *kptr; @@ -643,6 +746,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } +/** + * amdgpu_bo_kptr - returns a kernel virtual address of the buffer object + * @bo: &amdgpu_bo buffer object + * + * Calls ttm_kmap_obj_virtual() to get the kernel virtual address + * + * Returns: + * the virtual address of a buffer object area. + */ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) { bool is_iomem; @@ -650,12 +762,27 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo) return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem); } +/** + * amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unmapped + * + * Unmaps a kernel map set up by amdgpu_bo_kmap(). + */ void amdgpu_bo_kunmap(struct amdgpu_bo *bo) { if (bo->kmap.bo) ttm_bo_kunmap(&bo->kmap); } +/** + * amdgpu_bo_ref - reference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * References the contained &ttm_buffer_object. + * + * Returns: + * a refcounted pointer to the &amdgpu_bo buffer object. + */ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) { if (bo == NULL) @@ -665,6 +792,12 @@ struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo) return bo; } +/** + * amdgpu_bo_unref - unreference an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object + * + * Unreferences the contained &ttm_buffer_object and clear the pointer + */ void amdgpu_bo_unref(struct amdgpu_bo **bo) { struct ttm_buffer_object *tbo; @@ -678,6 +811,29 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo) *bo = NULL; } +/** + * amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * @min_offset: the start of requested address range + * @max_offset: the end of requested address range + * @gpu_addr: GPU offset of the &amdgpu_bo buffer object + * + * Pins the buffer object according to requested domain and address range. 
If + * the memory is unbound gart memory, binds the pages into gart table. Adjusts + * pin_count and pin_size accordingly. + * + * Pinning means to lock pages in memory along with keeping them at a fixed + * offset. It is required when a buffer can not be moved, for example, when + * a display buffer is being scanned out. + * + * Compared with amdgpu_bo_pin(), this function gives more flexibility on + * where to pin a buffer if there are specific restrictions on where a buffer + * must be located. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 min_offset, u64 max_offset, u64 *gpu_addr) @@ -772,11 +928,34 @@ error: return r; } +/** + * amdgpu_bo_pin - pin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be pinned + * @domain: domain to be pinned to + * @gpu_addr: GPU offset of the &amdgpu_bo buffer object + * + * A simple wrapper to amdgpu_bo_pin_restricted(). + * Provides a simpler API for buffers that do not have any strict restrictions + * on where a buffer must be located. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr) { return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr); } +/** + * amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object + * @bo: &amdgpu_bo buffer object to be unpinned + * + * Decreases the pin_count, and clears the flags if pin_count reaches 0. + * Changes placement and pin size accordingly. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_unpin(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -812,6 +991,16 @@ error: return r; } +/** + * amdgpu_bo_evict_vram - evict VRAM buffers + * @adev: amdgpu device object + * + * Evicts all VRAM buffers on the lru list of the memory type. + * Mainly used for evicting vram at suspend time. 
+ * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) { /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ @@ -834,6 +1023,15 @@ static const char *amdgpu_vram_names[] = { "DDR4", }; +/** + * amdgpu_bo_init - initialize memory manager + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_init() to initialize amdgpu memory manager. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_init(struct amdgpu_device *adev) { /* reserve PAT memory space to WC for VRAM */ @@ -851,6 +1049,16 @@ int amdgpu_bo_init(struct amdgpu_device *adev) return amdgpu_ttm_init(adev); } +/** + * amdgpu_bo_late_init - late init + * @adev: amdgpu device object + * + * Calls amdgpu_ttm_late_init() to free resources used earlier during + * initialization. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_late_init(struct amdgpu_device *adev) { amdgpu_ttm_late_init(adev); @@ -858,6 +1066,12 @@ int amdgpu_bo_late_init(struct amdgpu_device *adev) return 0; } +/** + * amdgpu_bo_fini - tear down memory manager + * @adev: amdgpu device object + * + * Reverses amdgpu_bo_init() to tear down memory manager. + */ void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); @@ -865,12 +1079,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); } +/** + * amdgpu_bo_fbdev_mmap - mmap fbdev memory + * @bo: &amdgpu_bo buffer object + * @vma: vma as input from the fbdev mmap method + * + * Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */ int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma) { return ttm_fbdev_mmap(vma, &bo->tbo); } +/** + * amdgpu_bo_set_tiling_flags - set tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: new flags + * + * Sets buffer object's tiling flags with the new one. Used by GEM ioctl or + * kernel driver to set the tiling flags on a buffer. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -883,6 +1118,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags) return 0; } +/** + * amdgpu_bo_get_tiling_flags - get tiling flags + * @bo: &amdgpu_bo buffer object + * @tiling_flags: returned flags + * + * Gets buffer object's tiling flags. Used by GEM ioctl or kernel driver to + * get the tiling flags of a buffer. + */ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { lockdep_assert_held(&bo->tbo.resv->lock.base); @@ -891,6 +1134,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) *tiling_flags = bo->tiling_flags; } +/** + * amdgpu_bo_set_metadata - set metadata + * @bo: &amdgpu_bo buffer object + * @metadata: new metadata + * @metadata_size: size of the new metadata + * @flags: flags of the new metadata + * + * Sets buffer object's metadata, its size and flags. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, uint32_t metadata_size, uint64_t flags) { @@ -920,6 +1176,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, return 0; } +/** + * amdgpu_bo_get_metadata - get metadata + * @bo: &amdgpu_bo buffer object + * @buffer: returned metadata + * @buffer_size: size of the buffer + * @metadata_size: size of the returned metadata + * @flags: flags of the returned metadata + * + * Gets buffer object's metadata, its size and flags. buffer_size shall not be + * less than metadata_size. + * Used via GEM ioctl. + * + * Returns: + * 0 for success or a negative error code on failure. + */ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags) @@ -943,6 +1214,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, return 0; } +/** + * amdgpu_bo_move_notify - notification about a memory move + * @bo: pointer to a buffer object + * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new information of the buffer object + * + * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs + * bookkeeping. + * TTM driver callback which is called when ttm moves a buffer. + */ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem) @@ -971,6 +1252,17 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } +/** + * amdgpu_bo_fault_reserve_notify - notification about a memory fault + * @bo: pointer to a buffer object + * + * Notifies the driver we are taking a fault on this BO and have reserved it, + * also performs bookkeeping. + * TTM driver callback for dealing with vm faults. + * + * Returns: + * 0 for success or a negative error code on failure. 
+ */ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); @@ -1044,10 +1336,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, * amdgpu_bo_gpu_offset - return GPU offset of bo * @bo: amdgpu object for which we query the offset * - * Returns current GPU offset of the object. - * * Note: object should either be pinned or reserved when calling this * function, it might be useful to add check for this for debugging. + * + * Returns: + * current GPU offset of the object. */ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { @@ -1063,6 +1356,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) return bo->tbo.offset; } +/** + * amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout + * @adev: amdgpu device object + * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` + * + * Returns: + * Which of the allowed domains is preferred for pinning the BO for scanout. + */ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, uint32_t domain) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index b455da487782..113edffb5960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -68,11 +68,11 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) if (adev->pm.dpm_enabled) { mutex_lock(&adev->pm.mutex); if (power_supply_is_system_supplied() > 0) - adev->pm.dpm.ac_power = true; + adev->pm.ac_power = true; else - adev->pm.dpm.ac_power = false; + adev->pm.ac_power = false; if (adev->powerplay.pp_funcs->enable_bapm) - amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power); + amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power); mutex_unlock(&adev->pm.mutex); } } @@ -80,12 +80,15 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev) /** * DOC: power_dpm_state * - * This is a legacy interface and is only provided for backwards compatibility. 
- * The amdgpu driver provides a sysfs API for adjusting certain power - * related parameters. The file power_dpm_state is used for this. + * The power_dpm_state file is a legacy interface and is only provided for + * backwards compatibility. The amdgpu driver provides a sysfs API for adjusting + * certain power related parameters. The file power_dpm_state is used for this. * It accepts the following arguments: + * * - battery + * * - balanced + * * - performance * * battery @@ -169,14 +172,21 @@ fail: * The amdgpu driver provides a sysfs API for adjusting certain power * related parameters. The file power_dpm_force_performance_level is * used for this. It accepts the following arguments: + * * - auto + * * - low + * * - high + * * - manual - * - GPU fan + * * - profile_standard + * * - profile_min_sclk + * * - profile_min_mclk + * * - profile_peak * * auto @@ -463,8 +473,11 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * this. * * Reading the file will display: + * * - a list of engine clock levels and voltages labeled OD_SCLK + * * - a list of memory clock levels and voltages labeled OD_MCLK + * * - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE * * To manually adjust these settings, first select manual using @@ -1285,35 +1298,51 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, * DOC: hwmon * * The amdgpu driver exposes the following sensor interfaces: + * * - GPU temperature (via the on-die sensor) + * * - GPU voltage + * * - Northbridge voltage (APUs only) + * * - GPU power + * * - GPU fan * * hwmon interfaces for GPU temperature: + * * - temp1_input: the on die GPU temperature in millidegrees Celsius + * * - temp1_crit: temperature critical max value in millidegrees Celsius + * * - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius * * hwmon interfaces for GPU voltage: + * * - in0_input: the voltage on the GPU in millivolts + * * - in1_input: the voltage on the Northbridge in 
millivolts * * hwmon interfaces for GPU power: + * * - power1_average: average power used by the GPU in microWatts + * * - power1_cap_min: minimum cap supported in microWatts + * * - power1_cap_max: maximum cap supported in microWatts + * * - power1_cap: selected power cap in microWatts * * hwmon interfaces for GPU fan: + * * - pwm1: pulse width modulation fan level (0-255) - * - pwm1_enable: pulse width modulation fan control method - * 0: no fan speed control - * 1: manual fan speed control using pwm interface - * 2: automatic fan speed control + * + * - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control) + * * - pwm1_min: pulse width modulation fan control minimum level (0) + * * - pwm1_max: pulse width modulation fan control maximum level (255) + * * - fan1_input: fan speed in RPM * * You can use hwmon tools like sensors to view this information on your system. @@ -1878,6 +1907,14 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) amdgpu_fence_wait_empty(ring); } + mutex_lock(&adev->pm.mutex); + /* update battery/ac status */ + if (power_supply_is_system_supplied() > 0) + adev->pm.ac_power = true; + else + adev->pm.ac_power = false; + mutex_unlock(&adev->pm.mutex); + if (adev->powerplay.pp_funcs->dispatch_tasks) { if (!amdgpu_device_has_dc_support(adev)) { mutex_lock(&adev->pm.mutex); @@ -1898,14 +1935,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) } else { mutex_lock(&adev->pm.mutex); amdgpu_dpm_get_active_displays(adev); - /* update battery/ac status */ - if (power_supply_is_system_supplied() > 0) - adev->pm.dpm.ac_power = true; - else - adev->pm.dpm.ac_power = false; - amdgpu_dpm_change_power_state_locked(adev); - mutex_unlock(&adev->pm.mutex); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index a156b3891a3f..b2286bc41aec 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -23,6 +23,14 @@ * * Authors: Alex Deucher */ + +/** + * DOC: PRIME Buffer Sharing + * + * The following callback implementations are used for :ref:`sharing GEM buffer + * objects between different devices via PRIME <prime_buffer_sharing>`. + */ + #include <drm/drmP.h> #include "amdgpu.h" @@ -32,6 +40,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops; +/** + * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table + * implementation + * @obj: GEM buffer object + * + * Returns: + * A scatter/gather table for the pinned pages of the buffer object's memory. + */ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages); } +/** + * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation + * @obj: GEM buffer object + * + * Sets up an in-kernel virtual mapping of the buffer object's memory. + * + * Returns: + * The virtual address of the mapping or an error pointer. + */ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) return bo->dma_buf_vmap.virtual; } +/** + * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation + * @obj: GEM buffer object + * @vaddr: virtual address (unused) + * + * Tears down the in-kernel virtual mapping of the buffer object's memory. 
+ */ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) ttm_bo_kunmap(&bo->dma_buf_vmap); } +/** + * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation + * @obj: GEM buffer object + * @vma: virtual memory area + * + * Sets up a userspace mapping of the buffer object's memory in the given + * virtual memory area. + * + * Returns: + * 0 on success or negative error code. + */ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma return ret; } +/** + * amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table + * implementation + * @dev: DRM device + * @attach: DMA-buf attachment + * @sg: Scatter/gather table + * + * Import shared DMA buffer memory exported by another device. + * + * Returns: + * A new GEM buffer object of the given DRM device, representing the memory + * described by the given DMA-buf attachment and scatter/gather table. + */ struct drm_gem_object * amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, @@ -132,6 +188,19 @@ error: return ERR_PTR(ret); } +/** + * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation + * @dma_buf: shared DMA buffer + * @target_dev: target device + * @attach: DMA-buf attachment + * + * Makes sure that the shared DMA buffer can be accessed by the target device. + * For now, simply pins it to the GTT domain, where it should be accessible by + * all DMA devices. + * + * Returns: + * 0 on success or negative error code. 
+ */ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { @@ -180,6 +249,14 @@ error_detach: return r; } +/** + * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation + * @dma_buf: shared DMA buffer + * @attach: DMA-buf attachment + * + * This is called when a shared DMA buffer no longer needs to be accessible by + * the other device. For now, simply unpins the buffer from GTT. + */ static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { @@ -201,6 +278,13 @@ error: drm_gem_map_detach(dma_buf, attach); } +/** + * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation + * @obj: GEM buffer object + * + * Returns: + * The buffer object's reservation object. + */ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); @@ -208,6 +292,18 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) return bo->tbo.resv; } +/** + * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation + * @dma_buf: shared DMA buffer + * @direction: direction of DMA transfer + * + * This is called before CPU access to the shared DMA buffer's memory. If it's + * a read access, the buffer is moved to the GTT domain if possible, for optimal + * CPU read performance. + * + * Returns: + * 0 on success or negative error code. + */ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { @@ -250,6 +346,18 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = { .vunmap = drm_gem_dmabuf_vunmap, }; +/** + * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation + * @dev: DRM device + * @gobj: GEM buffer object + * @flags: flags like DRM_CLOEXEC and DRM_RDWR + * + * The main work is done by the &drm_gem_prime_export helper, which in turn + * uses &amdgpu_gem_prime_res_obj. 
+ * + * Returns: + * Shared DMA buffer representing the GEM buffer object from the given device. + */ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, int flags) @@ -270,6 +378,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, return buf; } +/** + * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation + * @dev: DRM device + * @dma_buf: Shared DMA buffer + * + * The main work is done by the &drm_gem_prime_import helper, which in turn + * uses &amdgpu_gem_prime_import_sg_table. + * + * Returns: + * GEM buffer object representing the shared DMA buffer for the given device. + */ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c index 8af16e81c7d4..ea9850c9224d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -96,6 +96,9 @@ static int amdgpu_identity_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: *out_ring = &adev->vcn.ring_enc[ring]; break; + case AMDGPU_HW_IP_VCN_JPEG: + *out_ring = &adev->vcn.ring_jpeg; + break; default: *out_ring = NULL; DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); @@ -260,6 +263,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: ip_num_rings = adev->vcn.num_enc_rings; break; + case AMDGPU_HW_IP_VCN_JPEG: + ip_num_rings = 1; + break; default: DRM_DEBUG("unknown ip type: %d\n", hw_ip); return -EINVAL; @@ -287,6 +293,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev, case AMDGPU_HW_IP_UVD_ENC: case AMDGPU_HW_IP_VCN_DEC: case AMDGPU_HW_IP_VCN_ENC: + case AMDGPU_HW_IP_VCN_JPEG: r = amdgpu_identity_map(adev, mapper, ring, out_ring); break; case AMDGPU_HW_IP_DMA: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index c6850b629d0e..19e45a3953e0 100644 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -304,7 +304,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, 0xffffffffffffffff : ring->buf_mask; /* Allocate ring buffer */ if (ring->ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &ring->ring_obj, &ring->gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1513124c5659..a293f4e6760d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -53,7 +53,8 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ, AMDGPU_RING_TYPE_UVD_ENC, AMDGPU_RING_TYPE_VCN_DEC, - AMDGPU_RING_TYPE_VCN_ENC + AMDGPU_RING_TYPE_VCN_ENC, + AMDGPU_RING_TYPE_VCN_JPEG }; struct amdgpu_device; @@ -112,6 +113,7 @@ struct amdgpu_ring_funcs { u32 nop; bool support_64bit_ptrs; unsigned vmhub; + unsigned extra_dw; /* ring read/write ptr handling */ u64 (*get_rptr)(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e93a0a237dc3..0c084d3d0865 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -277,7 +277,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); - } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size && + } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && amdgpu_bo_in_cpu_visible_vram(abo)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index bcf68f80bbf0..04d77f19acc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; - unsigned version_major, version_minor, family_id; + unsigned family_id; int i, j, r; INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); @@ -208,29 +208,46 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->uvd.fw->data; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; - version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; - version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", - version_major, version_minor, family_id); - - /* - * Limit the number of UVD handles depending on microcode major - * and minor versions. The firmware version which has 40 UVD - * instances support is 1.80. So all subsequent versions should - * also have the same support. - */ - if ((version_major > 0x01) || - ((version_major == 0x01) && (version_minor >= 0x50))) - adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; - adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | - (family_id << 8)); + if (adev->asic_type < CHIP_VEGA20) { + unsigned version_major, version_minor; + + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + /* + * Limit the number of UVD handles depending on microcode major + * and minor versions. The firmware version which has 40 UVD + * instances support is 1.80. So all subsequent versions should + * also have the same support. 
+ */ + if ((version_major > 0x01) || + ((version_major == 0x01) && (version_minor >= 0x50))) + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + + adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) | + (family_id << 8)); + + if ((adev->asic_type == CHIP_POLARIS10 || + adev->asic_type == CHIP_POLARIS11) && + (adev->uvd.fw_version < FW_1_66_16)) + DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", + version_major, version_minor); + } else { + unsigned int enc_major, enc_minor, dec_minor; + + dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f; + enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3; + DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n", + enc_major, enc_minor, dec_minor, family_id); - if ((adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS11) && - (adev->uvd.fw_version < FW_1_66_16)) - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", - version_major, version_minor); + adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES; + + adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version); + } bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE + AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 127e87b470ff..3f2a5e73e69f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -119,6 +119,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.ring_jpeg); + release_firmware(adev->vcn.fw); return 0; @@ -576,3 +578,129 @@ error: dma_fence_put(fence); return r; } + +int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + 
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_DEBUG("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + + return r; +} + +static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + const unsigned ib_size_dw = 16; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + + ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0); + ib->ptr[1] = 0xDEADBEEF; + for (i = 2; i < 16; i += 2) { + ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + struct dma_fence *fence = NULL; + long r = 0; + + r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); + goto error; + } + + r = 
dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + goto error; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + goto error; + } else + r = 0; + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) + DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else { + DRM_ERROR("ib test failed (0x%08X)\n", tmp); + r = -EINVAL; + } + + dma_fence_put(fence); + +error: + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 773010b9ff15..0b0b8638d73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -66,6 +66,7 @@ struct amdgpu_vcn { const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; unsigned num_enc_rings; }; @@ -83,4 +84,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b0eb2f537392..819949418495 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,9 +33,11 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_gmc.h" -/* - * GPUVM +/** + * DOC: GPUVM + * * GPUVM is similar to the legacy gart on older asics, however * rather than there being a single global gart table * for the entire GPU, there 
are multiple VM page tables active @@ -63,37 +65,84 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef START #undef LAST -/* Local structure. Encapsulate some VM table update parameters to reduce +/** + * struct amdgpu_pte_update_params - Local structure + * + * Encapsulate some VM table update parameters to reduce * the number of function parameters + * */ struct amdgpu_pte_update_params { - /* amdgpu device we do this update for */ + + /** + * @adev: amdgpu device we do this update for + */ struct amdgpu_device *adev; - /* optional amdgpu_vm we do this update for */ + + /** + * @vm: optional amdgpu_vm we do this update for + */ struct amdgpu_vm *vm; - /* address where to copy page table entries from */ + + /** + * @src: address where to copy page table entries from + */ uint64_t src; - /* indirect buffer to fill with commands */ + + /** + * @ib: indirect buffer to fill with commands + */ struct amdgpu_ib *ib; - /* Function which actually does the update */ + + /** + * @func: Function which actually does the update + */ void (*func)(struct amdgpu_pte_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* The next two are used during VM update by CPU - * DMA addresses to use for mapping - * Kernel pointer of PD/PT BO that needs to be updated + /** + * @pages_addr: + * + * DMA addresses to use for mapping, used during VM update by CPU */ dma_addr_t *pages_addr; + + /** + * @kptr: + * + * Kernel pointer of PD/PT BO that needs to be updated, + * used during VM update by CPU + */ void *kptr; }; -/* Helper to disable partial resident texture feature from a fence callback */ +/** + * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback + */ struct amdgpu_prt_cb { + + /** + * @adev: amdgpu device + */ struct amdgpu_device *adev; + + /** + * @cb: callback + */ struct dma_fence_cb cb; }; +/** + * amdgpu_vm_bo_base_init - 
Adds bo to the list of bos associated with the vm + * + * @base: base structure for tracking BO usage in a VM + * @vm: vm to which bo is to be added + * @bo: amdgpu buffer object + * + * Initialize a bo_va_base structure and add it to the appropriate lists + * + */ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo) @@ -126,8 +175,10 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Returns the number of bits the pfn needs to be right shifted for a level. + * Returns: + * The number of bits the pfn needs to be right shifted for a level. */ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, unsigned level) @@ -155,8 +206,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, * amdgpu_vm_num_entries - return the number of entries in a PD/PT * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Calculate the number of entries in a page directory or page table. + * Returns: + * The number of entries in a page directory or page table. */ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, unsigned level) @@ -179,8 +232,10 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, * amdgpu_vm_bo_size - returns the size of the BOs in bytes * * @adev: amdgpu_device pointer + * @level: VMPT level * - * Calculate the size of the BO for a page directory or page table in bytes. + * Returns: + * The size of the BO for a page directory or page table in bytes. */ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) { @@ -218,6 +273,9 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, * @param: parameter for the validation callback * * Validate the page table BOs on command submission if neccessary. + * + * Returns: + * Validation result. 
*/ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), @@ -273,6 +331,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @vm: VM to check * * Check if all VM PDs/PTs are ready for updates + * + * Returns: + * True if eviction list is empty. */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { @@ -283,10 +344,15 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * amdgpu_vm_clear_bo - initially clear the PDs/PTs * * @adev: amdgpu_device pointer + * @vm: VM to clear BO from * @bo: BO to clear * @level: level this BO is at + * @pte_support_ats: indicate ATS support from PTE * * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. */ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, @@ -382,10 +448,16 @@ error: * * @adev: amdgpu_device pointer * @vm: requested vm + * @parent: parent PT * @saddr: start of the address range * @eaddr: end of the address range + * @level: VMPT level + * @ats: indicate ATS support from PTE * * Make sure the page directories and page tables are allocated + * + * Returns: + * 0 on success, errno otherwise. */ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -494,6 +566,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, * @size: Size from start address we need. * * Make sure the page tables are allocated. + * + * Returns: + * 0 on success, errno otherwise. */ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -559,6 +634,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) } } +/** + * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job. + * + * @ring: ring on which the job will be submitted + * @job: job to submit + * + * Returns: + * True if sync is needed. 
+ */ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job) { @@ -586,19 +670,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, return vm_flush_needed || gds_switch_needed; } -static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) -{ - return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size); -} - /** * amdgpu_vm_flush - hardware flush the vm * * @ring: ring to use for flush - * @vmid: vmid number to use - * @pd_addr: address of the page directory + * @job: related job + * @need_pipe_sync: is pipe sync needed * * Emit a VM flush when it is necessary. + * + * Returns: + * 0 on success, errno otherwise. */ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { @@ -706,6 +788,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ * Returns the found bo_va or NULL if none is found * * Object has to be reserved! + * + * Returns: + * Found bo_va or NULL. */ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo) @@ -787,7 +872,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, * @addr: the unmapped addr * * Look up the physical address of the page that the pte resolves - * to and return the pointer for the page table entry. + * to. + * + * Returns: + * The pointer for the page table entry. */ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { @@ -840,6 +928,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, } } + +/** + * amdgpu_vm_wait_pd - Wait for PT BOs to be free. + * + * @adev: amdgpu_device pointer + * @vm: related vm + * @owner: fence owner + * + * Returns: + * 0 on success, errno otherwise. 
+ */ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *owner) { @@ -893,7 +992,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, /* * amdgpu_vm_invalidate_level - mark all PD levels as invalid * + * @adev: amdgpu_device pointer + * @vm: related vm * @parent: parent PD + * @level: VMPT level * * Mark all PD level as invalid after an error. */ @@ -928,7 +1030,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, * @vm: requested vm * * Makes sure all directories are up to date. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. */ int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) @@ -1115,14 +1219,15 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, * amdgpu_vm_update_ptes - make sure that page tables are valid * * @params: see amdgpu_pte_update_params definition - * @vm: requested vm * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to, the next dst inside the function * @flags: mapping flags * * Update the page tables in the range @start - @end. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, @@ -1176,7 +1281,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. 
*/ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, @@ -1248,7 +1355,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * @fence: optional resulting fence * * Fill in the page table entries between @start and @last. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, @@ -1324,7 +1433,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ndw += ncmds * 10; /* extra commands for begin/end fragments */ - ndw += 2 * 10 * adev->vm_manager.fragment_size; + if (vm->root.base.bo->shadow) + ndw += 2 * 10 * adev->vm_manager.fragment_size * 2; + else + ndw += 2 * 10 * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } @@ -1400,7 +1512,9 @@ error_free: * * Split the mapping into smaller chunks so that each update fits * into a SDMA IB. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, @@ -1511,7 +1625,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * @clear: if true clear the entries * * Fill in the page table entries for @bo_va. - * Returns 0 for success, -EINVAL for failure. + * + * Returns: + * 0 for success, -EINVAL for failure. 
*/ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, @@ -1606,6 +1722,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, /** * amdgpu_vm_update_prt_state - update the global PRT state + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) { @@ -1620,6 +1738,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev) /** * amdgpu_vm_prt_get - add a PRT user + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) { @@ -1632,6 +1752,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev) /** * amdgpu_vm_prt_put - drop a PRT user + * + * @adev: amdgpu_device pointer */ static void amdgpu_vm_prt_put(struct amdgpu_device *adev) { @@ -1641,6 +1763,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev) /** * amdgpu_vm_prt_cb - callback for updating the PRT status + * + * @fence: fence for the callback + * @_cb: the callback function */ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) { @@ -1652,6 +1777,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb) /** * amdgpu_vm_add_prt_cb - add callback for updating the PRT status + * + * @adev: amdgpu_device pointer + * @fence: fence for the callback */ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev, struct dma_fence *fence) @@ -1743,9 +1871,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) * or if an error occurred) * * Make sure all freed BOs are cleared in the PT. - * Returns 0 for success. - * * PTs have to be reserved and mutex must be locked! + * + * Returns: + * 0 for success. 
+ * */ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -1790,10 +1920,11 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @sync: sync object to add fences to * * Make sure all BOs which are moved are updated in the PTs. - * Returns 0 for success. + * + * Returns: + * 0 for success. * * PTs have to be reserved! */ @@ -1848,7 +1979,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, * * Add @bo into the requested vm. * Add @bo to the list of bos associated with the vm - * Returns newly added bo_va or NULL for failure + * + * Returns: + * Newly added bo_va or NULL for failure * * Object has to be reserved! */ @@ -1911,10 +2044,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, * @bo_va: bo_va to store the address * @saddr: where to map the BO * @offset: requested offset in the BO + * @size: BO size in bytes * @flags: attributes of pages (read/write/valid/etc.) * * Add a mapping of the BO at the specefied addr into the VM. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -1972,11 +2108,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, * @bo_va: bo_va to store the address * @saddr: where to map the BO * @offset: requested offset in the BO + * @size: BO size in bytes * @flags: attributes of pages (read/write/valid/etc.) * * Add a mapping of the BO at the specefied addr into the VM. Replace existing * mappings as we do so. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -2033,7 +2172,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, * @saddr: where to the BO is mapped * * Remove a mapping of the BO at the specefied addr from the VM. - * Returns 0 for success, error for failure. 
+ * + * Returns: + * 0 for success, error for failure. * * Object has to be reserved and unreserved outside! */ @@ -2087,7 +2228,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * @size: size of the range * * Remove all mappings in a range, split them as appropriate. - * Returns 0 for success, error for failure. + * + * Returns: + * 0 for success, error for failure. */ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -2184,8 +2327,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, * amdgpu_vm_bo_lookup_mapping - find mapping by address * * @vm: the requested VM + * @addr: the address * * Find a mapping by it's address. + * + * Returns: + * The amdgpu_bo_va_mapping matching for addr or NULL + * */ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, uint64_t addr) @@ -2237,8 +2385,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, * amdgpu_vm_bo_invalidate - mark the bo as invalid * * @adev: amdgpu_device pointer - * @vm: requested vm * @bo: amdgpu buffer object + * @evicted: is the BO evicted * * Mark @bo as invalid. 
*/ @@ -2278,6 +2426,14 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, } } +/** + * amdgpu_vm_get_block_size - calculate VM page table size as power of two + * + * @vm_size: VM size + * + * Returns: + * VM page table as power of two + */ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) { /* Total bits covered by PD + PTs */ @@ -2296,6 +2452,10 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto + * @fragment_size_default: Default PTE fragment size + * @max_level: max VMPT level + * @max_bits: max address space size in bits + * */ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, uint32_t fragment_size_default, unsigned max_level, @@ -2363,8 +2523,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, * @adev: amdgpu_device pointer * @vm: requested vm * @vm_context: Indicates if it GFX or Compute context + * @pasid: Process address space identifier * * Init @vm fields. + * + * Returns: + * 0 for success, error for failure. */ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid) @@ -2415,7 +2579,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; @@ -2485,6 +2649,9 @@ error_free_sched_entity: /** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * + * @adev: amdgpu_device pointer + * @vm: requested vm + * * This only works on GFX VMs that don't have any BOs added and no * page tables allocated yet. * @@ -2497,7 +2664,8 @@ error_free_sched_entity: * setting. 
May leave behind an unused shadow BO for the page * directory when switching from SDMA updates to CPU updates. * - * Returns 0 for success, -errno for errors. + * Returns: + * 0 for success, -errno for errors. */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { @@ -2531,7 +2699,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pte_support_ats = pte_support_ats; DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? "CPU" : "SDMA"); - WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); if (vm->pasid) { @@ -2652,8 +2820,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @pasid: PASID do identify the VM * - * This function is expected to be called in interrupt context. Returns - * true if there was fault credit, false otherwise + * This function is expected to be called in interrupt context. + * + * Returns: + * True if there was fault credit, false otherwise */ bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, unsigned int pasid) @@ -2707,7 +2877,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) */ #ifdef CONFIG_X86_64 if (amdgpu_vm_update_mode == -1) { - if (amdgpu_vm_is_large_bar(adev)) + if (amdgpu_gmc_vram_full_visible(&adev->gmc)) adev->vm_manager.vm_update_mode = AMDGPU_VM_USE_CPU_FOR_COMPUTE; else @@ -2737,6 +2907,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) amdgpu_vmid_mgr_fini(adev); } +/** + * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. + * + * @dev: drm device pointer + * @data: drm_amdgpu_vm + * @filp: drm file pointer + * + * Returns: + * 0 for success, -errno for errors. 
+ */ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_vm *args = data; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index e9934de1b9cf..b18c31a701e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1221,7 +1221,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.abort = false; ectx.last_jump = 0; if (ws) - ectx.ws = kcalloc(4, ws, GFP_KERNEL); + ectx.ws = kcalloc(4, ws, GFP_ATOMIC); else ectx.ws = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 7fbad2f5f0bd..c9d45cffca56 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -951,12 +951,12 @@ static void ci_apply_state_adjust_rules(struct amdgpu_device *adev, else pi->battery_state = false; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - if (adev->pm.dpm.ac_power == false) { + if (adev->pm.ac_power == false) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; @@ -4078,7 +4078,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4127,7 +4127,7 @@ static int ci_enable_vce_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = 
&adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4160,7 +4160,7 @@ static int ci_enable_samu_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -4191,7 +4191,7 @@ static int ci_enable_acp_dpm(struct amdgpu_device *adev, bool enable) const struct amdgpu_clock_and_voltage_limits *max_limits; int i; - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 8ff4c60d1b59..702e257a483f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2003,9 +2003,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); - else amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -2024,9 +2024,9 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); if (amdgpu_dpm == -1) - amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); - else amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); + else + amdgpu_device_ip_block_add(adev, &ci_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 818874b13c99..807ee0dd623c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -866,26 +866,32 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; - uint32_t scratch; - uint32_t tmp = 0; + + unsigned int index; + uint64_t gpu_addr; + uint32_t tmp; long r; - r = amdgpu_gfx_scratch_get(adev, &scratch); + r = amdgpu_device_wb_get(adev, &index); if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); return r; } - WREG32(scratch, 0xCAFEDEAD); + + gpu_addr = adev->wb.gpu_addr + (index * 4); + adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 16, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; } - ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); - ib.ptr[2] = 0xDEADBEEF; - ib.length_dw = 3; + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); + ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; + ib.ptr[2] = lower_32_bits(gpu_addr); + ib.ptr[3] = upper_32_bits(gpu_addr); + ib.ptr[4] = 0xDEADBEEF; + ib.length_dw = 5; r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) @@ -900,20 +906,21 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } - tmp = RREG32(scratch); + + tmp = adev->wb.wb[index]; if (tmp == 0xDEADBEEF) { DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); r = 0; } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + DRM_ERROR("ib test on ring %d failed\n", ring->idx); r = -EINVAL; } + err2: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); err1: - 
amdgpu_gfx_scratch_free(adev, scratch); + amdgpu_device_wb_free(adev, index); return r; } @@ -2048,6 +2055,20 @@ static int gfx_v8_0_sw_init(void *handle) if (r) return r; + /* Add CP EDC/ECC irq */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 197, + &adev->gfx.cp_ecc_error_irq); + if (r) + return r; + + /* SQ interrupts. */ + r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 239, + &adev->gfx.sq_irq); + if (r) { + DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); + return r; + } + adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; gfx_v8_0_scratch_init(adev); @@ -5111,6 +5132,10 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + + amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); + /* disable KCQ to avoid CPC touch memory not valid anymore */ for (i = 0; i < adev->gfx.num_compute_rings; i++) gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); @@ -5542,6 +5567,20 @@ static int gfx_v8_0_late_init(void *handle) if (r) return r; + r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (r) { + DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); + return r; + } + + r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); + if (r) { + DRM_ERROR( + "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", + r); + return r; + } + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); @@ -6787,6 +6826,77 @@ static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, return 0; } +static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + int enable_flag; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + enable_flag = 0; + break; + + case AMDGPU_IRQ_STATE_ENABLE: + enable_flag = 1; + break; + + default: + return -EINVAL; 
+ } + + WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); + WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, + enable_flag); + + return 0; +} + +static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + int enable_flag; + + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + enable_flag = 1; + break; + + case AMDGPU_IRQ_STATE_ENABLE: + enable_flag = 0; + break; + + default: + return -EINVAL; + } + + WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, + enable_flag); + + return 0; +} + static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -6837,6 +6947,69 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_ERROR("CP EDC/ECC error detected."); + return 0; +} + +static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u8 enc, 
se_id; + char type[20]; + + /* Parse all fields according to SQ_INTERRUPT* registers */ + enc = (entry->src_data[0] >> 26) & 0x3; + se_id = (entry->src_data[0] >> 24) & 0x3; + + switch (enc) { + case 0: + DRM_INFO("SQ general purpose intr detected:" + "se_id %d, immed_overflow %d, host_reg_overflow %d," + "host_cmd_overflow %d, cmd_timestamp %d," + "reg_timestamp %d, thread_trace_buff_full %d," + "wlt %d, thread_trace %d.\n", + se_id, + (entry->src_data[0] >> 7) & 0x1, + (entry->src_data[0] >> 6) & 0x1, + (entry->src_data[0] >> 5) & 0x1, + (entry->src_data[0] >> 4) & 0x1, + (entry->src_data[0] >> 3) & 0x1, + (entry->src_data[0] >> 2) & 0x1, + (entry->src_data[0] >> 1) & 0x1, + entry->src_data[0] & 0x1 + ); + break; + case 1: + case 2: + + if (enc == 1) + sprintf(type, "instruction intr"); + else + sprintf(type, "EDC/ECC error"); + + DRM_INFO( + "SQ %s detected: " + "se_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d\n", + type, se_id, + (entry->src_data[0] >> 20) & 0xf, + (entry->src_data[0] >> 18) & 0x3, + (entry->src_data[0] >> 14) & 0xf, + (entry->src_data[0] >> 10) & 0xf + ); + break; + default: + DRM_ERROR("SQ invalid encoding type\n."); + return -EINVAL; + } + + return 0; +} + static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -7037,6 +7210,16 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { .process = gfx_v8_0_kiq_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { + .set = gfx_v8_0_set_cp_ecc_int_state, + .process = gfx_v8_0_cp_ecc_error_irq, +}; + +static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { + .set = gfx_v8_0_set_sq_int_state, + .process = gfx_v8_0_sq_irq, +}; + static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -7050,6 +7233,12 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.kiq.irq.num_types = 
AMDGPU_CP_KIQ_IRQ_LAST; adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; + + adev->gfx.cp_ecc_error_irq.num_types = 1; + adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; + + adev->gfx.sq_irq.num_types = 1; + adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; } static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index 7a1e77c93bf1..46de1fd18a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -1921,7 +1921,7 @@ static int kv_dpm_set_power_state(void *handle) int ret; if (pi->bapm_enable) { - ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.dpm.ac_power); + ret = amdgpu_kv_smc_bapm_enable(adev, adev->pm.ac_power); if (ret) { DRM_ERROR("amdgpu_kv_smc_bapm_enable failed\n"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 5c97a3671726..d51318c695e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3480,7 +3480,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, disable_sclk_switching = true; } - if (adev->pm.dpm.ac_power) + if (adev->pm.ac_power) max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_ac; else max_limits = &adev->pm.dpm.dyn_state.max_clock_voltage_on_dc; @@ -3489,7 +3489,7 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc) ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc; } - if (adev->pm.dpm.ac_power == false) { + if (adev->pm.ac_power == false) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 8dc29107228f..edfe50821cd9 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -53,6 +53,29 @@ #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | 1 << 1) +#define PACKETJ_CONDITION_CHECK0 0 +#define PACKETJ_CONDITION_CHECK1 1 +#define PACKETJ_CONDITION_CHECK2 2 +#define PACKETJ_CONDITION_CHECK3 3 +#define PACKETJ_CONDITION_CHECK4 4 +#define PACKETJ_CONDITION_CHECK5 5 +#define PACKETJ_CONDITION_CHECK6 6 +#define PACKETJ_CONDITION_CHECK7 7 + +#define PACKETJ_TYPE0 0 +#define PACKETJ_TYPE1 1 +#define PACKETJ_TYPE2 2 +#define PACKETJ_TYPE3 3 +#define PACKETJ_TYPE4 4 +#define PACKETJ_TYPE5 5 +#define PACKETJ_TYPE6 6 +#define PACKETJ_TYPE7 7 + +#define PACKETJ(reg, r, cond, type) ((reg & 0x3FFFF) | \ + ((r & 0x3F) << 18) | \ + ((cond & 0xF) << 24) | \ + ((type & 0xF) << 28)) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 29684c3ea4ef..b82c92084b6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -38,7 +38,9 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); /** * vcn_v1_0_early_init - set function pointers @@ -55,6 +57,7 @@ static int vcn_v1_0_early_init(void *handle) vcn_v1_0_set_dec_ring_funcs(adev); vcn_v1_0_set_enc_ring_funcs(adev); + vcn_v1_0_set_jpeg_ring_funcs(adev); vcn_v1_0_set_irq_funcs(adev); return 0; @@ -86,6 +89,11 @@ static int vcn_v1_0_sw_init(void *handle) return r; } + /* VCN JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); + if (r) + return r; + r = amdgpu_vcn_sw_init(adev); if 
(r) return r; @@ -108,6 +116,12 @@ static int vcn_v1_0_sw_init(void *handle) return r; } + ring = &adev->vcn.ring_jpeg; + sprintf(ring->name, "vcn_jpeg"); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + if (r) + return r; + return r; } @@ -162,6 +176,14 @@ static int vcn_v1_0_hw_init(void *handle) } } + ring = &adev->vcn.ring_jpeg; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; + } + done: if (!r) DRM_INFO("VCN decode and encode initialized successfully.\n"); @@ -729,6 +751,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + + /* initialize wptr */ + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + vcn_v1_0_jpeg_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + return 0; } @@ -1126,6 +1164,383 @@ static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } + +/** + * vcn_v1_0_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v1_0_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring 
pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v1_0_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v1_0_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v1_0_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); +} + +/** + * vcn_v1_0_jpeg_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80010000); +} + +/** + * vcn_v1_0_jpeg_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void vcn_v1_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x68e04); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00010000); +} + +/** + * vcn_v1_0_jpeg_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @fence: fence to emit + * + * Write a fence and a trap command to the ring. 
+ */ +static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) +{ + struct amdgpu_device *adev = ring->adev; + + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA0), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_DATA1), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_GPCOM_CMD), 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0xffffffff); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, 
mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); +} + +/** + * vcn_v1_0_jpeg_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @ib: indirect buffer to execute + * + * Write ring commands to execute the indirect buffer. + */ +static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + unsigned vmid, bool ctx_switch) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_IB_SIZE), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, + PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + 
amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_STATUS), 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void vcn_v1_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val, + uint32_t mask) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v1_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v1_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t reg_offset = (reg << 
2); + + amdgpu_ring_write(ring, + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0)); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, + PACKETJ(0, 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void vcn_v1_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i = 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static void vcn_v1_0_jpeg_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) +{ + struct amdgpu_device *adev = ring->adev; + ring->ring[(*ptr)++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[(*ptr)++] = 0; + ring->ring[(*ptr)++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0); + } else { + ring->ring[(*ptr)++] = reg_offset; + ring->ring[(*ptr)++] = PACKETJ(0, 0, 0, PACKETJ_TYPE0); + } + ring->ring[(*ptr)++] = val; +} + +static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr) +{ + struct amdgpu_device *adev = ring->adev; + + uint32_t reg, reg_offset, val, mask, i; + + // 1st: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW); + reg_offset = (reg << 2); + val = lower_32_bits(ring->gpu_addr); + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 2nd: program mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH); + reg_offset 
= (reg << 2); + val = upper_32_bits(ring->gpu_addr); + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 3rd to 5th: issue MEM_READ commands + for (i = 0; i <= 2; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE2); + ring->ring[ptr++] = 0; + } + + // 6th: program mmUVD_JRBC_RB_CNTL register to enable NO_FETCH and RPTR write ability + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x13; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 7th: program mmUVD_JRBC_RB_REF_DATA + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA); + reg_offset = (reg << 2); + val = 0x1; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + // 8th: issue conditional register read mmUVD_JRBC_RB_CNTL + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x1; + mask = 0x1; + + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_COND_RD_TIMER), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = 0x01400200; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_REF_DATA), 0, 0, PACKETJ_TYPE0); + ring->ring[ptr++] = val; + ring->ring[ptr++] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_EXTERNAL_REG_BASE), 0, 0, PACKETJ_TYPE0); + if (((reg_offset >= 0x1f800) && (reg_offset <= 0x21fff)) || + ((reg_offset >= 0x1e000) && (reg_offset <= 0x1e1ff))) { + ring->ring[ptr++] = 0; + ring->ring[ptr++] = PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3); + } else { + ring->ring[ptr++] = reg_offset; + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE3); + } + ring->ring[ptr++] = mask; + + //9th to 21st: insert no-op + for (i = 0; i <= 12; i++) { + ring->ring[ptr++] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); + ring->ring[ptr++] = 0; + } + + //22nd: reset mmUVD_JRBC_RB_RPTR + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_RPTR); + reg_offset = (reg << 2); + val = 0; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); + + //23rd: program mmUVD_JRBC_RB_CNTL to 
disable no_fetch + reg = SOC15_REG_OFFSET(UVD, 0, mmUVD_JRBC_RB_CNTL); + reg_offset = (reg << 2); + val = 0x12; + vcn_v1_0_jpeg_ring_patch_wreg(ring, &ptr, reg_offset, val); +} + static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1150,6 +1565,9 @@ static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev, case 120: amdgpu_fence_process(&adev->vcn.ring_enc[1]); break; + case 126: + amdgpu_fence_process(&adev->vcn.ring_jpeg); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data[0]); @@ -1273,6 +1691,39 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; +static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .nop = PACKET0(0x81ff, 0), + .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, + .extra_dw = 64, + .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, + .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, + .set_wptr = vcn_v1_0_jpeg_ring_set_wptr, + .emit_frame_size = + 6 + 6 + /* hdp invalidate / flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 22, /* vcn_v1_0_dec_ring_emit_ib */ + .emit_ib = vcn_v1_0_jpeg_ring_emit_ib, + .emit_fence = vcn_v1_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v1_0_jpeg_ring_nop, + .insert_start = vcn_v1_0_jpeg_ring_insert_start, + .insert_end = vcn_v1_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, + .emit_reg_wait = 
vcn_v1_0_jpeg_ring_emit_reg_wait, +}; + static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; @@ -1289,6 +1740,12 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) DRM_INFO("VCN encode is enabled in VM mode\n"); } +static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; + DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); +} + static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { .set = vcn_v1_0_set_interrupt_state, .process = vcn_v1_0_process_interrupt, diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index d5d4586e6176..4c35625eb2c7 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -9,19 +9,10 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. -config DRM_AMD_DC_FBC - bool "AMD FBC - Enable Frame Buffer Compression" - depends on DRM_AMD_DC - help - Choose this option if you want to use frame buffer compression - support. - This is a power optimisation feature, check its availability - on your hardware before enabling this option. 
- - config DRM_AMD_DC_DCN1_0 bool "DCN 1.0 Raven family" depends on DRM_AMD_DC && X86 + default y help Choose this option if you want to have RV family for display engine diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index a1dd49545a5b..655950102827 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -347,7 +347,6 @@ static void hotplug_notify_work_func(struct work_struct *work) drm_kms_helper_hotplug_event(dev); } -#if defined(CONFIG_DRM_AMD_DC_FBC) /* Allocate memory for FBC compressed data */ static void amdgpu_dm_fbc_init(struct drm_connector *connector) { @@ -388,7 +387,6 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) } } -#endif /* Init display KMS @@ -3426,12 +3424,15 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) struct edid *edid = amdgpu_dm_connector->edid; encoder = helper->best_encoder(connector); - amdgpu_dm_connector_ddc_get_modes(connector, edid); - amdgpu_dm_connector_add_common_modes(encoder, connector); -#if defined(CONFIG_DRM_AMD_DC_FBC) + if (!edid || !drm_edid_is_valid(edid)) { + drm_add_modes_noedid(connector, 640, 480); + } else { + amdgpu_dm_connector_ddc_get_modes(connector, edid); + amdgpu_dm_connector_add_common_modes(encoder, connector); + } amdgpu_dm_fbc_init(connector); -#endif + return amdgpu_dm_connector->num_modes; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index d5aa89ad5571..a29dc35954c9 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -72,13 +72,11 @@ struct irq_list_head { struct work_struct work; }; -#if defined(CONFIG_DRM_AMD_DC_FBC) struct dm_comressor_info { void *cpu_addr; struct amdgpu_bo *bo_ptr; uint64_t gpu_addr; }; -#endif struct amdgpu_display_manager { @@ -129,9 +127,8 @@ struct 
amdgpu_display_manager { * Caches device atomic state for suspend/resume */ struct drm_atomic_state *cached_state; -#if defined(CONFIG_DRM_AMD_DC_FBC) + struct dm_comressor_info compressor; -#endif }; struct amdgpu_dm_connector { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index ec304b1a5973..b19dc4cfc030 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -497,6 +497,34 @@ enum dc_edid_status dm_helpers_read_local_edid( DRM_ERROR("EDID err: %d, on connector: %s", edid_status, aconnector->base.name); + if (link->aux_mode) { + union test_request test_request = {0}; + union test_response test_response = {0}; + + dm_helpers_dp_read_dpcd(ctx, + link, + DP_TEST_REQUEST, + &test_request.raw, + sizeof(union test_request)); + + if (!test_request.bits.EDID_READ) + return edid_status; + + test_response.bits.EDID_CHECKSUM_WRITE = 1; + + dm_helpers_dp_write_dpcd(ctx, + link, + DP_TEST_EDID_CHECKSUM, + &sink->dc_edid.raw_edid[sink->dc_edid.length-1], + 1); + + dm_helpers_dp_write_dpcd(ctx, + link, + DP_TEST_RESPONSE, + &test_response.raw, + sizeof(test_response)); + + } return edid_status; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c index 5a3346124a01..e861929dd981 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_services.c @@ -35,14 +35,6 @@ #include "amdgpu_dm_irq.h" #include "amdgpu_pm.h" -unsigned long long dm_get_timestamp(struct dc_context *ctx) -{ - struct timespec64 time; - - getrawmonotonic64(&time); - return timespec64_to_ns(&time); -} - unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, unsigned long long current_time_stamp, unsigned long long last_time_stamp) diff --git 
a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c index 021451549ff7..f6c00a51d51a 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/basics/log_helpers.c @@ -78,6 +78,8 @@ void dc_conn_log(struct dc_context *ctx, if (i == NUM_ELEMENTS(signal_type_info_tbl)) goto fail; + dm_logger_append_heading(&entry); + dm_logger_append(&entry, "[%s][ConnIdx:%d] ", signal_type_info_tbl[i].name, link->link_index); diff --git a/drivers/gpu/drm/amd/display/dc/basics/logger.c b/drivers/gpu/drm/amd/display/dc/basics/logger.c index 0866874ae8c6..a3c56cd8b396 100644 --- a/drivers/gpu/drm/amd/display/dc/basics/logger.c +++ b/drivers/gpu/drm/amd/display/dc/basics/logger.c @@ -32,8 +32,9 @@ static const struct dc_log_type_info log_type_info_tbl[] = { {LOG_ERROR, "Error"}, {LOG_WARNING, "Warning"}, - {LOG_DEBUG, "Debug"}, + {LOG_DEBUG, "Debug"}, {LOG_DC, "DC_Interface"}, + {LOG_DTN, "DTN"}, {LOG_SURFACE, "Surface"}, {LOG_HW_HOTPLUG, "HW_Hotplug"}, {LOG_HW_LINK_TRAINING, "HW_LKTN"}, @@ -60,7 +61,7 @@ static const struct dc_log_type_info log_type_info_tbl[] = { {LOG_EVENT_LINK_LOSS, "LinkLoss"}, {LOG_EVENT_UNDERFLOW, "Underflow"}, {LOG_IF_TRACE, "InterfaceTrace"}, - {LOG_DTN, "DTN"}, + {LOG_PERF_TRACE, "PerfTrace"}, {LOG_DISPLAYSTATS, "DisplayStats"} }; @@ -128,8 +129,45 @@ uint32_t dal_logger_destroy(struct dal_logger **logger) } /* ------------------------------------------------------------------------ */ +void dm_logger_append_heading(struct log_entry *entry) +{ + int j; + + for (j = 0; j < NUM_ELEMENTS(log_type_info_tbl); j++) { + const struct dc_log_type_info *info = &log_type_info_tbl[j]; + if (info->type == entry->type) + dm_logger_append(entry, "[%s]\t", info->name); + } +} + + +/* Print everything unread existing in log_buffer to debug console*/ +void dm_logger_flush_buffer(struct dal_logger *logger, bool should_warn) +{ + char *string_start = 
&logger->log_buffer[logger->buffer_read_offset]; + + if (should_warn) + dm_output_to_console( + "---------------- FLUSHING LOG BUFFER ----------------\n"); + while (logger->buffer_read_offset < logger->buffer_write_offset) { + + if (logger->log_buffer[logger->buffer_read_offset] == '\0') { + dm_output_to_console("%s", string_start); + string_start = logger->log_buffer + logger->buffer_read_offset + 1; + } + logger->buffer_read_offset++; + } + if (should_warn) + dm_output_to_console( + "-------------- END FLUSHING LOG BUFFER --------------\n\n"); +} +/* ------------------------------------------------------------------------ */ + +/* Warning: Be careful that 'msg' is null terminated and the total size is + * less than DAL_LOGGER_BUFFER_MAX_LOG_LINE_SIZE (256) including '\0' + */ static bool dal_logger_should_log( struct dal_logger *logger, enum dc_log_type log_type) @@ -159,26 +197,6 @@ static void log_to_debug_console(struct log_entry *entry) } } -/* Print everything unread existing in log_buffer to debug console*/ -void dm_logger_flush_buffer(struct dal_logger *logger, bool should_warn) -{ - char *string_start = &logger->log_buffer[logger->buffer_read_offset]; - - if (should_warn) - dm_output_to_console( - "---------------- FLUSHING LOG BUFFER ----------------\n"); - while (logger->buffer_read_offset < logger->buffer_write_offset) { - - if (logger->log_buffer[logger->buffer_read_offset] == '\0') { - dm_output_to_console("%s", string_start); - string_start = logger->log_buffer + logger->buffer_read_offset + 1; - } - logger->buffer_read_offset++; - } - if (should_warn) - dm_output_to_console( - "-------------- END FLUSHING LOG BUFFER --------------\n\n"); -} static void log_to_internal_buffer(struct log_entry *entry) { @@ -229,19 +247,6 @@ static void log_to_internal_buffer(struct log_entry *entry) } } -static void log_heading(struct log_entry *entry) -{ - int j; - - for (j = 0; j < NUM_ELEMENTS(log_type_info_tbl); j++) { - - const struct dc_log_type_info *info = 
&log_type_info_tbl[j]; - - if (info->type == entry->type) - dm_logger_append(entry, "[%s]\t", info->name); - } -} - static void append_entry( struct log_entry *entry, char *buffer, @@ -259,11 +264,7 @@ static void append_entry( entry->buf_offset += buf_size; } -/* ------------------------------------------------------------------------ */ -/* Warning: Be careful that 'msg' is null terminated and the total size is - * less than DAL_LOGGER_BUFFER_MAX_LOG_LINE_SIZE (256) including '\0' - */ void dm_logger_write( struct dal_logger *logger, enum dc_log_type log_type, @@ -287,7 +288,7 @@ void dm_logger_write( entry.type = log_type; - log_heading(&entry); + dm_logger_append_heading(&entry); size = dm_log_to_buffer( buffer, LOG_MAX_LINE_SIZE - 1, msg, args); @@ -372,7 +373,7 @@ void dm_logger_open( logger->open_count++; - log_heading(entry); + dm_logger_append_heading(entry); } void dm_logger_close(struct log_entry *entry) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 644b2187507b..53ce7fa864b4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -169,6 +169,22 @@ failed_alloc: return false; } +/** + ***************************************************************************** + * Function: dc_stream_adjust_vmin_vmax + * + * @brief + * Looks up the pipe context of dc_stream_state and updates the + * vertical_total_min and vertical_total_max of the DRR, Dynamic Refresh + * Rate, which is a power-saving feature that targets reducing panel + * refresh rate while the screen is static + * + * @param [in] dc: dc reference + * @param [in] stream: Initial dc stream state + * @param [in] adjust: Updated parameters for vertical_total_min and + * vertical_total_max + ***************************************************************************** + */ bool dc_stream_adjust_vmin_vmax(struct dc *dc, struct dc_stream_state **streams, int num_streams, int vmin, int vmax) @@ -465,6 
+481,7 @@ static bool construct(struct dc *dc, dc_ctx->driver_context = init_params->driver; dc_ctx->dc = dc; dc_ctx->asic_id = init_params->asic_id; + dc_ctx->dc_sink_id_count = 0; dc->ctx = dc_ctx; dc->current_state = dc_create_state(); @@ -1548,7 +1565,7 @@ struct dc_sink *dc_link_add_remote_sink( struct dc_sink *dc_sink; enum dc_edid_status edid_status; - if (len > MAX_EDID_BUFFER_SIZE) { + if (len > DC_MAX_EDID_BUFFER_SIZE) { dm_error("Max EDID buffer size breached!\n"); return NULL; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 2fa521812d23..08b7ee526f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1861,28 +1861,6 @@ static enum dc_status enable_link( break; } - if (pipe_ctx->stream_res.audio && status == DC_OK) { - struct dc *core_dc = pipe_ctx->stream->ctx->dc; - /* notify audio driver for audio modes of monitor */ - struct pp_smu_funcs_rv *pp_smu = core_dc->res_pool->pp_smu; - unsigned int i, num_audio = 1; - for (i = 0; i < MAX_PIPES; i++) { - /*current_state not updated yet*/ - if (core_dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL) - num_audio++; - } - - pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); - - if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) - /*this is the first audio. 
apply the PME w/a in order to wake AZ from D3*/ - pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); - /* un-mute audio */ - /* TODO: audio should be per stream rather than per link */ - pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( - pipe_ctx->stream_res.stream_enc, false); - } - return status; } @@ -2415,6 +2393,8 @@ void core_link_enable_stream( } } + core_dc->hwss.enable_audio_stream(pipe_ctx); + /* turn off otg test pattern if enable */ pipe_ctx->stream_res.tg->funcs->set_test_pattern(pipe_ctx->stream_res.tg, CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, @@ -2453,6 +2433,22 @@ void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable) core_dc->hwss.set_avmute(pipe_ctx, enable); } +/** + ***************************************************************************** + * Function: dc_link_enable_hpd_filter + * + * @brief + * If enable is true, programs HPD filter on associated HPD line using + * delay_on_disconnect/delay_on_connect values dependent on + * link->connector_signal + * + * If enable is false, programs HPD filter on associated HPD line with no + * delays on connect or disconnect + * + * @param [in] link: pointer to the dc link + * @param [in] enable: boolean specifying whether to enable hbd + ***************************************************************************** + */ void dc_link_enable_hpd_filter(struct dc_link *link, bool enable) { struct gpio *hpd; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 7857cb42b3e6..509f265663d2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1647,22 +1647,26 @@ static enum dc_status read_hpd_rx_irq_data( irq_data->raw, sizeof(union hpd_irq_data)); else { - /* Read 2 bytes at this location,... */ + /* Read 14 bytes in a single read and then copy only the required fields. + * This is more efficient than doing it in two separate AUX reads. 
*/ + + uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1]; + retval = core_link_read_dpcd( link, DP_SINK_COUNT_ESI, - irq_data->raw, - 2); + tmp, + sizeof(tmp)); if (retval != DC_OK) return retval; - /* ... then read remaining 4 at the other location */ - retval = core_link_read_dpcd( - link, - DP_LANE0_1_STATUS_ESI, - &irq_data->raw[2], - 4); + irq_data->bytes.sink_cnt.raw = tmp[DP_SINK_COUNT_ESI - DP_SINK_COUNT_ESI]; + irq_data->bytes.device_service_irq.raw = tmp[DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 - DP_SINK_COUNT_ESI]; + irq_data->bytes.lane01_status.raw = tmp[DP_LANE0_1_STATUS_ESI - DP_SINK_COUNT_ESI]; + irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI]; + irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI]; + irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI]; } return retval; @@ -2305,6 +2309,7 @@ static bool retrieve_link_cap(struct dc_link *link) { uint8_t dpcd_data[DP_ADAPTER_CAP - DP_DPCD_REV + 1]; + struct dp_device_vendor_id sink_id; union down_stream_port_count down_strm_port_count; union edp_configuration_cap edp_config_cap; union dp_downstream_port_present ds_port = { 0 }; @@ -2391,6 +2396,17 @@ static bool retrieve_link_cap(struct dc_link *link) &link->dpcd_caps.sink_count.raw, sizeof(link->dpcd_caps.sink_count.raw)); + /* read sink ieee oui */ + core_link_read_dpcd(link, + DP_SINK_OUI, + (uint8_t *)(&sink_id), + sizeof(sink_id)); + + link->dpcd_caps.sink_dev_id = + (sink_id.ieee_oui[0] << 16) + + (sink_id.ieee_oui[1] << 8) + + (sink_id.ieee_oui[2]); + /* Connectivity log: detection */ CONN_DATA_DETECT(link, dpcd_data, sizeof(dpcd_data), "Rx Caps: "); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 751f3ac9d921..fca22550417a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -522,13 +522,12 @@ 
static void calculate_viewport(struct pipe_ctx *pipe_ctx) } } -static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip) +static void calculate_recout(struct pipe_ctx *pipe_ctx, struct rect *recout_full) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; const struct dc_stream_state *stream = pipe_ctx->stream; struct rect surf_src = plane_state->src_rect; struct rect surf_clip = plane_state->clip_rect; - int recout_full_x, recout_full_y; bool pri_split = pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state; bool sec_split = pipe_ctx->top_pipe && @@ -597,20 +596,22 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct view *recout_skip } } /* Unclipped recout offset = stream dst offset + ((surf dst offset - stream surf_src offset) - * * 1/ stream scaling ratio) - (surf surf_src offset * 1/ full scl - * ratio) + * * 1/ stream scaling ratio) - (surf surf_src offset * 1/ full scl + * ratio) */ - recout_full_x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) + recout_full->x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) * stream->dst.width / stream->src.width - surf_src.x * plane_state->dst_rect.width / surf_src.width * stream->dst.width / stream->src.width; - recout_full_y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) + recout_full->y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) * stream->dst.height / stream->src.height - surf_src.y * plane_state->dst_rect.height / surf_src.height * stream->dst.height / stream->src.height; - recout_skip->width = pipe_ctx->plane_res.scl_data.recout.x - recout_full_x; - recout_skip->height = pipe_ctx->plane_res.scl_data.recout.y - recout_full_y; + recout_full->width = plane_state->dst_rect.width + * stream->dst.width / stream->src.width; + recout_full->height = plane_state->dst_rect.height + * stream->dst.height / stream->src.height; } static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) @@ 
-662,7 +663,7 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.ratios.vert_c, 19); } -static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *recout_skip) +static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct rect *recout_full) { struct scaler_data *data = &pipe_ctx->plane_res.scl_data; struct rect src = pipe_ctx->plane_state->src_rect; @@ -680,15 +681,14 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r flip_vert_scan_dir = true; else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) flip_horz_scan_dir = true; - if (pipe_ctx->plane_state->horizontal_mirror) - flip_horz_scan_dir = !flip_horz_scan_dir; if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { rect_swap_helper(&src); rect_swap_helper(&data->viewport_c); rect_swap_helper(&data->viewport); - } + } else if (pipe_ctx->plane_state->horizontal_mirror) + flip_horz_scan_dir = !flip_horz_scan_dir; /* * Init calculated according to formula: @@ -708,127 +708,286 @@ static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct view *r data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int( dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19); + if (!flip_horz_scan_dir) { + /* Adjust for viewport end clip-off */ + if ((data->viewport.x + data->viewport.width) < (src.x + src.width)) { + int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x; + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h, data->ratios.horz)); + int_part = int_part > 0 ? int_part : 0; + data->viewport.width += int_part < vp_clip ? 
int_part : vp_clip; + } + if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div) { + int vp_clip = (src.x + src.width) / vpc_div - + data->viewport_c.width - data->viewport_c.x; + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip; + } - /* Adjust for viewport end clip-off */ - if ((data->viewport.x + data->viewport.width) < (src.x + src.width) && !flip_horz_scan_dir) { - int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h, data->ratios.horz)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport.width += int_part < vp_clip ? int_part : vp_clip; - } - if ((data->viewport.y + data->viewport.height) < (src.y + src.height) && !flip_vert_scan_dir) { - int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v, data->ratios.vert)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport.height += int_part < vp_clip ? int_part : vp_clip; - } - if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div && !flip_horz_scan_dir) { - int vp_clip = (src.x + src.width) / vpc_div - - data->viewport_c.width - data->viewport_c.x; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip; - } - if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div && !flip_vert_scan_dir) { - int vp_clip = (src.y + src.height) / vpc_div - - data->viewport_c.height - data->viewport_c.y; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); - - int_part = int_part > 0 ? 
int_part : 0; - data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip; - } - - /* Adjust for non-0 viewport offset */ - if (data->viewport.x && !flip_horz_scan_dir) { - int int_part; - - data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( - data->ratios.horz, recout_skip->width)); - int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x; - if (int_part < data->taps.h_taps) { - int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ? - (data->taps.h_taps - int_part) : data->viewport.x; - data->viewport.x -= int_adj; - data->viewport.width += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.h_taps) { - data->viewport.x += int_part - data->taps.h_taps; - data->viewport.width -= int_part - data->taps.h_taps; - int_part = data->taps.h_taps; + /* Adjust for non-0 viewport offset */ + if (data->viewport.x) { + int int_part; + + data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( + data->ratios.horz, data->recout.x - recout_full->x)); + int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x; + if (int_part < data->taps.h_taps) { + int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ? 
+ (data->taps.h_taps - int_part) : data->viewport.x; + data->viewport.x -= int_adj; + data->viewport.width += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.h_taps) { + data->viewport.x += int_part - data->taps.h_taps; + data->viewport.width -= int_part - data->taps.h_taps; + int_part = data->taps.h_taps; + } + data->inits.h.value &= 0xffffffff; + data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); } - data->inits.h.value &= 0xffffffff; - data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); - } - - if (data->viewport_c.x && !flip_horz_scan_dir) { - int int_part; - - data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( - data->ratios.horz_c, recout_skip->width)); - int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x; - if (int_part < data->taps.h_taps_c) { - int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ? - (data->taps.h_taps_c - int_part) : data->viewport_c.x; - data->viewport_c.x -= int_adj; - data->viewport_c.width += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.h_taps_c) { - data->viewport_c.x += int_part - data->taps.h_taps_c; - data->viewport_c.width -= int_part - data->taps.h_taps_c; - int_part = data->taps.h_taps_c; + + if (data->viewport_c.x) { + int int_part; + + data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( + data->ratios.horz_c, data->recout.x - recout_full->x)); + int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x; + if (int_part < data->taps.h_taps_c) { + int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ? 
+ (data->taps.h_taps_c - int_part) : data->viewport_c.x; + data->viewport_c.x -= int_adj; + data->viewport_c.width += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.h_taps_c) { + data->viewport_c.x += int_part - data->taps.h_taps_c; + data->viewport_c.width -= int_part - data->taps.h_taps_c; + int_part = data->taps.h_taps_c; + } + data->inits.h_c.value &= 0xffffffff; + data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); } - data->inits.h_c.value &= 0xffffffff; - data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); - } - - if (data->viewport.y && !flip_vert_scan_dir) { - int int_part; - - data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( - data->ratios.vert, recout_skip->height)); - int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y; - if (int_part < data->taps.v_taps) { - int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ? - (data->taps.v_taps - int_part) : data->viewport.y; - data->viewport.y -= int_adj; - data->viewport.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps) { - data->viewport.y += int_part - data->taps.v_taps; - data->viewport.height -= int_part - data->taps.v_taps; - int_part = data->taps.v_taps; + } else { + /* Adjust for non-0 viewport offset */ + if (data->viewport.x) { + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h, data->ratios.horz)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport.width += int_part < data->viewport.x ? int_part : data->viewport.x; + data->viewport.x -= int_part < data->viewport.x ? int_part : data->viewport.x; + } + if (data->viewport_c.x) { + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport_c.width += int_part < data->viewport_c.x ? int_part : data->viewport_c.x; + data->viewport_c.x -= int_part < data->viewport_c.x ? 
int_part : data->viewport_c.x; } - data->inits.v.value &= 0xffffffff; - data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); - } - - if (data->viewport_c.y && !flip_vert_scan_dir) { - int int_part; - - data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( - data->ratios.vert_c, recout_skip->height)); - int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y; - if (int_part < data->taps.v_taps_c) { - int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ? - (data->taps.v_taps_c - int_part) : data->viewport_c.y; - data->viewport_c.y -= int_adj; - data->viewport_c.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps_c) { - data->viewport_c.y += int_part - data->taps.v_taps_c; - data->viewport_c.height -= int_part - data->taps.v_taps_c; - int_part = data->taps.v_taps_c; + + /* Adjust for viewport end clip-off */ + if ((data->viewport.x + data->viewport.width) < (src.x + src.width)) { + int int_part; + int end_offset = src.x + src.width + - data->viewport.x - data->viewport.width; + + /* + * this is init if vp had no offset, keep in mind this is from the + * right side of vp due to scan direction + */ + data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( + data->ratios.horz, data->recout.x - recout_full->x)); + /* + * this is the difference between first pixel of viewport available to read + * and init position, takning into account scan direction + */ + int_part = dc_fixpt_floor(data->inits.h) - end_offset; + if (int_part < data->taps.h_taps) { + int int_adj = end_offset >= (data->taps.h_taps - int_part) ? 
+ (data->taps.h_taps - int_part) : end_offset; + data->viewport.width += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.h_taps) { + data->viewport.width += int_part - data->taps.h_taps; + int_part = data->taps.h_taps; + } + data->inits.h.value &= 0xffffffff; + data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); + } + + if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div) { + int int_part; + int end_offset = (src.x + src.width) / vpc_div + - data->viewport_c.x - data->viewport_c.width; + + /* + * this is init if vp had no offset, keep in mind this is from the + * right side of vp due to scan direction + */ + data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( + data->ratios.horz_c, data->recout.x - recout_full->x)); + /* + * this is the difference between first pixel of viewport available to read + * and init position, takning into account scan direction + */ + int_part = dc_fixpt_floor(data->inits.h_c) - end_offset; + if (int_part < data->taps.h_taps_c) { + int int_adj = end_offset >= (data->taps.h_taps_c - int_part) ? + (data->taps.h_taps_c - int_part) : end_offset; + data->viewport_c.width += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.h_taps_c) { + data->viewport_c.width += int_part - data->taps.h_taps_c; + int_part = data->taps.h_taps_c; + } + data->inits.h_c.value &= 0xffffffff; + data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); + } + + } + if (!flip_vert_scan_dir) { + /* Adjust for viewport end clip-off */ + if ((data->viewport.y + data->viewport.height) < (src.y + src.height)) { + int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y; + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v, data->ratios.vert)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport.height += int_part < vp_clip ? 
int_part : vp_clip; + } + if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div) { + int vp_clip = (src.y + src.height) / vpc_div - + data->viewport_c.height - data->viewport_c.y; + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip; + } + + /* Adjust for non-0 viewport offset */ + if (data->viewport.y) { + int int_part; + + data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( + data->ratios.vert, data->recout.y - recout_full->y)); + int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y; + if (int_part < data->taps.v_taps) { + int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ? + (data->taps.v_taps - int_part) : data->viewport.y; + data->viewport.y -= int_adj; + data->viewport.height += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.v_taps) { + data->viewport.y += int_part - data->taps.v_taps; + data->viewport.height -= int_part - data->taps.v_taps; + int_part = data->taps.v_taps; + } + data->inits.v.value &= 0xffffffff; + data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); + } + + if (data->viewport_c.y) { + int int_part; + + data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( + data->ratios.vert_c, data->recout.y - recout_full->y)); + int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y; + if (int_part < data->taps.v_taps_c) { + int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ? 
+ (data->taps.v_taps_c - int_part) : data->viewport_c.y; + data->viewport_c.y -= int_adj; + data->viewport_c.height += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.v_taps_c) { + data->viewport_c.y += int_part - data->taps.v_taps_c; + data->viewport_c.height -= int_part - data->taps.v_taps_c; + int_part = data->taps.v_taps_c; + } + data->inits.v_c.value &= 0xffffffff; + data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); + } + } else { + /* Adjust for non-0 viewport offset */ + if (data->viewport.y) { + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v, data->ratios.vert)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport.height += int_part < data->viewport.y ? int_part : data->viewport.y; + data->viewport.y -= int_part < data->viewport.y ? int_part : data->viewport.y; + } + if (data->viewport_c.y) { + int int_part = dc_fixpt_floor( + dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); + + int_part = int_part > 0 ? int_part : 0; + data->viewport_c.height += int_part < data->viewport_c.y ? int_part : data->viewport_c.y; + data->viewport_c.y -= int_part < data->viewport_c.y ? int_part : data->viewport_c.y; + } + + /* Adjust for viewport end clip-off */ + if ((data->viewport.y + data->viewport.height) < (src.y + src.height)) { + int int_part; + int end_offset = src.y + src.height + - data->viewport.y - data->viewport.height; + + /* + * this is init if vp had no offset, keep in mind this is from the + * right side of vp due to scan direction + */ + data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( + data->ratios.vert, data->recout.y - recout_full->y)); + /* + * this is the difference between first pixel of viewport available to read + * and init position, taking into account scan direction + */ + int_part = dc_fixpt_floor(data->inits.v) - end_offset; + if (int_part < data->taps.v_taps) { + int int_adj = end_offset >= (data->taps.v_taps - int_part) ? 
+ (data->taps.v_taps - int_part) : end_offset; + data->viewport.height += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.v_taps) { + data->viewport.height += int_part - data->taps.v_taps; + int_part = data->taps.v_taps; + } + data->inits.v.value &= 0xffffffff; + data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); + } + + if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div) { + int int_part; + int end_offset = (src.y + src.height) / vpc_div + - data->viewport_c.y - data->viewport_c.height; + + /* + * this is init if vp had no offset, keep in mind this is from the + * right side of vp due to scan direction + */ + data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( + data->ratios.vert_c, data->recout.y - recout_full->y)); + /* + * this is the difference between first pixel of viewport available to read + * and init position, taking into account scan direction + */ + int_part = dc_fixpt_floor(data->inits.v_c) - end_offset; + if (int_part < data->taps.v_taps_c) { + int int_adj = end_offset >= (data->taps.v_taps_c - int_part) ? 
+ (data->taps.v_taps_c - int_part) : end_offset; + data->viewport_c.height += int_adj; + int_part += int_adj; + } else if (int_part > data->taps.v_taps_c) { + data->viewport_c.height += int_part - data->taps.v_taps_c; + int_part = data->taps.v_taps_c; + } + data->inits.v_c.value &= 0xffffffff; + data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); } - data->inits.v_c.value &= 0xffffffff; - data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); } /* Interlaced inits based on final vert inits */ @@ -846,7 +1005,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; - struct view recout_skip = { 0 }; + struct rect recout_full = { 0 }; bool res = false; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Important: scaling ratio calculation requires pixel format, @@ -866,7 +1025,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (pipe_ctx->plane_res.scl_data.viewport.height < 16 || pipe_ctx->plane_res.scl_data.viewport.width < 16) return false; - calculate_recout(pipe_ctx, &recout_skip); + calculate_recout(pipe_ctx, &recout_full); /** * Setting line buffer pixel depth to 24bpp yields banding @@ -910,7 +1069,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (res) /* May need to re-check lb size after this in some obscure scenario */ - calculate_inits_and_adj_vp(pipe_ctx, &recout_skip); + calculate_inits_and_adj_vp(pipe_ctx, &recout_full); DC_LOG_SCALER( "%s: Viewport:\nheight:%d width:%d x:%d " @@ -2347,7 +2506,8 @@ static void set_hdr_static_info_packet( { /* HDR Static Metadata info packet for HDR10 */ - if (!stream->hdr_static_metadata.valid) + if (!stream->hdr_static_metadata.valid || + stream->use_dynamic_meta) return; *info_packet = stream->hdr_static_metadata; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c index 
25fae38409ab..9971b515c3eb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_sink.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_sink.c @@ -53,6 +53,10 @@ static bool construct(struct dc_sink *sink, const struct dc_sink_init_data *init sink->dongle_max_pix_clk = init_params->dongle_max_pix_clk; sink->converter_disable_audio = init_params->converter_disable_audio; sink->dc_container_id = NULL; + sink->sink_id = init_params->link->ctx->dc_sink_id_count; + // increment dc_sink_id_count because we don't want two sinks with same ID + // unless they are actually the same + init_params->link->ctx->dc_sink_id_count++; return true; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 68a71adeb12e..815dfb50089b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -84,6 +84,17 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc) return plane_state; } +/** + ***************************************************************************** + * Function: dc_plane_get_status + * + * @brief + * Looks up the pipe context of plane_state and updates the pending status + * of the pipe context. 
Then returns plane_state->status + * + * @param [in] plane_state: pointer to the plane_state to get the status of + ***************************************************************************** + */ const struct dc_plane_status *dc_plane_get_status( const struct dc_plane_state *plane_state) { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 9cfde0ccf4e9..7ebce7669eea 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -38,7 +38,7 @@ #include "inc/compressor.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.44" +#define DC_VER "3.1.47" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -68,6 +68,7 @@ struct dc_caps { uint32_t max_planes; uint32_t max_downscale_ratio; uint32_t i2c_speed_in_khz; + uint32_t dmdata_alloc_size; unsigned int max_cursor_size; unsigned int max_video_width; int linear_pitch_alignment; @@ -288,9 +289,7 @@ struct dc { bool apply_edp_fast_boot_optimization; /* FBC compressor */ -#if defined(CONFIG_DRM_AMD_DC_FBC) struct compressor *fbc_compressor; -#endif }; enum frame_buffer_mode { @@ -358,6 +357,7 @@ enum dc_transfer_func_type { TF_TYPE_PREDEFINED, TF_TYPE_DISTRIBUTED_POINTS, TF_TYPE_BYPASS, + TF_TYPE_HWPWL }; struct dc_transfer_func_distributed_points { @@ -377,16 +377,21 @@ enum dc_transfer_func_predefined { TRANSFER_FUNCTION_PQ, TRANSFER_FUNCTION_LINEAR, TRANSFER_FUNCTION_UNITY, + TRANSFER_FUNCTION_HLG, + TRANSFER_FUNCTION_HLG12 }; struct dc_transfer_func { struct kref refcount; - struct dc_transfer_func_distributed_points tf_pts; enum dc_transfer_func_type type; enum dc_transfer_func_predefined tf; /* FP16 1.0 reference level in nits, default is 80 nits, only for PQ*/ uint32_t sdr_ref_white_level; struct dc_context *ctx; + union { + struct pwl_params pwl; + struct dc_transfer_func_distributed_points tf_pts; + }; }; /* @@ -661,9 +666,13 @@ struct dc_sink { struct dc_link *link; struct dc_context *ctx; + uint32_t sink_id; + /* private to dc_sink.c */ 
+ // refcount must be the last member in dc_sink, since we want the + // sink structure to be logically cloneable up to (but not including) + // refcount struct kref refcount; - }; void dc_sink_retain(struct dc_sink *sink); diff --git a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h index e1affeb5cc51..05c8c31d8b31 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_ddc_types.h @@ -25,6 +25,65 @@ #ifndef DC_DDC_TYPES_H_ #define DC_DDC_TYPES_H_ +enum aux_transaction_type { + AUX_TRANSACTION_TYPE_DP, + AUX_TRANSACTION_TYPE_I2C +}; + + +enum i2caux_transaction_action { + I2CAUX_TRANSACTION_ACTION_I2C_WRITE = 0x00, + I2CAUX_TRANSACTION_ACTION_I2C_READ = 0x10, + I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST = 0x20, + + I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT = 0x40, + I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT = 0x50, + I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT = 0x60, + + I2CAUX_TRANSACTION_ACTION_DP_WRITE = 0x80, + I2CAUX_TRANSACTION_ACTION_DP_READ = 0x90 +}; + +enum aux_channel_operation_result { + AUX_CHANNEL_OPERATION_SUCCEEDED, + AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN, + AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY, + AUX_CHANNEL_OPERATION_FAILED_TIMEOUT, + AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON +}; + + +struct aux_request_transaction_data { + enum aux_transaction_type type; + enum i2caux_transaction_action action; + /* 20-bit AUX channel transaction address */ + uint32_t address; + /* delay, in 100-microsecond units */ + uint8_t delay; + uint32_t length; + uint8_t *data; +}; + +enum aux_transaction_reply { + AUX_TRANSACTION_REPLY_AUX_ACK = 0x00, + AUX_TRANSACTION_REPLY_AUX_NACK = 0x01, + AUX_TRANSACTION_REPLY_AUX_DEFER = 0x02, + + AUX_TRANSACTION_REPLY_I2C_ACK = 0x00, + AUX_TRANSACTION_REPLY_I2C_NACK = 0x10, + AUX_TRANSACTION_REPLY_I2C_DEFER = 0x20, + + AUX_TRANSACTION_REPLY_HPD_DISCON = 0x40, + + AUX_TRANSACTION_REPLY_INVALID = 0xFF +}; + +struct 
aux_reply_transaction_data { + enum aux_transaction_reply status; + uint32_t length; + uint8_t *data; +}; + struct i2c_payload { bool write; uint8_t address; @@ -109,7 +168,7 @@ struct ddc_service { uint32_t address; uint32_t edid_buf_len; - uint8_t edid_buf[MAX_EDID_BUFFER_SIZE]; + uint8_t edid_buf[DC_MAX_EDID_BUFFER_SIZE]; }; #endif /* DC_DDC_TYPES_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 90bccd5ccaa2..da93ab43f2d8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -430,7 +430,7 @@ union test_request { struct { uint8_t LINK_TRAINING :1; uint8_t LINK_TEST_PATTRN :1; - uint8_t EDID_REAT :1; + uint8_t EDID_READ :1; uint8_t PHY_TEST_PATTERN :1; uint8_t AUDIO_TEST_PATTERN :1; uint8_t RESERVED :1; @@ -443,7 +443,8 @@ union test_response { struct { uint8_t ACK :1; uint8_t NO_ACK :1; - uint8_t RESERVED :6; + uint8_t EDID_CHECKSUM_WRITE:1; + uint8_t RESERVED :5; } bits; uint8_t raw; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index b1f70579d61b..d31023d57b58 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -567,25 +567,25 @@ struct scaling_taps { }; enum dc_timing_standard { - TIMING_STANDARD_UNDEFINED, - TIMING_STANDARD_DMT, - TIMING_STANDARD_GTF, - TIMING_STANDARD_CVT, - TIMING_STANDARD_CVT_RB, - TIMING_STANDARD_CEA770, - TIMING_STANDARD_CEA861, - TIMING_STANDARD_HDMI, - TIMING_STANDARD_TV_NTSC, - TIMING_STANDARD_TV_NTSC_J, - TIMING_STANDARD_TV_PAL, - TIMING_STANDARD_TV_PAL_M, - TIMING_STANDARD_TV_PAL_CN, - TIMING_STANDARD_TV_SECAM, - TIMING_STANDARD_EXPLICIT, + DC_TIMING_STANDARD_UNDEFINED, + DC_TIMING_STANDARD_DMT, + DC_TIMING_STANDARD_GTF, + DC_TIMING_STANDARD_CVT, + DC_TIMING_STANDARD_CVT_RB, + DC_TIMING_STANDARD_CEA770, + DC_TIMING_STANDARD_CEA861, + DC_TIMING_STANDARD_HDMI, + DC_TIMING_STANDARD_TV_NTSC, + 
DC_TIMING_STANDARD_TV_NTSC_J, + DC_TIMING_STANDARD_TV_PAL, + DC_TIMING_STANDARD_TV_PAL_M, + DC_TIMING_STANDARD_TV_PAL_CN, + DC_TIMING_STANDARD_TV_SECAM, + DC_TIMING_STANDARD_EXPLICIT, /*!< For explicit timings from EDID, VBIOS, etc.*/ - TIMING_STANDARD_USER_OVERRIDE, + DC_TIMING_STANDARD_USER_OVERRIDE, /*!< For mode timing override by user*/ - TIMING_STANDARD_MAX + DC_TIMING_STANDARD_MAX }; enum dc_color_depth { diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index d7e6d53bb383..af503e0286a7 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -59,6 +59,9 @@ struct dc_stream_state { struct freesync_context freesync_ctx; struct dc_info_packet hdr_static_metadata; + PHYSICAL_ADDRESS_LOC dmdata_address; + bool use_dynamic_meta; + struct dc_transfer_func *out_transfer_func; struct colorspace_transform gamut_remap_matrix; struct dc_csc_transform csc_color_matrix; @@ -299,9 +302,4 @@ bool dc_stream_get_crtc_position(struct dc *dc, unsigned int *v_pos, unsigned int *nom_v_pos); -void dc_stream_set_static_screen_events(struct dc *dc, - struct dc_stream_state **stream, - int num_streams, - const struct dc_static_screen_events *events); - #endif /* DC_STREAM_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 76df2534c4a4..c96e526d07bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -92,13 +92,12 @@ struct dc_context { bool created_bios; struct gpio_service *gpio_service; struct i2caux *i2caux; -#if defined(CONFIG_DRM_AMD_DC_FBC) + uint32_t dc_sink_id_count; uint64_t fbc_gpu_addr; -#endif }; -#define MAX_EDID_BUFFER_SIZE 512 +#define DC_MAX_EDID_BUFFER_SIZE 512 #define EDID_BLOCK_SIZE 128 #define MAX_SURFACE_NUM 4 #define NUM_PIXEL_FORMATS 10 @@ -137,13 +136,13 @@ enum plane_stereo_format { */ enum dc_edid_connector_type { - EDID_CONNECTOR_UNKNOWN = 0, - 
EDID_CONNECTOR_ANALOG = 1, - EDID_CONNECTOR_DIGITAL = 10, - EDID_CONNECTOR_DVI = 11, - EDID_CONNECTOR_HDMIA = 12, - EDID_CONNECTOR_MDDI = 14, - EDID_CONNECTOR_DISPLAYPORT = 15 + DC_EDID_CONNECTOR_UNKNOWN = 0, + DC_EDID_CONNECTOR_ANALOG = 1, + DC_EDID_CONNECTOR_DIGITAL = 10, + DC_EDID_CONNECTOR_DVI = 11, + DC_EDID_CONNECTOR_HDMIA = 12, + DC_EDID_CONNECTOR_MDDI = 14, + DC_EDID_CONNECTOR_DISPLAYPORT = 15 }; enum dc_edid_status { @@ -169,7 +168,7 @@ struct dc_cea_audio_mode { struct dc_edid { uint32_t length; - uint8_t raw_edid[MAX_EDID_BUFFER_SIZE]; + uint8_t raw_edid[DC_MAX_EDID_BUFFER_SIZE]; }; /* When speaker location data block is not available, DEFAULT_SPEAKER_LOCATION diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index c0e813c7ddd4..91642e684858 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -289,11 +289,6 @@ static void dce110_stream_encoder_dp_set_stream_attribute( struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) - if (REG(DP_DB_CNTL)) - REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1); -#endif - /* set pixel encoding */ switch (crtc_timing->pixel_encoding) { case PIXEL_ENCODING_YCBCR422: diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c index e2994d337044..df027013e50c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c @@ -143,7 +143,7 @@ static void wait_for_fbc_state_changed( struct dce110_compressor *cp110, bool enabled) { - uint8_t counter = 0; + uint16_t counter = 0; uint32_t addr = mmFBC_STATUS; uint32_t value; @@ -551,9 +551,7 @@ void dce110_compressor_construct(struct dce110_compressor *compressor, compressor->base.lpt_channels_num = 0; compressor->base.attached_inst = 0; 
compressor->base.is_enabled = false; -#if defined(CONFIG_DRM_AMD_DC_FBC) compressor->base.funcs = &dce110_compressor_funcs; -#endif } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index c29052b6da5a..353ffcbdf5ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -34,9 +34,7 @@ #include "dce/dce_hwseq.h" #include "gpio_service_interface.h" -#if defined(CONFIG_DRM_AMD_DC_FBC) #include "dce110_compressor.h" -#endif #include "bios/bios_parser_helper.h" #include "timing_generator.h" @@ -667,16 +665,25 @@ static enum dc_status bios_parser_crtc_source_select( void dce110_update_info_frame(struct pipe_ctx *pipe_ctx) { + bool is_hdmi; + bool is_dp; + ASSERT(pipe_ctx->stream); if (pipe_ctx->stream_res.stream_enc == NULL) return; /* this is not root pipe */ - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + is_hdmi = dc_is_hdmi_signal(pipe_ctx->stream->signal); + is_dp = dc_is_dp_signal(pipe_ctx->stream->signal); + + if (!is_hdmi && !is_dp) + return; + + if (is_hdmi) pipe_ctx->stream_res.stream_enc->funcs->update_hdmi_info_packets( pipe_ctx->stream_res.stream_enc, &pipe_ctx->stream_res.encoder_info_frame); - else if (dc_is_dp_signal(pipe_ctx->stream->signal)) + else pipe_ctx->stream_res.stream_enc->funcs->update_dp_info_packets( pipe_ctx->stream_res.stream_enc, &pipe_ctx->stream_res.encoder_info_frame); @@ -972,19 +979,35 @@ void hwss_edp_backlight_control( edp_receiver_ready_T9(link); } -void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) +void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx) { - struct dc_stream_state *stream = pipe_ctx->stream; - struct dc_link *link = stream->sink->link; - struct dc *dc = pipe_ctx->stream->ctx->dc; + struct dc *core_dc = pipe_ctx->stream->ctx->dc; + /* notify audio driver for audio modes of monitor */ + struct pp_smu_funcs_rv *pp_smu = 
core_dc->res_pool->pp_smu; + unsigned int i, num_audio = 1; - if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( - pipe_ctx->stream_res.stream_enc); + if (pipe_ctx->stream_res.audio) { + for (i = 0; i < MAX_PIPES; i++) { + /*current_state not updated yet*/ + if (core_dc->current_state->res_ctx.pipe_ctx[i].stream_res.audio != NULL) + num_audio++; + } - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( - pipe_ctx->stream_res.stream_enc); + pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio); + + if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL) + /*this is the first audio. apply the PME w/a in order to wake AZ from D3*/ + pp_smu->set_pme_wa_enable(&pp_smu->pp_smu); + /* un-mute audio */ + /* TODO: audio should be per stream rather than per link */ + pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( + pipe_ctx->stream_res.stream_enc, false); + } +} + +void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option) +{ + struct dc *dc = pipe_ctx->stream->ctx->dc; pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control( pipe_ctx->stream_res.stream_enc, true); @@ -1015,7 +1038,23 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) * stream->stream_engine_id); */ } +} +void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option) +{ + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->sink->link; + struct dc *dc = pipe_ctx->stream->ctx->dc; + + if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_hdmi_info_packets( + pipe_ctx->stream_res.stream_enc); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->stop_dp_info_packets( + pipe_ctx->stream_res.stream_enc); + + dc->hwss.disable_audio_stream(pipe_ctx, option); 
link->link_enc->funcs->connect_dig_be_to_fe( link->link_enc, @@ -1298,6 +1337,30 @@ static enum dc_status apply_single_controller_ctx_to_hw( struct pipe_ctx *pipe_ctx_old = &dc->current_state->res_ctx. pipe_ctx[pipe_ctx->pipe_idx]; + if (pipe_ctx->stream_res.audio != NULL) { + struct audio_output audio_output; + + build_audio_output(context, pipe_ctx, &audio_output); + + if (dc_is_dp_signal(pipe_ctx->stream->signal)) + pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup( + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.audio->inst, + &pipe_ctx->stream->audio_info); + else + pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup( + pipe_ctx->stream_res.stream_enc, + pipe_ctx->stream_res.audio->inst, + &pipe_ctx->stream->audio_info, + &audio_output.crtc_info); + + pipe_ctx->stream_res.audio->funcs->az_configure( + pipe_ctx->stream_res.audio, + pipe_ctx->stream->signal, + &audio_output.crtc_info, + &pipe_ctx->stream->audio_info); + } + /* */ dc->hwss.enable_stream_timing(pipe_ctx, context, dc); @@ -1441,10 +1504,8 @@ static void power_down_all_hw_blocks(struct dc *dc) power_down_clock_sources(dc); -#if defined(CONFIG_DRM_AMD_DC_FBC) if (dc->fbc_compressor) dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); -#endif } static void disable_vga_and_power_gate_all_controllers( @@ -1686,9 +1747,7 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, if (events->force_trigger) value |= 0x1; -#if defined(CONFIG_DRM_AMD_DC_FBC) value |= 0x84; -#endif for (i = 0; i < num_pipes; i++) pipe_ctx[i]->stream_res.tg->funcs-> @@ -1816,8 +1875,6 @@ static void apply_min_clocks( } } -#if defined(CONFIG_DRM_AMD_DC_FBC) - /* * Check if FBC can be enabled */ @@ -1896,7 +1953,6 @@ static void enable_fbc(struct dc *dc, compr->funcs->enable_fbc(compr, &params); } } -#endif static void dce110_reset_hw_ctx_wrap( struct dc *dc, @@ -1949,6 +2005,86 @@ static void dce110_reset_hw_ctx_wrap( } } +static void dce110_setup_audio_dto( + struct dc *dc, + struct dc_state 
*context) +{ + int i; + + /* program audio wall clock. use HDMI as clock source if HDMI + * audio active. Otherwise, use DP as clock source + * first, loop to find any HDMI audio, if not, loop find DP audio + */ + /* Setup audio rate clock source */ + /* Issue: + * Audio lag happened on DP monitor when unplug a HDMI monitor + * + * Cause: + * In case of DP and HDMI connected or HDMI only, DCCG_AUDIO_DTO_SEL + * is set to either dto0 or dto1, audio should work fine. + * In case of DP connected only, DCCG_AUDIO_DTO_SEL should be dto1, + * set to dto0 will cause audio lag. + * + * Solution: + * Not optimized audio wall dto setup. When mode set, iterate pipe_ctx, + * find first available pipe with audio, setup audio wall DTO per topology + * instead of per pipe. + */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream == NULL) + continue; + + if (pipe_ctx->top_pipe) + continue; + + if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A) + continue; + + if (pipe_ctx->stream_res.audio != NULL) { + struct audio_output audio_output; + + build_audio_output(context, pipe_ctx, &audio_output); + + pipe_ctx->stream_res.audio->funcs->wall_dto_setup( + pipe_ctx->stream_res.audio, + pipe_ctx->stream->signal, + &audio_output.crtc_info, + &audio_output.pll_info); + break; + } + } + + /* no HDMI audio is found, try DP audio */ + if (i == dc->res_pool->pipe_count) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream == NULL) + continue; + + if (pipe_ctx->top_pipe) + continue; + + if (!dc_is_dp_signal(pipe_ctx->stream->signal)) + continue; + + if (pipe_ctx->stream_res.audio != NULL) { + struct audio_output audio_output; + + build_audio_output(context, pipe_ctx, &audio_output); + + pipe_ctx->stream_res.audio->funcs->wall_dto_setup( + pipe_ctx->stream_res.audio, + pipe_ctx->stream->signal, + &audio_output.crtc_info, + 
&audio_output.pll_info); + break; + } + } + } +} enum dc_status dce110_apply_ctx_to_hw( struct dc *dc, @@ -1993,10 +2129,9 @@ enum dc_status dce110_apply_ctx_to_hw( set_safe_displaymarks(&context->res_ctx, dc->res_pool); -#if defined(CONFIG_DRM_AMD_DC_FBC) if (dc->fbc_compressor) dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); -#endif + /*TODO: when pplib works*/ apply_min_clocks(dc, context, &clocks_state, true); @@ -2040,79 +2175,8 @@ enum dc_status dce110_apply_ctx_to_hw( dc->res_pool->display_clock, context->bw.dce.dispclk_khz * 115 / 100); } - /* program audio wall clock. use HDMI as clock source if HDMI - * audio active. Otherwise, use DP as clock source - * first, loop to find any HDMI audio, if not, loop find DP audio - */ - /* Setup audio rate clock source */ - /* Issue: - * Audio lag happened on DP monitor when unplug a HDMI monitor - * - * Cause: - * In case of DP and HDMI connected or HDMI only, DCCG_AUDIO_DTO_SEL - * is set to either dto0 or dto1, audio should work fine. - * In case of DP connected only, DCCG_AUDIO_DTO_SEL should be dto1, - * set to dto0 will cause audio lag. - * - * Solution: - * Not optimized audio wall dto setup. When mode set, iterate pipe_ctx, - * find first available pipe with audio, setup audio wall DTO per topology - * instead of per pipe. 
- */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream == NULL) - continue; - - if (pipe_ctx->top_pipe) - continue; - - if (pipe_ctx->stream->signal != SIGNAL_TYPE_HDMI_TYPE_A) - continue; - - if (pipe_ctx->stream_res.audio != NULL) { - struct audio_output audio_output; - - build_audio_output(context, pipe_ctx, &audio_output); - - pipe_ctx->stream_res.audio->funcs->wall_dto_setup( - pipe_ctx->stream_res.audio, - pipe_ctx->stream->signal, - &audio_output.crtc_info, - &audio_output.pll_info); - break; - } - } - /* no HDMI audio is found, try DP audio */ - if (i == dc->res_pool->pipe_count) { - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream == NULL) - continue; - - if (pipe_ctx->top_pipe) - continue; - - if (!dc_is_dp_signal(pipe_ctx->stream->signal)) - continue; - - if (pipe_ctx->stream_res.audio != NULL) { - struct audio_output audio_output; - - build_audio_output(context, pipe_ctx, &audio_output); - - pipe_ctx->stream_res.audio->funcs->wall_dto_setup( - pipe_ctx->stream_res.audio, - pipe_ctx->stream->signal, - &audio_output.crtc_info, - &audio_output.pll_info); - break; - } - } - } + dce110_setup_audio_dto(dc, context); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx_old = @@ -2131,31 +2195,6 @@ enum dc_status dce110_apply_ctx_to_hw( if (pipe_ctx->top_pipe) continue; - if (context->res_ctx.pipe_ctx[i].stream_res.audio != NULL) { - - struct audio_output audio_output; - - build_audio_output(context, pipe_ctx, &audio_output); - - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup( - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.audio->inst, - &pipe_ctx->stream->audio_info); - else - pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup( - pipe_ctx->stream_res.stream_enc, - 
pipe_ctx->stream_res.audio->inst, - &pipe_ctx->stream->audio_info, - &audio_output.crtc_info); - - pipe_ctx->stream_res.audio->funcs->az_configure( - pipe_ctx->stream_res.audio, - pipe_ctx->stream->signal, - &audio_output.crtc_info, - &pipe_ctx->stream->audio_info); - } - status = apply_single_controller_ctx_to_hw( pipe_ctx, context, @@ -2170,12 +2209,9 @@ enum dc_status dce110_apply_ctx_to_hw( dcb->funcs->set_scratch_critical_state(dcb, false); -#if defined(CONFIG_DRM_AMD_DC_FBC) if (dc->fbc_compressor) enable_fbc(dc, context); -#endif - return DC_OK; } @@ -2490,10 +2526,9 @@ static void init_hw(struct dc *dc) abm->funcs->init_backlight(abm); abm->funcs->abm_init(abm); } -#if defined(CONFIG_DRM_AMD_DC_FBC) + if (dc->fbc_compressor) dc->fbc_compressor->funcs->power_up_fbc(dc->fbc_compressor); -#endif } @@ -2679,9 +2714,7 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; -#if defined(CONFIG_DRM_AMD_DC_FBC) unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; -#endif unsigned int i; DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); @@ -2722,7 +2755,6 @@ static void dce110_program_front_end_for_pipe( program_scaler(dc, pipe_ctx); -#if defined(CONFIG_DRM_AMD_DC_FBC) /* fbc not applicable on Underlay pipe */ if (dc->fbc_compressor && old_pipe->stream && pipe_ctx->pipe_idx != underlay_idx) { @@ -2731,7 +2763,6 @@ static void dce110_program_front_end_for_pipe( else enable_fbc(dc, dc->current_state); } -#endif mi->funcs->mem_input_program_surface_config( mi, @@ -2968,6 +2999,8 @@ static const struct hw_sequencer_funcs dce110_funcs = { .disable_stream = dce110_disable_stream, .unblank_stream = dce110_unblank_stream, .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = dce110_disable_audio_stream, .enable_display_pipe_clock_gating = 
enable_display_pipe_clock_gating, .enable_display_power_gating = dce110_enable_display_power_gating, .disable_plane = dce110_power_down_fe, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index 5d7e9f516827..f48d5a68d238 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -49,6 +49,10 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, struct dc_link_settings *link_settings); void dce110_blank_stream(struct pipe_ctx *pipe_ctx); + +void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx); +void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option); + void dce110_update_info_frame(struct pipe_ctx *pipe_ctx); void dce110_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index ee33786bdef6..20c029089551 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -54,9 +54,8 @@ #define DC_LOGGER \ dc->ctx->logger -#if defined(CONFIG_DRM_AMD_DC_FBC) + #include "dce110/dce110_compressor.h" -#endif #include "reg_helper.h" @@ -1267,12 +1266,8 @@ static bool construct( } } -#if defined(CONFIG_DRM_AMD_DC_FBC) dc->fbc_compressor = dce110_compressor_create(ctx); - - -#endif if (!underlay_create(ctx, &pool->base)) goto res_create_fail; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index f8e0576af6e0..03eb736a312f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -719,19 +719,7 @@ static void reset_back_end_for_pipe( if (!pipe_ctx->stream->dpms_off) core_link_disable_stream(pipe_ctx, FREE_ACQUIRED_RESOURCE); else if 
(pipe_ctx->stream_res.audio) { - /* - * if stream is already disabled outside of commit streams path, - * audio disable was skipped. Need to do it here - */ - pipe_ctx->stream_res.audio->funcs->az_disable(pipe_ctx->stream_res.audio); - - if (dc->caps.dynamic_audio == true) { - /*we have to dynamic arbitrate the audio endpoints*/ - pipe_ctx->stream_res.audio = NULL; - /*we free the resource, need reset is_audio_acquired*/ - update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false); - } - + dc->hwss.disable_audio_stream(pipe_ctx, FREE_ACQUIRED_RESOURCE); } } @@ -2063,12 +2051,13 @@ static void update_dchubp_dpp( static void dcn10_blank_pixel_data( struct dc *dc, - struct stream_resource *stream_res, - struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, bool blank) { enum dc_color_space color_space; struct tg_color black_color = {0}; + struct stream_resource *stream_res = &pipe_ctx->stream_res; + struct dc_stream_state *stream = pipe_ctx->stream; /* program otg blank color */ color_space = stream->output_color_space; @@ -2127,8 +2116,7 @@ static void program_all_pipe_in_tree( pipe_ctx->stream_res.tg->funcs->program_global_sync( pipe_ctx->stream_res.tg); - dc->hwss.blank_pixel_data(dc, &pipe_ctx->stream_res, - pipe_ctx->stream, blank); + dc->hwss.blank_pixel_data(dc, pipe_ctx, blank); } if (pipe_ctx->plane_state != NULL) { @@ -2247,7 +2235,7 @@ static void dcn10_apply_ctx_for_surface( if (num_planes == 0) { /* OTG blank before remove all front end */ - dc->hwss.blank_pixel_data(dc, &top_pipe_to_program->stream_res, top_pipe_to_program->stream, true); + dc->hwss.blank_pixel_data(dc, top_pipe_to_program, true); } /* Disconnect unused mpcc */ @@ -2778,6 +2766,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .disable_stream = dce110_disable_stream, .unblank_stream = dce110_unblank_stream, .blank_stream = dce110_blank_stream, + .enable_audio_stream = dce110_enable_audio_stream, + .disable_audio_stream = 
dce110_disable_audio_stream, .enable_display_power_gating = dcn10_dummy_display_power_gating, .disable_plane = dcn10_disable_plane, .blank_pixel_data = dcn10_blank_pixel_data, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index f2fbce0e3fc5..e6a3ade154b9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -1257,6 +1257,37 @@ void optc1_read_otg_state(struct optc *optc1, OPTC_UNDERFLOW_OCCURRED_STATUS, &s->underflow_occurred_status); } +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height) +{ + uint32_t otg_enabled; + uint32_t v_blank_start; + uint32_t v_blank_end; + uint32_t h_blank_start; + uint32_t h_blank_end; + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + + REG_GET(OTG_CONTROL, + OTG_MASTER_EN, &otg_enabled); + + if (otg_enabled == 0) + return false; + + REG_GET_2(OTG_V_BLANK_START_END, + OTG_V_BLANK_START, &v_blank_start, + OTG_V_BLANK_END, &v_blank_end); + + REG_GET_2(OTG_H_BLANK_START_END, + OTG_H_BLANK_START, &h_blank_start, + OTG_H_BLANK_END, &h_blank_end); + + *otg_active_width = v_blank_start - v_blank_end; + *otg_active_height = h_blank_start - h_blank_end; + return true; +} + void optc1_clear_optc_underflow(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); @@ -1305,6 +1336,7 @@ static const struct timing_generator_funcs dcn10_tg_funcs = { .get_position = optc1_get_position, .get_frame_count = optc1_get_vblank_counter, .get_scanoutpos = optc1_get_crtc_scanoutpos, + .get_otg_active_size = optc1_get_otg_active_size, .set_early_control = optc1_set_early_control, /* used by enable_timing_synchronization. 
Not need for FPGA */ .wait_for_state = optc1_wait_for_state, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index c62052f46460..59ed272e0c49 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -507,4 +507,8 @@ bool optc1_is_optc_underflow_occurred(struct timing_generator *optc); void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enable); +bool optc1_get_otg_active_size(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index df5cb2d1d164..2da325ce781b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -417,6 +417,7 @@ static const struct dce110_clk_src_mask cs_mask = { static const struct resource_caps res_cap = { .num_timing_generator = 4, + .num_opp = 4, .num_video_plane = 4, .num_audio = 4, .num_stream_encoder = 4, @@ -1004,7 +1005,8 @@ static bool construct( ctx->dc_bios->regs = &bios_regs; - pool->base.res_cap = &res_cap; + pool->base.res_cap = &res_cap; + pool->base.funcs = &dcn10_res_pool_funcs; /* diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index c928ee4cd382..6f9078f3c4d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -257,20 +257,18 @@ void enc1_stream_encoder_dp_set_stream_attribute( uint8_t colorimetry_bpc; uint8_t dynamic_range_rgb = 0; /*full range*/ uint8_t dynamic_range_ycbcr = 1; /*bt709*/ + uint8_t dp_pixel_encoding = 0; + uint8_t dp_component_depth = 0; struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); 
- REG_UPDATE(DP_DB_CNTL, DP_DB_DISABLE, 1); - /* set pixel encoding */ switch (crtc_timing->pixel_encoding) { case PIXEL_ENCODING_YCBCR422: - REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_TYPE_YCBCR422); + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR422; break; case PIXEL_ENCODING_YCBCR444: - REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_TYPE_YCBCR444); + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR444; if (crtc_timing->flags.Y_ONLY) if (crtc_timing->display_color_depth != COLOR_DEPTH_666) @@ -278,8 +276,8 @@ void enc1_stream_encoder_dp_set_stream_attribute( * Color depth of Y-only could be * 8, 10, 12, 16 bits */ - REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_TYPE_Y_ONLY); + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_Y_ONLY; + /* Note: DP_MSA_MISC1 bit 7 is the indicator * of Y-only mode. * This bit is set in HW if register @@ -287,48 +285,55 @@ void enc1_stream_encoder_dp_set_stream_attribute( */ break; case PIXEL_ENCODING_YCBCR420: - REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_TYPE_YCBCR420); + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_YCBCR420; REG_UPDATE(DP_VID_TIMING, DP_VID_N_MUL, 1); break; default: - REG_UPDATE(DP_PIXEL_FORMAT, DP_PIXEL_ENCODING, - DP_PIXEL_ENCODING_TYPE_RGB444); + dp_pixel_encoding = DP_PIXEL_ENCODING_TYPE_RGB444; break; } misc1 = REG_READ(DP_MSA_MISC); + /* For YCbCr420 and BT2020 Colorimetry Formats, VSC SDP shall be used. + * When MISC1, bit 6, is Set to 1, a Source device uses a VSC SDP to indicate the + * Pixel Encoding/Colorimetry Format and that a Sink device shall ignore MISC1, bit 7, + * and MISC0, bits 7:1 (MISC1, bit 7, and MISC0, bits 7:1, become "don't care"). 
+ */ + if ((crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) || + (output_color_space == COLOR_SPACE_2020_YCBCR) || + (output_color_space == COLOR_SPACE_2020_RGB_FULLRANGE) || + (output_color_space == COLOR_SPACE_2020_RGB_LIMITEDRANGE)) + misc1 = misc1 | 0x40; + else + misc1 = misc1 & ~0x40; /* set color depth */ - switch (crtc_timing->display_color_depth) { case COLOR_DEPTH_666: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - 0); + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC; break; case COLOR_DEPTH_888: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_PIXEL_DEPTH_8BPC); + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_8BPC; break; case COLOR_DEPTH_101010: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_PIXEL_DEPTH_10BPC); - + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_10BPC; break; case COLOR_DEPTH_121212: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_PIXEL_DEPTH_12BPC); + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_12BPC; break; case COLOR_DEPTH_161616: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_PIXEL_DEPTH_16BPC); + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_16BPC; break; default: - REG_UPDATE(DP_PIXEL_FORMAT, DP_COMPONENT_DEPTH, - DP_COMPONENT_PIXEL_DEPTH_6BPC); + dp_component_depth = DP_COMPONENT_PIXEL_DEPTH_6BPC; break; } + /* Set DP pixel encoding and component depth */ + REG_UPDATE_2(DP_PIXEL_FORMAT, + DP_PIXEL_ENCODING, dp_pixel_encoding, + DP_COMPONENT_DEPTH, dp_component_depth); + /* set dynamic range and YCbCr range */ switch (crtc_timing->display_color_depth) { @@ -354,7 +359,6 @@ void enc1_stream_encoder_dp_set_stream_attribute( switch (output_color_space) { case COLOR_SPACE_SRGB: - misc0 = misc0 | 0x0; misc1 = misc1 & ~0x80; /* bit7 = 0*/ dynamic_range_rgb = 0; /*full range*/ break; @@ -1087,27 +1091,6 @@ static union audio_cea_channels speakers_to_channels( return cea_channels; } -static uint32_t calc_max_audio_packets_per_line( - const struct 
audio_crtc_info *crtc_info) -{ - uint32_t max_packets_per_line; - - max_packets_per_line = - crtc_info->h_total - crtc_info->h_active; - - if (crtc_info->pixel_repetition) - max_packets_per_line *= crtc_info->pixel_repetition; - - /* for other hdmi features */ - max_packets_per_line -= 58; - /* for Control Period */ - max_packets_per_line -= 16; - /* Number of Audio Packets per Line */ - max_packets_per_line /= 32; - - return max_packets_per_line; -} - static void get_audio_clock_info( enum dc_color_depth color_depth, uint32_t crtc_pixel_clock_in_khz, @@ -1201,16 +1184,9 @@ static void enc1_se_setup_hdmi_audio( struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); struct audio_clock_info audio_clock_info = {0}; - uint32_t max_packets_per_line; - - /* For now still do calculation, although this field is ignored when - * above HDMI_PACKET_GEN_VERSION set to 1 - */ - max_packets_per_line = calc_max_audio_packets_per_line(crtc_info); /* HDMI_AUDIO_PACKET_CONTROL */ - REG_UPDATE_2(HDMI_AUDIO_PACKET_CONTROL, - HDMI_AUDIO_PACKETS_PER_LINE, max_packets_per_line, + REG_UPDATE(HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); /* AFMT_AUDIO_PACKET_CONTROL */ diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 4ff9b2bba178..eb5ab3978e84 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -339,7 +339,10 @@ bool dm_dmcu_set_pipe(struct dc_context *ctx, unsigned int controller_id); #define dm_log_to_buffer(buffer, size, fmt, args)\ vsnprintf(buffer, size, fmt, args) -unsigned long long dm_get_timestamp(struct dc_context *ctx); +static inline unsigned long long dm_get_timestamp(struct dc_context *ctx) +{ + return ktime_get_raw_ns(); +} unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, unsigned long long current_time_stamp, diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c 
b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c index bb526ad326e5..1d7309611978 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.c @@ -157,6 +157,10 @@ static void process_read_reply( ctx->operation_succeeded = false; } break; + case AUX_TRANSACTION_REPLY_HPD_DISCON: + ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; + ctx->operation_succeeded = false; + break; default: ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; ctx->operation_succeeded = false; @@ -215,6 +219,10 @@ static void process_read_request( * so we should not wait here */ } break; + case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: + ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; + ctx->operation_succeeded = false; + break; default: ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; ctx->operation_succeeded = false; @@ -370,6 +378,10 @@ static void process_write_reply( ctx->operation_succeeded = false; } break; + case AUX_TRANSACTION_REPLY_HPD_DISCON: + ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; + ctx->operation_succeeded = false; + break; default: ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; ctx->operation_succeeded = false; @@ -422,6 +434,10 @@ static void process_write_request( * so we should not wait here */ } break; + case AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON: + ctx->status = I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON; + ctx->operation_succeeded = false; + break; default: ctx->status = I2CAUX_TRANSACTION_STATUS_UNKNOWN; ctx->operation_succeeded = false; diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h index 8e71324ccb10..b01488f710d5 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h +++ b/drivers/gpu/drm/amd/display/dc/i2caux/aux_engine.h @@ -26,46 +26,7 @@ #ifndef __DAL_AUX_ENGINE_H__ #define __DAL_AUX_ENGINE_H__ -enum aux_transaction_type { - AUX_TRANSACTION_TYPE_DP, - AUX_TRANSACTION_TYPE_I2C -}; - 
-struct aux_request_transaction_data { - enum aux_transaction_type type; - enum i2caux_transaction_action action; - /* 20-bit AUX channel transaction address */ - uint32_t address; - /* delay, in 100-microsecond units */ - uint8_t delay; - uint32_t length; - uint8_t *data; -}; - -enum aux_transaction_reply { - AUX_TRANSACTION_REPLY_AUX_ACK = 0x00, - AUX_TRANSACTION_REPLY_AUX_NACK = 0x01, - AUX_TRANSACTION_REPLY_AUX_DEFER = 0x02, - - AUX_TRANSACTION_REPLY_I2C_ACK = 0x00, - AUX_TRANSACTION_REPLY_I2C_NACK = 0x10, - AUX_TRANSACTION_REPLY_I2C_DEFER = 0x20, - - AUX_TRANSACTION_REPLY_INVALID = 0xFF -}; - -struct aux_reply_transaction_data { - enum aux_transaction_reply status; - uint32_t length; - uint8_t *data; -}; - -enum aux_channel_operation_result { - AUX_CHANNEL_OPERATION_SUCCEEDED, - AUX_CHANNEL_OPERATION_FAILED_REASON_UNKNOWN, - AUX_CHANNEL_OPERATION_FAILED_INVALID_REPLY, - AUX_CHANNEL_OPERATION_FAILED_TIMEOUT -}; +#include "dc_ddc_types.h" struct aux_engine; diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c index 5f47f6c007ac..2b927f25937b 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c +++ b/drivers/gpu/drm/amd/display/dc/i2caux/dce110/aux_engine_dce110.c @@ -198,27 +198,27 @@ static void submit_channel_request( ((request->type == AUX_TRANSACTION_TYPE_I2C) && ((request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE) || (request->action == I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT))); + if (REG(AUXN_IMPCAL)) { + /* clear_aux_error */ + REG_UPDATE_SEQ(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, + 1, + 0); - /* clear_aux_error */ - REG_UPDATE_SEQ(AUXN_IMPCAL, AUXN_CALOUT_ERROR_AK, - 1, - 0); - - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, - 1, - 0); - - /* force_default_calibrate */ - REG_UPDATE_1BY1_2(AUXN_IMPCAL, - AUXN_IMPCAL_ENABLE, 1, - AUXN_IMPCAL_OVERRIDE_ENABLE, 0); + REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_CALOUT_ERROR_AK, + 1, + 0); - /* bug? 
why AUXN update EN and OVERRIDE_EN 1 by 1 while AUX P toggles OVERRIDE? */ + /* force_default_calibrate */ + REG_UPDATE_1BY1_2(AUXN_IMPCAL, + AUXN_IMPCAL_ENABLE, 1, + AUXN_IMPCAL_OVERRIDE_ENABLE, 0); - REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, - 1, - 0); + /* bug? why AUXN update EN and OVERRIDE_EN 1 by 1 while AUX P toggles OVERRIDE? */ + REG_UPDATE_SEQ(AUXP_IMPCAL, AUXP_IMPCAL_OVERRIDE_ENABLE, + 1, + 0); + } /* set the delay and the number of bytes to write */ /* The length include @@ -291,6 +291,12 @@ static void process_channel_reply( value = REG_GET(AUX_SW_STATUS, AUX_SW_REPLY_BYTE_COUNT, &bytes_replied); + /* in case HPD is LOW, exit AUX transaction */ + if ((value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { + reply->status = AUX_TRANSACTION_REPLY_HPD_DISCON; + return; + } + if (bytes_replied) { uint32_t reply_result; @@ -347,8 +353,10 @@ static void process_channel_reply( * because there was surely an error that was asserted * that should have been handled * for hot plug case, this could happens*/ - if (!(value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) + if (!(value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) { + reply->status = AUX_TRANSACTION_REPLY_INVALID; ASSERT_CRITICAL(false); + } } } @@ -371,6 +379,10 @@ static enum aux_channel_operation_result get_channel_status( value = REG_WAIT(AUX_SW_STATUS, AUX_SW_DONE, 1, 10, aux110->timeout_period/10); + /* in case HPD is LOW, exit AUX transaction */ + if ((value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) + return AUX_CHANNEL_OPERATION_FAILED_HPD_DISCON; + /* Note that the following bits are set in 'status.bits' * during CTS 4.2.1.2 (FW 3.3.1): * AUX_SW_RX_MIN_COUNT_VIOL, AUX_SW_RX_INVALID_STOP, @@ -402,10 +414,10 @@ static enum aux_channel_operation_result get_channel_status( return AUX_CHANNEL_OPERATION_SUCCEEDED; } } else { - /*time_elapsed >= aux_engine->timeout_period */ - if (!(value & AUX_SW_STATUS__AUX_SW_HPD_DISCON_MASK)) - ASSERT_CRITICAL(false); - + /*time_elapsed >= 
aux_engine->timeout_period + * AUX_SW_STATUS__AUX_SW_HPD_DISCON = at this point + */ + ASSERT_CRITICAL(false); return AUX_CHANNEL_OPERATION_FAILED_TIMEOUT; } } diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/engine.h b/drivers/gpu/drm/amd/display/dc/i2caux/engine.h index 33de8a8834dc..1e8a1585e401 100644 --- a/drivers/gpu/drm/amd/display/dc/i2caux/engine.h +++ b/drivers/gpu/drm/amd/display/dc/i2caux/engine.h @@ -26,6 +26,8 @@ #ifndef __DAL_ENGINE_H__ #define __DAL_ENGINE_H__ +#include "dc_ddc_types.h" + enum i2caux_transaction_operation { I2CAUX_TRANSACTION_READ, I2CAUX_TRANSACTION_WRITE @@ -53,7 +55,8 @@ enum i2caux_transaction_status { I2CAUX_TRANSACTION_STATUS_FAILED_INCOMPLETE, I2CAUX_TRANSACTION_STATUS_FAILED_OPERATION, I2CAUX_TRANSACTION_STATUS_FAILED_INVALID_OPERATION, - I2CAUX_TRANSACTION_STATUS_FAILED_BUFFER_OVERFLOW + I2CAUX_TRANSACTION_STATUS_FAILED_BUFFER_OVERFLOW, + I2CAUX_TRANSACTION_STATUS_FAILED_HPD_DISCON }; struct i2caux_transaction_request { @@ -75,19 +78,6 @@ enum i2c_default_speed { I2CAUX_DEFAULT_I2C_SW_SPEED = 50 }; -enum i2caux_transaction_action { - I2CAUX_TRANSACTION_ACTION_I2C_WRITE = 0x00, - I2CAUX_TRANSACTION_ACTION_I2C_READ = 0x10, - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST = 0x20, - - I2CAUX_TRANSACTION_ACTION_I2C_WRITE_MOT = 0x40, - I2CAUX_TRANSACTION_ACTION_I2C_READ_MOT = 0x50, - I2CAUX_TRANSACTION_ACTION_I2C_STATUS_REQUEST_MOT = 0x60, - - I2CAUX_TRANSACTION_ACTION_DP_WRITE = 0x80, - I2CAUX_TRANSACTION_ACTION_DP_READ = 0x90 -}; - struct engine; struct engine_funcs { diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index a94942d4e66b..4beddca0180c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -148,6 +148,7 @@ struct resource_pool { unsigned int underlay_pipe_index; unsigned int stream_enc_count; unsigned int ref_clock_inKhz; + unsigned int dentist_vco_freq_khz; unsigned int 
timing_generator_count; /* diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 69cb0a105300..af700c7dac50 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -156,6 +156,9 @@ struct timing_generator_funcs { uint32_t *v_blank_end, uint32_t *h_position, uint32_t *v_position); + bool (*get_otg_active_size)(struct timing_generator *optc, + uint32_t *otg_active_width, + uint32_t *otg_active_height); void (*set_early_control)(struct timing_generator *tg, uint32_t early_cntl); void (*wait_for_state)(struct timing_generator *tg, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 63fc6c499789..a71770ed4b9f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -154,14 +154,18 @@ struct hw_sequencer_funcs { struct dc_link_settings *link_settings); void (*blank_stream)(struct pipe_ctx *pipe_ctx); + + void (*enable_audio_stream)(struct pipe_ctx *pipe_ctx); + + void (*disable_audio_stream)(struct pipe_ctx *pipe_ctx, int option); + void (*pipe_control_lock)( struct dc *dc, struct pipe_ctx *pipe, bool lock); void (*blank_pixel_data)( struct dc *dc, - struct stream_resource *stream_res, - struct dc_stream_state *stream, + struct pipe_ctx *pipe_ctx, bool blank); void (*set_bandwidth)( diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 640a647f4611..e92facbd038f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -38,6 +38,7 @@ enum dce_version resource_parse_asic_id( struct resource_caps { int num_timing_generator; + int num_opp; int num_video_plane; int num_audio; int num_stream_encoder; diff --git a/drivers/gpu/drm/amd/display/include/ddc_service_types.h 
b/drivers/gpu/drm/amd/display/include/ddc_service_types.h index 019e7a095ea1..d968956a10cd 100644 --- a/drivers/gpu/drm/amd/display/include/ddc_service_types.h +++ b/drivers/gpu/drm/amd/display/include/ddc_service_types.h @@ -40,7 +40,8 @@ enum ddc_result { DDC_RESULT_FAILED_INCOMPLETE, DDC_RESULT_FAILED_OPERATION, DDC_RESULT_FAILED_INVALID_OPERATION, - DDC_RESULT_FAILED_BUFFER_OVERFLOW + DDC_RESULT_FAILED_BUFFER_OVERFLOW, + DDC_RESULT_FAILED_HPD_DISCON }; enum ddc_service_type { diff --git a/drivers/gpu/drm/amd/display/include/fixed31_32.h b/drivers/gpu/drm/amd/display/include/fixed31_32.h index a981b3e99ab3..52a73332befb 100644 --- a/drivers/gpu/drm/amd/display/include/fixed31_32.h +++ b/drivers/gpu/drm/amd/display/include/fixed31_32.h @@ -26,6 +26,13 @@ #ifndef __DAL_FIXED31_32_H__ #define __DAL_FIXED31_32_H__ +#ifndef LLONG_MAX +#define LLONG_MAX 9223372036854775807ll +#endif +#ifndef LLONG_MIN +#define LLONG_MIN (-LLONG_MAX - 1ll) +#endif + #define FIXED31_32_BITS_PER_FRACTIONAL_PART 32 #ifndef LLONG_MIN #define LLONG_MIN (1LL<<63) diff --git a/drivers/gpu/drm/amd/display/include/logger_interface.h b/drivers/gpu/drm/amd/display/include/logger_interface.h index dc98d6d4b2bd..0f10ed710e0d 100644 --- a/drivers/gpu/drm/amd/display/include/logger_interface.h +++ b/drivers/gpu/drm/amd/display/include/logger_interface.h @@ -62,6 +62,8 @@ void dm_logger_append_va( const char *msg, va_list args); +void dm_logger_append_heading(struct log_entry *entry); + void dm_logger_open( struct dal_logger *logger, struct log_entry *entry, diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index eee0dfad6962..98edaefa2b47 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -131,6 +131,63 @@ static void compute_de_pq(struct fixed31_32 in_x, struct fixed31_32 *out_y) dc_fixpt_div(dc_fixpt_one, m1)); } + +/*de gamma, none linear 
to linear*/ +static void compute_hlg_oetf(struct fixed31_32 in_x, bool is_light0_12, struct fixed31_32 *out_y) +{ + struct fixed31_32 a; + struct fixed31_32 b; + struct fixed31_32 c; + struct fixed31_32 threshold; + struct fixed31_32 reference_white_level; + + a = dc_fixpt_from_fraction(17883277, 100000000); + if (is_light0_12) { + /*light 0-12*/ + b = dc_fixpt_from_fraction(28466892, 100000000); + c = dc_fixpt_from_fraction(55991073, 100000000); + threshold = dc_fixpt_one; + reference_white_level = dc_fixpt_half; + } else { + /*light 0-1*/ + b = dc_fixpt_from_fraction(2372241, 100000000); + c = dc_fixpt_add(dc_fixpt_one, dc_fixpt_from_fraction(429347, 100000000)); + threshold = dc_fixpt_from_fraction(1, 12); + reference_white_level = dc_fixpt_pow(dc_fixpt_from_fraction(3, 1), dc_fixpt_half); + } + if (dc_fixpt_lt(threshold, in_x)) + *out_y = dc_fixpt_add(c, dc_fixpt_mul(a, dc_fixpt_log(dc_fixpt_sub(in_x, b)))); + else + *out_y = dc_fixpt_mul(dc_fixpt_pow(in_x, dc_fixpt_half), reference_white_level); +} + +/*re gamma, linear to none linear*/ +static void compute_hlg_eotf(struct fixed31_32 in_x, bool is_light0_12, struct fixed31_32 *out_y) +{ + struct fixed31_32 a; + struct fixed31_32 b; + struct fixed31_32 c; + struct fixed31_32 reference_white_level; + + a = dc_fixpt_from_fraction(17883277, 100000000); + if (is_light0_12) { + /*light 0-12*/ + b = dc_fixpt_from_fraction(28466892, 100000000); + c = dc_fixpt_from_fraction(55991073, 100000000); + reference_white_level = dc_fixpt_from_fraction(4, 1); + } else { + /*light 0-1*/ + b = dc_fixpt_from_fraction(2372241, 100000000); + c = dc_fixpt_add(dc_fixpt_one, dc_fixpt_from_fraction(429347, 100000000)); + reference_white_level = dc_fixpt_from_fraction(1, 3); + } + if (dc_fixpt_lt(dc_fixpt_half, in_x)) + *out_y = dc_fixpt_add(dc_fixpt_exp(dc_fixpt_div(dc_fixpt_sub(in_x, c), a)), b); + else + *out_y = dc_fixpt_mul(dc_fixpt_pow(in_x, dc_fixpt_from_fraction(2, 1)), reference_white_level); +} + + /* one-time pre-compute PQ 
values - only for sdr_white_level 80 */ void precompute_pq(void) { @@ -691,6 +748,48 @@ static void build_degamma(struct pwl_float_data_ex *curve, } } +static void build_hlg_degamma(struct pwl_float_data_ex *degamma, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, bool is_light0_12) +{ + uint32_t i; + + struct pwl_float_data_ex *rgb = degamma; + const struct hw_x_point *coord_x = coordinate_x; + + i = 0; + + while (i != hw_points_num + 1) { + compute_hlg_oetf(coord_x->x, is_light0_12, &rgb->r); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; + ++i; + } +} + +static void build_hlg_regamma(struct pwl_float_data_ex *regamma, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, bool is_light0_12) +{ + uint32_t i; + + struct pwl_float_data_ex *rgb = regamma; + const struct hw_x_point *coord_x = coordinate_x; + + i = 0; + + while (i != hw_points_num + 1) { + compute_hlg_eotf(coord_x->x, is_light0_12, &rgb->r); + rgb->g = rgb->r; + rgb->b = rgb->r; + ++coord_x; + ++rgb; + ++i; + } +} + static void scale_gamma(struct pwl_float_data *pwl_rgb, const struct dc_gamma *ramp, struct dividers dividers) @@ -1622,6 +1721,25 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, ret = true; kvfree(rgb_regamma); + } else if (trans == TRANSFER_FUNCTION_HLG || + trans == TRANSFER_FUNCTION_HLG12) { + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_regamma) + goto rgb_regamma_alloc_fail; + + build_hlg_regamma(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, + trans == TRANSFER_FUNCTION_HLG12 ? 
true:false); + for (i = 0; i <= MAX_HW_POINTS ; i++) { + points->red[i] = rgb_regamma[i].r; + points->green[i] = rgb_regamma[i].g; + points->blue[i] = rgb_regamma[i].b; + } + ret = true; + kvfree(rgb_regamma); } rgb_regamma_alloc_fail: return ret; @@ -1682,6 +1800,25 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, ret = true; kvfree(rgb_degamma); + } else if (trans == TRANSFER_FUNCTION_HLG || + trans == TRANSFER_FUNCTION_HLG12) { + rgb_degamma = kvzalloc(sizeof(*rgb_degamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); + if (!rgb_degamma) + goto rgb_degamma_alloc_fail; + + build_hlg_degamma(rgb_degamma, + MAX_HW_POINTS, + coordinates_x, + trans == TRANSFER_FUNCTION_HLG12 ? true:false); + for (i = 0; i <= MAX_HW_POINTS ; i++) { + points->red[i] = rgb_degamma[i].r; + points->green[i] = rgb_degamma[i].g; + points->blue[i] = rgb_degamma[i].b; + } + ret = true; + kvfree(rgb_degamma); } points->end_exponent = 0; points->x_point_at_y1_red = 1; diff --git a/drivers/gpu/drm/amd/display/modules/stats/stats.c b/drivers/gpu/drm/amd/display/modules/stats/stats.c index 710852ad03f3..3d4c1b1ab8c4 100644 --- a/drivers/gpu/drm/amd/display/modules/stats/stats.c +++ b/drivers/gpu/drm/amd/display/modules/stats/stats.c @@ -29,7 +29,7 @@ #include "core_types.h" #define DAL_STATS_ENABLE_REGKEY "DalStatsEnable" -#define DAL_STATS_ENABLE_REGKEY_DEFAULT 0x00000001 +#define DAL_STATS_ENABLE_REGKEY_DEFAULT 0x00000000 #define DAL_STATS_ENABLE_REGKEY_ENABLED 0x00000001 #define DAL_STATS_ENTRIES_REGKEY "DalStatsEntries" @@ -238,7 +238,7 @@ void mod_stats_dump(struct mod_stats *mod_stats) for (int i = 0; i < core_stats->entry_id; i++) { if (event_index < core_stats->event_index && i == events[event_index].entry_id) { - DISPLAY_STATS("%s\n", events[event_index].event_string); + DISPLAY_STATS("==Event==%s\n", events[event_index].event_string); event_index++; } else if (time_index < core_stats->index && i == time[time_index].entry_id) { diff --git 
a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h index 18a32477ed1d..fe0cbaade3c3 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_1_0_offset.h @@ -89,6 +89,8 @@ #define mmUVD_JPEG_RB_SIZE_BASE_IDX 1 #define mmUVD_JPEG_ADDR_CONFIG 0x021f #define mmUVD_JPEG_ADDR_CONFIG_BASE_IDX 1 +#define mmUVD_JPEG_PITCH 0x0222 +#define mmUVD_JPEG_PITCH_BASE_IDX 1 #define mmUVD_JPEG_GPCOM_CMD 0x022c #define mmUVD_JPEG_GPCOM_CMD_BASE_IDX 1 #define mmUVD_JPEG_GPCOM_DATA0 0x022d @@ -203,6 +205,8 @@ #define mmUVD_RB_WPTR4_BASE_IDX 1 #define mmUVD_JRBC_RB_RPTR 0x0457 #define mmUVD_JRBC_RB_RPTR_BASE_IDX 1 +#define mmUVD_LMI_JPEG_VMID 0x045d +#define mmUVD_LMI_JPEG_VMID_BASE_IDX 1 #define mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH 0x045e #define mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH_BASE_IDX 1 #define mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW 0x045f @@ -231,6 +235,8 @@ #define mmUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_BASE_IDX 1 #define mmUVD_LMI_JRBC_IB_VMID 0x0507 #define mmUVD_LMI_JRBC_IB_VMID_BASE_IDX 1 +#define mmUVD_LMI_JRBC_RB_VMID 0x0508 +#define mmUVD_LMI_JRBC_RB_VMID_BASE_IDX 1 #define mmUVD_JRBC_RB_WPTR 0x0509 #define mmUVD_JRBC_RB_WPTR_BASE_IDX 1 #define mmUVD_JRBC_RB_CNTL 0x050a @@ -239,6 +245,20 @@ #define mmUVD_JRBC_IB_SIZE_BASE_IDX 1 #define mmUVD_JRBC_LMI_SWAP_CNTL 0x050d #define mmUVD_JRBC_LMI_SWAP_CNTL_BASE_IDX 1 +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW 0x050e +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH 0x050f +#define mmUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW 0x0510 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_BASE_IDX 1 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH 0x0511 +#define mmUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_BASE_IDX 1 +#define mmUVD_JRBC_RB_REF_DATA 0x0512 +#define mmUVD_JRBC_RB_REF_DATA_BASE_IDX 1 +#define 
mmUVD_JRBC_RB_COND_RD_TIMER 0x0513 +#define mmUVD_JRBC_RB_COND_RD_TIMER_BASE_IDX 1 +#define mmUVD_JRBC_EXTERNAL_REG_BASE 0x0517 +#define mmUVD_JRBC_EXTERNAL_REG_BASE_BASE_IDX 1 #define mmUVD_JRBC_SOFT_RESET 0x0519 #define mmUVD_JRBC_SOFT_RESET_BASE_IDX 1 #define mmUVD_JRBC_STATUS 0x051a diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index e63bc47dc715..9b675d9bd162 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -81,7 +81,6 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) return -EINVAL; hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT; - hwmgr->power_source = PP_PowerSource_AC; hwmgr->pp_table_version = PP_TABLE_V1; hwmgr->dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; hwmgr->request_dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; @@ -236,6 +235,11 @@ int hwmgr_hw_init(struct pp_hwmgr *hwmgr) ret = hwmgr->hwmgr_func->backend_init(hwmgr); if (ret) goto err1; + /* make sure dc limits are valid */ + if ((hwmgr->dyn_state.max_clock_voltage_on_dc.sclk == 0) || + (hwmgr->dyn_state.max_clock_voltage_on_dc.mclk == 0)) + hwmgr->dyn_state.max_clock_voltage_on_dc = + hwmgr->dyn_state.max_clock_voltage_on_ac; ret = psm_init_power_state_table(hwmgr); if (ret) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c index 6d72a5600917..41495621d94a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c @@ -39,13 +39,6 @@ static int smu7_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable) PPSMC_MSG_VCEDPM_Disable); } -static int smu7_enable_disable_samu_dpm(struct pp_hwmgr *hwmgr, bool enable) -{ - return smum_send_msg_to_smc(hwmgr, enable ? 
- PPSMC_MSG_SAMUDPM_Enable : - PPSMC_MSG_SAMUDPM_Disable); -} - static int smu7_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate) { if (!bgate) @@ -60,13 +53,6 @@ static int smu7_update_vce_dpm(struct pp_hwmgr *hwmgr, bool bgate) return smu7_enable_disable_vce_dpm(hwmgr, !bgate); } -static int smu7_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate) -{ - if (!bgate) - smum_update_smc_table(hwmgr, SMU_SAMU_TABLE); - return smu7_enable_disable_samu_dpm(hwmgr, !bgate); -} - int smu7_powerdown_uvd(struct pp_hwmgr *hwmgr) { if (phm_cf_want_uvd_power_gating(hwmgr)) @@ -107,35 +93,15 @@ static int smu7_powerup_vce(struct pp_hwmgr *hwmgr) return 0; } -static int smu7_powerdown_samu(struct pp_hwmgr *hwmgr) -{ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SamuPowerGating)) - return smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_SAMPowerOFF); - return 0; -} - -static int smu7_powerup_samu(struct pp_hwmgr *hwmgr) -{ - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_SamuPowerGating)) - return smum_send_msg_to_smc(hwmgr, - PPSMC_MSG_SAMPowerON); - return 0; -} - int smu7_disable_clock_power_gating(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); data->uvd_power_gated = false; data->vce_power_gated = false; - data->samu_power_gated = false; smu7_powerup_uvd(hwmgr); smu7_powerup_vce(hwmgr); - smu7_powerup_samu(hwmgr); return 0; } @@ -195,26 +161,6 @@ void smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate) } } -int smu7_powergate_samu(struct pp_hwmgr *hwmgr, bool bgate) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - - if (data->samu_power_gated == bgate) - return 0; - - data->samu_power_gated = bgate; - - if (bgate) { - smu7_update_samu_dpm(hwmgr, true); - smu7_powerdown_samu(hwmgr); - } else { - smu7_powerup_samu(hwmgr); - smu7_update_samu_dpm(hwmgr, false); - } - - return 0; -} - int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr, const uint32_t 
*msg_id) { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.h index 1ddce023218a..be7f66d2b234 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.h @@ -29,7 +29,6 @@ void smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate); void smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate); int smu7_powerdown_uvd(struct pp_hwmgr *hwmgr); -int smu7_powergate_samu(struct pp_hwmgr *hwmgr, bool bgate); int smu7_powergate_acp(struct pp_hwmgr *hwmgr, bool bgate); int smu7_disable_clock_power_gating(struct pp_hwmgr *hwmgr); int smu7_update_clock_gatings(struct pp_hwmgr *hwmgr, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index f8e866ceda02..b89d6fb8559b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -885,6 +885,60 @@ static void smu7_setup_voltage_range_from_vbios(struct pp_hwmgr *hwmgr) data->odn_dpm_table.max_vddc = max_vddc; } +static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) +{ + struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); + struct smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + uint32_t i; + + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; + struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; + + if (table_info == NULL) + return; + + for (i = 0; i < data->dpm_table.sclk_table.count; i++) { + if (odn_table->odn_core_clock_dpm_levels.entries[i].clock != + data->dpm_table.sclk_table.dpm_levels[i].value) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; + break; + } + } + + for (i = 0; i < data->dpm_table.mclk_table.count; i++) { + if 
(odn_table->odn_memory_clock_dpm_levels.entries[i].clock != + data->dpm_table.mclk_table.dpm_levels[i].value) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; + break; + } + } + + dep_table = table_info->vdd_dep_on_mclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk); + + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; + } + } + + dep_table = table_info->vdd_dep_on_sclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; + } + } + if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; + } +} + static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -904,10 +958,13 @@ static int smu7_setup_default_dpm_tables(struct pp_hwmgr *hwmgr) /* initialize ODN table */ if (hwmgr->od_enabled) { - smu7_setup_voltage_range_from_vbios(hwmgr); - smu7_odn_initial_default_setting(hwmgr); + if (data->odn_dpm_table.max_vddc) { + smu7_check_dpm_table_updated(hwmgr); + } else { + smu7_setup_voltage_range_from_vbios(hwmgr); + smu7_odn_initial_default_setting(hwmgr); + } } - return 0; } @@ -2820,7 +2877,7 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *request_ps, const struct pp_power_state *current_ps) { - + struct amdgpu_device *adev = hwmgr->adev; struct smu7_power_state *smu7_ps = 
cast_phw_smu7_power_state(&request_ps->hardware); uint32_t sclk; @@ -2843,12 +2900,12 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, "VI should always have 2 performance levels", ); - max_limits = (PP_PowerSource_AC == hwmgr->power_source) ? + max_limits = adev->pm.ac_power ? &(hwmgr->dyn_state.max_clock_voltage_on_ac) : &(hwmgr->dyn_state.max_clock_voltage_on_dc); /* Cap clock DPM tables at DC MAX if it is in DC. */ - if (PP_PowerSource_DC == hwmgr->power_source) { + if (!adev->pm.ac_power) { for (i = 0; i < smu7_ps->performance_level_count; i++) { if (smu7_ps->performance_levels[i].memory_clock > max_limits->mclk) smu7_ps->performance_levels[i].memory_clock = max_limits->mclk; @@ -3717,8 +3774,9 @@ static int smu7_trim_single_dpm_states(struct pp_hwmgr *hwmgr, uint32_t i; for (i = 0; i < dpm_table->count; i++) { - if ((dpm_table->dpm_levels[i].value < low_limit) - || (dpm_table->dpm_levels[i].value > high_limit)) + /*skip the trim if od is enabled*/ + if (!hwmgr->od_enabled && (dpm_table->dpm_levels[i].value < low_limit + || dpm_table->dpm_levels[i].value > high_limit)) dpm_table->dpm_levels[i].enabled = false; else dpm_table->dpm_levels[i].enabled = true; @@ -3762,10 +3820,8 @@ static int smu7_generate_dpm_level_enable_mask( const struct smu7_power_state *smu7_ps = cast_const_phw_smu7_power_state(states->pnew_state); - /*skip the trim if od is enabled*/ - if (!hwmgr->od_enabled) - result = smu7_trim_dpm_states(hwmgr, smu7_ps); + result = smu7_trim_dpm_states(hwmgr, smu7_ps); if (result) return result; @@ -4244,7 +4300,6 @@ static int smu7_init_power_gate_state(struct pp_hwmgr *hwmgr) data->uvd_power_gated = false; data->vce_power_gated = false; - data->samu_power_gated = false; return 0; } @@ -4739,60 +4794,6 @@ static bool smu7_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, return true; } -static void smu7_check_dpm_table_updated(struct pp_hwmgr *hwmgr) -{ - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct 
smu7_odn_dpm_table *odn_table = &(data->odn_dpm_table); - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - uint32_t i; - - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; - struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; - - if (table_info == NULL) - return; - - for (i=0; i<data->dpm_table.sclk_table.count; i++) { - if (odn_table->odn_core_clock_dpm_levels.entries[i].clock != - data->dpm_table.sclk_table.dpm_levels[i].value) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK; - break; - } - } - - for (i=0; i<data->dpm_table.mclk_table.count; i++) { - if (odn_table->odn_memory_clock_dpm_levels.entries[i].clock != - data->dpm_table.mclk_table.dpm_levels[i].value) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK; - break; - } - } - - dep_table = table_info->vdd_dep_on_mclk; - odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_mclk); - - for (i=0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; - return; - } - } - - dep_table = table_info->vdd_dep_on_sclk; - odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dependency_on_sclk); - for (i=0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; - return; - } - } - if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { - data->need_update_smu7_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; - } -} - static int smu7_odn_edit_dpm_table(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size) diff --git 
a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h index c91e75db6a8e..3784ce6e50ab 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h @@ -310,7 +310,6 @@ struct smu7_hwmgr { /* ---- Power Gating States ---- */ bool uvd_power_gated; bool vce_power_gated; - bool samu_power_gated; bool need_long_memory_training; /* Application power optimization parameters */ diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 05e680d55dbb..3b8d36df52e9 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -2414,6 +2414,40 @@ static int vega10_populate_and_upload_avfs_fuse_override(struct pp_hwmgr *hwmgr) return result; } +static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); + struct phm_ppt_v2_information *table_info = hwmgr->pptable; + struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; + struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; + uint32_t i; + + dep_table = table_info->vdd_dep_on_mclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk); + + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; + return; + } + } + + dep_table = table_info->vdd_dep_on_sclk; + odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk); + for (i = 0; i < dep_table->count; i++) { + if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; + return; + } + } + + if 
(data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { + data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; + } +} + /** * Initializes the SMC table and uploads it * @@ -2430,6 +2464,7 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) PPTable_t *pp_table = &(data->smc_state_table.pp_table); struct pp_atomfwctrl_voltage_table voltage_table; struct pp_atomfwctrl_bios_boot_up_values boot_up_values; + struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); result = vega10_setup_default_dpm_tables(hwmgr); PP_ASSERT_WITH_CODE(!result, @@ -2437,8 +2472,14 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) return result); /* initialize ODN table */ - if (hwmgr->od_enabled) - vega10_odn_initial_default_setting(hwmgr); + if (hwmgr->od_enabled) { + if (odn_table->max_vddc) { + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; + vega10_check_dpm_table_updated(hwmgr); + } else { + vega10_odn_initial_default_setting(hwmgr); + } + } pp_atomfwctrl_get_voltage_table_v4(hwmgr, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2, &voltage_table); @@ -3061,6 +3102,7 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, struct pp_power_state *request_ps, const struct pp_power_state *current_ps) { + struct amdgpu_device *adev = hwmgr->adev; struct vega10_power_state *vega10_ps = cast_phw_vega10_power_state(&request_ps->hardware); uint32_t sclk; @@ -3086,12 +3128,12 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, if (vega10_ps->performance_level_count != 2) pr_info("VI should always have 2 performance levels"); - max_limits = (PP_PowerSource_AC == hwmgr->power_source) ? + max_limits = adev->pm.ac_power ? &(hwmgr->dyn_state.max_clock_voltage_on_ac) : &(hwmgr->dyn_state.max_clock_voltage_on_dc); /* Cap clock DPM tables at DC MAX if it is in DC. 
*/ - if (PP_PowerSource_DC == hwmgr->power_source) { + if (!adev->pm.ac_power) { for (i = 0; i < vega10_ps->performance_level_count; i++) { if (vega10_ps->performance_levels[i].mem_clock > max_limits->mclk) @@ -4695,40 +4737,6 @@ static bool vega10_check_clk_voltage_valid(struct pp_hwmgr *hwmgr, return true; } -static void vega10_check_dpm_table_updated(struct pp_hwmgr *hwmgr) -{ - struct vega10_hwmgr *data = hwmgr->backend; - struct vega10_odn_dpm_table *odn_table = &(data->odn_dpm_table); - struct phm_ppt_v2_information *table_info = hwmgr->pptable; - struct phm_ppt_v1_clock_voltage_dependency_table *dep_table; - struct phm_ppt_v1_clock_voltage_dependency_table *odn_dep_table; - uint32_t i; - - dep_table = table_info->vdd_dep_on_mclk; - odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_mclk); - - for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_MCLK; - return; - } - } - - dep_table = table_info->vdd_dep_on_sclk; - odn_dep_table = (struct phm_ppt_v1_clock_voltage_dependency_table *)&(odn_table->vdd_dep_on_sclk); - for (i = 0; i < dep_table->count; i++) { - if (dep_table->entries[i].vddc != odn_dep_table->entries[i].vddc) { - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC | DPMTABLE_OD_UPDATE_SCLK; - return; - } - } - - if (data->need_update_dpm_table & DPMTABLE_OD_UPDATE_VDDC) { - data->need_update_dpm_table &= ~DPMTABLE_OD_UPDATE_VDDC; - data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_OD_UPDATE_MCLK; - } -} - static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type) { diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h index aadd6cbc7e85..339820da9e6a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h +++ 
b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.h @@ -370,7 +370,6 @@ struct vega10_hwmgr { /* ---- Power Gating States ---- */ bool uvd_power_gated; bool vce_power_gated; - bool samu_power_gated; bool need_long_memory_training; /* Internal settings to apply the application power optimization parameters */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index b99fb8ac822c..40c98ca5feb7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -26,7 +26,6 @@ #include <linux/seq_file.h> #include "amd_powerplay.h" #include "hardwaremanager.h" -#include "pp_power_source.h" #include "hwmgr_ppt.h" #include "ppatomctrl.h" #include "hwmgr_ppt.h" @@ -741,7 +740,6 @@ struct pp_hwmgr { const struct pp_table_func *pptable_func; struct pp_power_state *ps; - enum pp_power_source power_source; uint32_t num_ps; struct pp_thermal_controller_info thermal_controller; bool fan_ctrl_is_in_default_mode; diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_power_source.h b/drivers/gpu/drm/amd/powerplay/inc/pp_power_source.h deleted file mode 100644 index b43315cc5d58..000000000000 --- a/drivers/gpu/drm/amd/powerplay/inc/pp_power_source.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef PP_POWERSOURCE_H -#define PP_POWERSOURCE_H - -enum pp_power_source { - PP_PowerSource_AC = 0, - PP_PowerSource_DC, - PP_PowerSource_LimitedPower, - PP_PowerSource_LimitedPower_2, - PP_PowerSource_Max -}; - - -#endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h index 6c22ed9249bf..89dfbf53c7e6 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h @@ -29,7 +29,6 @@ enum SMU_TABLE { SMU_UVD_TABLE = 0, SMU_VCE_TABLE, - SMU_SAMU_TABLE, SMU_BIF_TABLE, }; @@ -47,7 +46,6 @@ enum SMU_MEMBER { UcodeLoadStatus, UvdBootLevel, VceBootLevel, - SamuBootLevel, LowSclkInterruptThreshold, DRAM_LOG_ADDR_H, DRAM_LOG_ADDR_L, diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c index 2d4ec8ac3a08..fbe3ef4ee45c 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/ci_smumgr.c @@ -1614,37 +1614,6 @@ static int ci_populate_smc_acp_level(struct pp_hwmgr *hwmgr, return result; } -static int ci_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU7_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_samu_clock_voltage_dependency_table *samu_table = - hwmgr->dyn_state.samu_clock_voltage_dependency_table; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(samu_table->count); - - 
for (count = 0; count < table->SamuLevelCount; count++) { - table->SamuLevel[count].Frequency = samu_table->entries[count].samclk; - table->SamuLevel[count].MinVoltage = samu_table->entries[count].v * VOLTAGE_SCALE; - table->SamuLevel[count].MinPhases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_US(table->SamuLevel[count].MinVoltage); - } - return result; -} - static int ci_populate_memory_timing_parameters( struct pp_hwmgr *hwmgr, uint32_t engine_clock, @@ -2026,10 +1995,6 @@ static int ci_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize ACP Level!", return result); - result = ci_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result); - /* Since only the initial state is completely set up at this point (the other states are just copies of the boot state) we only */ /* need to populate the ARB settings for the initial state. 
*/ result = ci_program_memory_timing_parameters(hwmgr); @@ -2881,6 +2846,89 @@ static int ci_update_dpm_settings(struct pp_hwmgr *hwmgr, return 0; } +static int ci_update_uvd_smc_table(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + struct smu7_hwmgr *data = hwmgr->backend; + struct ci_smumgr *smu_data = hwmgr->smu_backend; + struct phm_uvd_clock_voltage_dependency_table *uvd_table = + hwmgr->dyn_state.uvd_clock_voltage_dependency_table; + uint32_t profile_mode_mask = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + uint32_t max_vddc = adev->pm.ac_power ? hwmgr->dyn_state.max_clock_voltage_on_ac.vddc : + hwmgr->dyn_state.max_clock_voltage_on_dc.vddc; + int32_t i; + + if (PP_CAP(PHM_PlatformCaps_UVDDPM) || uvd_table->count <= 0) + smu_data->smc_state_table.UvdBootLevel = 0; + else + smu_data->smc_state_table.UvdBootLevel = uvd_table->count - 1; + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, DPM_TABLE_475, + UvdBootLevel, smu_data->smc_state_table.UvdBootLevel); + + data->dpm_level_enable_mask.uvd_dpm_enable_mask = 0; + + for (i = uvd_table->count - 1; i >= 0; i--) { + if (uvd_table->entries[i].v <= max_vddc) + data->dpm_level_enable_mask.uvd_dpm_enable_mask |= 1 << i; + if (hwmgr->dpm_level & profile_mode_mask || !PP_CAP(PHM_PlatformCaps_UVDDPM)) + break; + } + ci_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_UVDDPM_SetEnabledMask, + data->dpm_level_enable_mask.uvd_dpm_enable_mask); + + return 0; +} + +static int ci_update_vce_smc_table(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + struct smu7_hwmgr *data = hwmgr->backend; + struct phm_vce_clock_voltage_dependency_table *vce_table = + hwmgr->dyn_state.vce_clock_voltage_dependency_table; + uint32_t profile_mode_mask = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK | + AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK 
| + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + uint32_t max_vddc = adev->pm.ac_power ? hwmgr->dyn_state.max_clock_voltage_on_ac.vddc : + hwmgr->dyn_state.max_clock_voltage_on_dc.vddc; + int32_t i; + + PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, DPM_TABLE_475, + VceBootLevel, 0); /* temp hard code to level 0, vce can set min evclk*/ + + data->dpm_level_enable_mask.vce_dpm_enable_mask = 0; + + for (i = vce_table->count - 1; i >= 0; i--) { + if (vce_table->entries[i].v <= max_vddc) + data->dpm_level_enable_mask.vce_dpm_enable_mask |= 1 << i; + if (hwmgr->dpm_level & profile_mode_mask || !PP_CAP(PHM_PlatformCaps_VCEDPM)) + break; + } + ci_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_VCEDPM_SetEnabledMask, + data->dpm_level_enable_mask.vce_dpm_enable_mask); + + return 0; +} + +static int ci_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) +{ + switch (type) { + case SMU_UVD_TABLE: + ci_update_uvd_smc_table(hwmgr); + break; + case SMU_VCE_TABLE: + ci_update_vce_smc_table(hwmgr); + break; + default: + break; + } + return 0; +} + const struct pp_smumgr_func ci_smu_funcs = { .smu_init = ci_smu_init, .smu_fini = ci_smu_fini, @@ -2903,4 +2951,5 @@ const struct pp_smumgr_func ci_smu_funcs = { .initialize_mc_reg_table = ci_initialize_mc_reg_table, .is_dpm_running = ci_is_dpm_running, .update_dpm_settings = ci_update_dpm_settings, + .update_smc_table = ci_update_smc_table, }; diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c index 53df9405f43a..18048f8e2f13 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c @@ -1503,44 +1503,6 @@ static int fiji_populate_smc_acp_level(struct pp_hwmgr *hwmgr, return result; } -static int fiji_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU73_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct 
phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].MinVoltage = 0; - table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - VDDC_VDDCI_DELTA) * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); - } - return result; -} - static int fiji_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, int32_t eng_clock, int32_t mem_clock, struct SMU73_Discrete_MCArbDramTimingTableEntry *arb_regs) @@ -2028,10 +1990,6 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize ACP Level!", return result); - result = fiji_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result); - /* Since only the initial state is completely set up at this point * (the other states are just copies of the boot state) we only * need to populate the ARB settings for the initial state. 
@@ -2378,8 +2336,6 @@ static uint32_t fiji_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU73_Discrete_DpmTable, UvdBootLevel); case VceBootLevel: return offsetof(SMU73_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); case LowSclkInterruptThreshold: return offsetof(SMU73_Discrete_DpmTable, LowSclkInterruptThreshold); } @@ -2478,33 +2434,6 @@ static int fiji_update_vce_smc_table(struct pp_hwmgr *hwmgr) return 0; } -static int fiji_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct fiji_smumgr *smu_data = (struct fiji_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU73_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - static int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) { switch (type) { @@ -2514,9 +2443,6 @@ static int fiji_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) case SMU_VCE_TABLE: fiji_update_vce_smc_table(hwmgr); break; - case SMU_SAMU_TABLE: - fiji_update_samu_smc_table(hwmgr); - break; default: break; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c index 415f691c3fa9..9299b93aa09a 100644 --- 
a/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/iceland_smumgr.c @@ -1578,12 +1578,6 @@ static int iceland_populate_smc_acp_level(struct pp_hwmgr *hwmgr, return 0; } -static int iceland_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU71_Discrete_DpmTable *table) -{ - return 0; -} - static int iceland_populate_memory_timing_parameters( struct pp_hwmgr *hwmgr, uint32_t engine_clock, @@ -1992,10 +1986,6 @@ static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize ACP Level!", return result;); - result = iceland_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result;); - /* Since only the initial state is completely set up at this point (the other states are just copies of the boot state) we only */ /* need to populate the ARB settings for the initial state. */ result = iceland_program_memory_timing_parameters(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index a8c6524f07e4..a4ce199af475 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -1337,55 +1337,6 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, return result; } - -static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU74_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t vddci; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(mm_table->count); - - for (count = 0; count < 
table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].MinVoltage = 0; - table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; - else - vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; - - table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); - } - return result; -} - static int polaris10_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, int32_t eng_clock, int32_t mem_clock, SMU74_Discrete_MCArbDramTimingTableEntry *arb_regs) @@ -1865,10 +1816,6 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(0 == result, "Failed to initialize VCE Level!", return result); - result = polaris10_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(0 == result, - "Failed to initialize SAMU Level!", return result); - /* Since only the initial state is completely set up at this point * (the other states are just copies of the boot state) we only * need to populate the ARB settings for the initial state. 
@@ -2222,34 +2169,6 @@ static int polaris10_update_vce_smc_table(struct pp_hwmgr *hwmgr) return 0; } -static int polaris10_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - - static int polaris10_update_bif_smc_table(struct pp_hwmgr *hwmgr) { struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); @@ -2276,9 +2195,6 @@ static int polaris10_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) case SMU_VCE_TABLE: polaris10_update_vce_smc_table(hwmgr); break; - case SMU_SAMU_TABLE: - polaris10_update_samu_smc_table(hwmgr); - break; case SMU_BIF_TABLE: polaris10_update_bif_smc_table(hwmgr); default: @@ -2357,8 +2273,6 @@ static uint32_t polaris10_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU74_Discrete_DpmTable, UvdBootLevel); case VceBootLevel: return offsetof(SMU74_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU74_Discrete_DpmTable, SamuBootLevel); case LowSclkInterruptThreshold: return offsetof(SMU74_Discrete_DpmTable, LowSclkInterruptThreshold); } diff --git 
a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c index 782b19fc2e70..7dabc6c456e1 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c @@ -1443,51 +1443,6 @@ static int tonga_populate_smc_acp_level(struct pp_hwmgr *hwmgr, return result; } -static int tonga_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU72_Discrete_DpmTable *table) -{ - int result = 0; - uint8_t count; - pp_atomctrl_clock_dividers_vi dividers; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - struct phm_ppt_v1_information *pptable_info = - (struct phm_ppt_v1_information *)(hwmgr->pptable); - phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - pptable_info->mm_dep_table; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t) (mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].Frequency = - pptable_info->mm_dep_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage.Vddc = - phm_get_voltage_index(pptable_info->vddc_lookup_table, - mm_table->entries[count].vddc); - table->SamuLevel[count].MinVoltage.VddGfx = - (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) ? 
- phm_get_voltage_index(pptable_info->vddgfx_lookup_table, - mm_table->entries[count].vddgfx) : 0; - table->SamuLevel[count].MinVoltage.Vddci = - phm_get_voltage_id(&data->vddci_voltage_table, - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - table->SamuLevel[count].MinVoltage.Phases = 1; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((!result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - } - - return result; -} - static int tonga_populate_memory_timing_parameters( struct pp_hwmgr *hwmgr, uint32_t engine_clock, @@ -2323,10 +2278,6 @@ static int tonga_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(!result, "Failed to initialize ACP Level !", return result); - result = tonga_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize SAMU Level !", return result); - /* Since only the initial state is completely set up at this * point (the other states are just copies of the boot state) we only * need to populate the ARB settings for the initial state. 
@@ -2673,8 +2624,6 @@ static uint32_t tonga_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU72_Discrete_DpmTable, UvdBootLevel); case VceBootLevel: return offsetof(SMU72_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); case LowSclkInterruptThreshold: return offsetof(SMU72_Discrete_DpmTable, LowSclkInterruptThreshold); } @@ -2773,32 +2722,6 @@ static int tonga_update_vce_smc_table(struct pp_hwmgr *hwmgr) return 0; } -static int tonga_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct tonga_smumgr *smu_data = (struct tonga_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU72_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - static int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) { switch (type) { @@ -2808,9 +2731,6 @@ static int tonga_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) case SMU_VCE_TABLE: tonga_update_vce_smc_table(hwmgr); break; - case SMU_SAMU_TABLE: - tonga_update_samu_smc_table(hwmgr); - break; default: break; } diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 2de48959ac93..57420d7caa4e 100644 --- 
a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -393,34 +393,6 @@ static int vegam_update_vce_smc_table(struct pp_hwmgr *hwmgr) return 0; } -static int vegam_update_samu_smc_table(struct pp_hwmgr *hwmgr) -{ - struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); - uint32_t mm_boot_level_offset, mm_boot_level_value; - - - smu_data->smc_state_table.SamuBootLevel = 0; - mm_boot_level_offset = smu_data->smu7_data.dpm_table_start + - offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); - - mm_boot_level_offset /= 4; - mm_boot_level_offset *= 4; - mm_boot_level_value = cgs_read_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset); - mm_boot_level_value &= 0xFFFFFF00; - mm_boot_level_value |= smu_data->smc_state_table.SamuBootLevel << 0; - cgs_write_ind_register(hwmgr->device, - CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value); - - if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, - PHM_PlatformCaps_StablePState)) - smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_SAMUDPM_SetEnabledMask, - (uint32_t)(1 << smu_data->smc_state_table.SamuBootLevel)); - return 0; -} - - static int vegam_update_bif_smc_table(struct pp_hwmgr *hwmgr) { struct vegam_smumgr *smu_data = (struct vegam_smumgr *)(hwmgr->smu_backend); @@ -447,9 +419,6 @@ static int vegam_update_smc_table(struct pp_hwmgr *hwmgr, uint32_t type) case SMU_VCE_TABLE: vegam_update_vce_smc_table(hwmgr); break; - case SMU_SAMU_TABLE: - vegam_update_samu_smc_table(hwmgr); - break; case SMU_BIF_TABLE: vegam_update_bif_smc_table(hwmgr); break; @@ -1281,54 +1250,6 @@ static int vegam_populate_smc_vce_level(struct pp_hwmgr *hwmgr, return result; } -static int vegam_populate_smc_samu_level(struct pp_hwmgr *hwmgr, - SMU75_Discrete_DpmTable *table) -{ - int result = -EINVAL; - uint8_t count; - struct pp_atomctrl_clock_dividers_vi dividers; - struct phm_ppt_v1_information *table_info = - (struct 
phm_ppt_v1_information *)(hwmgr->pptable); - struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = - table_info->mm_dep_table; - struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - uint32_t vddci; - - table->SamuBootLevel = 0; - table->SamuLevelCount = (uint8_t)(mm_table->count); - - for (count = 0; count < table->SamuLevelCount; count++) { - /* not sure whether we need evclk or not */ - table->SamuLevel[count].MinVoltage = 0; - table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; - table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * - VOLTAGE_SCALE) << VDDC_SHIFT; - - if (SMU7_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) - vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), - mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); - else if (SMU7_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) - vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; - else - vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; - - table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; - table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; - - /* retrieve divider value for VBIOS */ - result = atomctrl_get_dfs_pll_dividers_vi(hwmgr, - table->SamuLevel[count].Frequency, &dividers); - PP_ASSERT_WITH_CODE((0 == result), - "can not find divide id for samu clock", return result); - - table->SamuLevel[count].Divider = (uint8_t)dividers.pll_post_divider; - - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].Frequency); - CONVERT_FROM_HOST_TO_SMC_UL(table->SamuLevel[count].MinVoltage); - } - return result; -} - static int vegam_populate_memory_timing_parameters(struct pp_hwmgr *hwmgr, int32_t eng_clock, int32_t mem_clock, SMU75_Discrete_MCArbDramTimingTableEntry *arb_regs) @@ -2062,10 +1983,6 @@ static int vegam_init_smc_table(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE(!result, "Failed to initialize VCE Level!", return result); - result = 
vegam_populate_smc_samu_level(hwmgr, table); - PP_ASSERT_WITH_CODE(!result, - "Failed to initialize SAMU Level!", return result); - /* Since only the initial state is completely set up at this point * (the other states are just copies of the boot state) we only * need to populate the ARB settings for the initial state. @@ -2273,8 +2190,6 @@ static uint32_t vegam_get_offsetof(uint32_t type, uint32_t member) return offsetof(SMU75_Discrete_DpmTable, UvdBootLevel); case VceBootLevel: return offsetof(SMU75_Discrete_DpmTable, VceBootLevel); - case SamuBootLevel: - return offsetof(SMU75_Discrete_DpmTable, SamuBootLevel); case LowSclkInterruptThreshold: return offsetof(SMU75_Discrete_DpmTable, LowSclkInterruptThreshold); } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 8689fcca051c..cbb67e9ffb3a 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -947,11 +947,11 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size) static struct vm_operations_struct radeon_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops = NULL; -static int radeon_ttm_fault(struct vm_fault *vmf) +static vm_fault_t radeon_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; struct radeon_device *rdev; - int r; + vm_fault_t ret; bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) { @@ -959,9 +959,9 @@ static int radeon_ttm_fault(struct vm_fault *vmf) } rdev = radeon_get_rdev(bo->bdev); down_read(&rdev->pm.mclk_lock); - r = ttm_vm_ops->fault(vmf); + ret = ttm_vm_ops->fault(vmf); up_read(&rdev->pm.mclk_lock); - return r; + return ret; } int radeon_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 44d480768dfe..6a316701da73 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -21,6 +21,29 @@ * 
*/ +/** + * DOC: Overview + * + * The GPU scheduler provides entities which allow userspace to push jobs + * into software queues which are then scheduled on a hardware run queue. + * The software queues have a priority among them. The scheduler selects the entities + * from the run queue using a FIFO. The scheduler provides dependency handling + * features among jobs. The driver is supposed to provide callback functions for + * backend operations to the scheduler like submitting a job to hardware run queue, + * returning the dependencies of a job etc. + * + * The organisation of the scheduler is the following: + * + * 1. Each hw run queue has one scheduler + * 2. Each scheduler has multiple run queues with different priorities + * (e.g., HIGH_HW,HIGH_SW, KERNEL, NORMAL) + * 3. Each scheduler run queue has a queue of entities to schedule + * 4. Entities themselves maintain a queue of jobs that will be scheduled on + * the hardware. + * + * The jobs in a entity are always scheduled in the order that they were pushed. + */ + #include <linux/kthread.h> #include <linux/wait.h> #include <linux/sched.h> @@ -39,7 +62,13 @@ static bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); static void drm_sched_wakeup(struct drm_gpu_scheduler *sched); static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb); -/* Initialize a given run queue struct */ +/** + * drm_sched_rq_init - initialize a given run queue struct + * + * @rq: scheduler run queue + * + * Initializes a scheduler runqueue. + */ static void drm_sched_rq_init(struct drm_sched_rq *rq) { spin_lock_init(&rq->lock); @@ -47,6 +76,14 @@ static void drm_sched_rq_init(struct drm_sched_rq *rq) rq->current_entity = NULL; } +/** + * drm_sched_rq_add_entity - add an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Adds a scheduler entity to the run queue. 
+ */ static void drm_sched_rq_add_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { @@ -57,6 +94,14 @@ static void drm_sched_rq_add_entity(struct drm_sched_rq *rq, spin_unlock(&rq->lock); } +/** + * drm_sched_rq_remove_entity - remove an entity + * + * @rq: scheduler run queue + * @entity: scheduler entity + * + * Removes a scheduler entity from the run queue. + */ static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { @@ -70,9 +115,9 @@ static void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, } /** - * Select an entity which could provide a job to run + * drm_sched_rq_select_entity - Select an entity which could provide a job to run * - * @rq The run queue to check. + * @rq: scheduler run queue to check. * * Try to find a ready entity, returns NULL if none found. */ @@ -112,15 +157,16 @@ drm_sched_rq_select_entity(struct drm_sched_rq *rq) } /** - * Init a context entity used by scheduler when submit to HW ring. + * drm_sched_entity_init - Init a context entity used by scheduler when + * submit to HW ring. * - * @sched The pointer to the scheduler - * @entity The pointer to a valid drm_sched_entity - * @rq The run queue this entity belongs - * @guilty atomic_t set to 1 when a job on this queue - * is found to be guilty causing a timeout + * @sched: scheduler instance + * @entity: scheduler entity to init + * @rq: the run queue this entity belongs + * @guilty: atomic_t set to 1 when a job on this queue + * is found to be guilty causing a timeout * - * return 0 if succeed. negative error code on failure + * Returns 0 on success or a negative error code on failure. 
*/ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, @@ -135,7 +181,6 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, entity->rq = rq; entity->sched = sched; entity->guilty = guilty; - entity->fini_status = 0; entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); @@ -149,10 +194,10 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, EXPORT_SYMBOL(drm_sched_entity_init); /** - * Query if entity is initialized + * drm_sched_entity_is_initialized - Query if entity is initialized * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * @sched: Pointer to scheduler instance + * @entity: The pointer to a valid scheduler entity * * return true if entity is initialized, false otherwise */ @@ -164,25 +209,26 @@ static bool drm_sched_entity_is_initialized(struct drm_gpu_scheduler *sched, } /** - * Check if entity is idle + * drm_sched_entity_is_idle - Check if entity is idle * - * @entity The pointer to a valid scheduler entity + * @entity: scheduler entity * - * Return true if entity don't has any unscheduled jobs. + * Returns true if the entity does not have any unscheduled jobs. */ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) { rmb(); - if (spsc_queue_peek(&entity->job_queue) == NULL) + + if (!entity->rq || spsc_queue_peek(&entity->job_queue) == NULL) return true; return false; } /** - * Check if entity is ready + * drm_sched_entity_is_ready - Check if entity is ready * - * @entity The pointer to a valid scheduler entity + * @entity: scheduler entity * * Return true if entity could provide a job. 
*/ @@ -210,44 +256,66 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, /** - * Destroy a context entity + * drm_sched_entity_do_release - Destroy a context entity * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * @sched: scheduler instance + * @entity: scheduler entity + * @timeout: time to wait in for Q to become empty in jiffies. * - * Splitting drm_sched_entity_fini() into two functions, The first one is does the waiting, + * Splitting drm_sched_entity_fini() into two functions, The first one does the waiting, * removes the entity from the runqueue and returns an error when the process was killed. + * + * Returns the remaining time in jiffies left from the input timeout */ -void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) +long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity, long timeout) { + long ret = timeout; + if (!drm_sched_entity_is_initialized(sched, entity)) - return; + return ret; /** * The client will not queue more IBs during this fini, consume existing * queued IBs or discard them on SIGKILL */ - if ((current->flags & PF_SIGNALED) && current->exit_code == SIGKILL) - entity->fini_status = -ERESTARTSYS; - else - entity->fini_status = wait_event_killable(sched->job_scheduled, - drm_sched_entity_is_idle(entity)); - drm_sched_entity_set_rq(entity, NULL); + if (current->flags & PF_EXITING) { + if (timeout) + ret = wait_event_timeout( + sched->job_scheduled, + drm_sched_entity_is_idle(entity), + timeout); + } else + wait_event_killable(sched->job_scheduled, drm_sched_entity_is_idle(entity)); + + + /* For killed process disable any more IBs enqueue right now */ + if ((current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) + drm_sched_entity_set_rq(entity, NULL); + + return ret; } EXPORT_SYMBOL(drm_sched_entity_do_release); /** - * Destroy a context entity + * drm_sched_entity_cleanup - 
Destroy a context entity + * + * @sched: scheduler instance + * @entity: scheduler entity * - * @sched Pointer to scheduler instance - * @entity The pointer to a valid scheduler entity + * This should be called after @drm_sched_entity_do_release. It goes over the + * entity and signals all jobs with an error code if the process was killed. * - * The second one then goes over the entity and signals all jobs with an error code. */ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - if (entity->fini_status) { + + drm_sched_entity_set_rq(entity, NULL); + + /* Consumption of existing IBs wasn't completed. Forcefully + * remove them here. + */ + if (spsc_queue_peek(&entity->job_queue)) { struct drm_sched_job *job; int r; @@ -267,12 +335,22 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_fence *s_fence = job->s_fence; drm_sched_fence_scheduled(s_fence); dma_fence_set_error(&s_fence->finished, -ESRCH); - r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, - drm_sched_entity_kill_jobs_cb); - if (r == -ENOENT) + + /* + * When pipe is hanged by older entity, new entity might + * not even have chance to submit it's first job to HW + * and so entity->last_scheduled will remain NULL + */ + if (!entity->last_scheduled) { drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); - else if (r) - DRM_ERROR("fence add callback failed (%d)\n", r); + } else { + r = dma_fence_add_callback(entity->last_scheduled, &job->finish_cb, + drm_sched_entity_kill_jobs_cb); + if (r == -ENOENT) + drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb); + else if (r) + DRM_ERROR("fence add callback failed (%d)\n", r); + } } } @@ -281,10 +359,18 @@ void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, } EXPORT_SYMBOL(drm_sched_entity_cleanup); +/** + * drm_sched_entity_fini - Destroy a context entity + * + * @sched: scheduler instance + * @entity: scheduler entity + * + * Calls 
drm_sched_entity_do_release() and drm_sched_entity_cleanup() + */ void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity) { - drm_sched_entity_do_release(sched, entity); + drm_sched_entity_do_release(sched, entity, MAX_WAIT_SCHED_ENTITY_Q_EMPTY); drm_sched_entity_cleanup(sched, entity); } EXPORT_SYMBOL(drm_sched_entity_fini); @@ -306,6 +392,15 @@ static void drm_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb dma_fence_put(f); } +/** + * drm_sched_entity_set_rq - Sets the run queue for an entity + * + * @entity: scheduler entity + * @rq: scheduler run queue + * + * Sets the run queue for an entity and removes the entity from the previous + * run queue in which was present. + */ void drm_sched_entity_set_rq(struct drm_sched_entity *entity, struct drm_sched_rq *rq) { @@ -325,6 +420,14 @@ void drm_sched_entity_set_rq(struct drm_sched_entity *entity, } EXPORT_SYMBOL(drm_sched_entity_set_rq); +/** + * drm_sched_dependency_optimized + * + * @fence: the dependency fence + * @entity: the entity which depends on the above fence + * + * Returns true if the dependency can be optimized and false otherwise + */ bool drm_sched_dependency_optimized(struct dma_fence* fence, struct drm_sched_entity *entity) { @@ -413,9 +516,10 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity) } /** - * Submit a job to the job queue + * drm_sched_entity_push_job - Submit a job to the entity's job queue * - * @sched_job The pointer to job required to submit + * @sched_job: job to submit + * @entity: scheduler entity * * Note: To guarantee that the order of insertion to queue matches * the job's fence sequence number this function should be @@ -506,6 +610,13 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->timedout_job(job); } +/** + * drm_sched_hw_job_reset - stop the scheduler if it contains the bad job + * + * @sched: scheduler instance + * @bad: bad scheduler job + * + */ void 
drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { struct drm_sched_job *s_job; @@ -550,6 +661,12 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo } EXPORT_SYMBOL(drm_sched_hw_job_reset); +/** + * drm_sched_job_recovery - recover jobs after a reset + * + * @sched: scheduler instance + * + */ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) { struct drm_sched_job *s_job, *tmp; @@ -599,10 +716,17 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) EXPORT_SYMBOL(drm_sched_job_recovery); /** - * Init a sched_job with basic field + * drm_sched_job_init - init a scheduler job + * + * @job: scheduler job to init + * @sched: scheduler instance + * @entity: scheduler entity to use + * @owner: job owner for debugging * - * Note: Refer to drm_sched_entity_push_job documentation + * Refer to drm_sched_entity_push_job() documentation * for locking considerations. + * + * Returns 0 for success, negative error code otherwise. */ int drm_sched_job_init(struct drm_sched_job *job, struct drm_gpu_scheduler *sched, @@ -626,7 +750,11 @@ int drm_sched_job_init(struct drm_sched_job *job, EXPORT_SYMBOL(drm_sched_job_init); /** - * Return ture if we can push more jobs to the hw. + * drm_sched_ready - is the scheduler ready + * + * @sched: scheduler instance + * + * Return true if we can push more jobs to the hw, otherwise false. 
*/ static bool drm_sched_ready(struct drm_gpu_scheduler *sched) { @@ -635,7 +763,10 @@ static bool drm_sched_ready(struct drm_gpu_scheduler *sched) } /** - * Wake up the scheduler when it is ready + * drm_sched_wakeup - Wake up the scheduler when it is ready + * + * @sched: scheduler instance + * */ static void drm_sched_wakeup(struct drm_gpu_scheduler *sched) { @@ -644,8 +775,12 @@ static void drm_sched_wakeup(struct drm_gpu_scheduler *sched) } /** - * Select next entity to process -*/ + * drm_sched_select_entity - Select next entity to process + * + * @sched: scheduler instance + * + * Returns the entity to process or NULL if none are found. + */ static struct drm_sched_entity * drm_sched_select_entity(struct drm_gpu_scheduler *sched) { @@ -665,6 +800,14 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) return entity; } +/** + * drm_sched_process_job - process a job + * + * @f: fence + * @cb: fence callbacks + * + * Called after job has finished execution. + */ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) { struct drm_sched_fence *s_fence = @@ -680,6 +823,13 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) wake_up_interruptible(&sched->wake_up_worker); } +/** + * drm_sched_blocked - check if the scheduler is blocked + * + * @sched: scheduler instance + * + * Returns true if blocked, otherwise false. + */ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) { if (kthread_should_park()) { @@ -690,6 +840,13 @@ static bool drm_sched_blocked(struct drm_gpu_scheduler *sched) return false; } +/** + * drm_sched_main - main scheduler thread + * + * @param: scheduler instance + * + * Returns 0. 
+ */ static int drm_sched_main(void *param) { struct sched_param sparam = {.sched_priority = 1}; @@ -744,15 +901,17 @@ static int drm_sched_main(void *param) } /** - * Init a gpu scheduler instance + * drm_sched_init - Init a gpu scheduler instance * - * @sched The pointer to the scheduler - * @ops The backend operations for this scheduler. - * @hw_submissions Number of hw submissions to do. - * @name Name used for debugging + * @sched: scheduler instance + * @ops: backend operations for this scheduler + * @hw_submission: number of hw submissions that can be in flight + * @hang_limit: number of times to allow a job to hang before dropping it + * @timeout: timeout value in jiffies for the scheduler + * @name: name used for debugging * * Return 0 on success, otherwise error code. -*/ + */ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_backend_ops *ops, unsigned hw_submission, @@ -788,9 +947,11 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, EXPORT_SYMBOL(drm_sched_init); /** - * Destroy a gpu scheduler + * drm_sched_fini - Destroy a gpu scheduler + * + * @sched: scheduler instance * - * @sched The pointer to the scheduler + * Tears down and cleans up the scheduler. */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index c7ece7613a6a..0ca0ec47334e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -44,10 +44,11 @@ #define TTM_BO_VM_NUM_PREFAULT 16 -static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, +static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { - int ret = 0; + vm_fault_t ret = 0; + int err = 0; if (likely(!bo->moving)) goto out_unlock; @@ -78,9 +79,9 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, /* * Ordinary wait. */ - ret = dma_fence_wait(bo->moving, true); - if (unlikely(ret != 0)) { - ret = (ret != -ERESTARTSYS) ? 
VM_FAULT_SIGBUS : + err = dma_fence_wait(bo->moving, true); + if (unlikely(err != 0)) { + ret = (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : VM_FAULT_NOPAGE; goto out_unlock; } @@ -105,7 +106,7 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static int ttm_bo_vm_fault(struct vm_fault *vmf) +static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct ttm_buffer_object *bo = (struct ttm_buffer_object *) @@ -116,8 +117,9 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) unsigned long pfn; struct ttm_tt *ttm = NULL; struct page *page; - int ret; + int err; int i; + vm_fault_t ret = VM_FAULT_NOPAGE; unsigned long address = vmf->address; struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type]; @@ -129,9 +131,9 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) * for reserve, and if it fails, retry the fault after waiting * for the buffer to become unreserved. */ - ret = ttm_bo_reserve(bo, true, true, NULL); - if (unlikely(ret != 0)) { - if (ret != -EBUSY) + err = ttm_bo_reserve(bo, true, true, NULL); + if (unlikely(err != 0)) { + if (err != -EBUSY) return VM_FAULT_NOPAGE; if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { @@ -163,8 +165,8 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) } if (bdev->driver->fault_reserve_notify) { - ret = bdev->driver->fault_reserve_notify(bo); - switch (ret) { + err = bdev->driver->fault_reserve_notify(bo); + switch (err) { case 0: break; case -EBUSY: @@ -192,13 +194,13 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) goto out_unlock; } - ret = ttm_mem_io_lock(man, true); - if (unlikely(ret != 0)) { + err = ttm_mem_io_lock(man, true); + if (unlikely(err != 0)) { ret = VM_FAULT_NOPAGE; goto out_unlock; } - ret = ttm_mem_io_reserve_vm(bo); - if (unlikely(ret != 0)) { + err = ttm_mem_io_reserve_vm(bo); + if (unlikely(err != 0)) { ret = VM_FAULT_SIGBUS; goto out_io_unlock; } @@ -266,23 +268,20 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf) } if 
(vma->vm_flags & VM_MIXEDMAP) - ret = vm_insert_mixed(&cvma, address, + ret = vmf_insert_mixed(&cvma, address, __pfn_to_pfn_t(pfn, PFN_DEV)); else - ret = vm_insert_pfn(&cvma, address, pfn); + ret = vmf_insert_pfn(&cvma, address, pfn); /* * Somebody beat us to this PTE or prefaulting to * an already populated PTE, or prefaulting error. */ - if (unlikely((ret == -EBUSY) || (ret != 0 && i > 0))) + if (unlikely((ret == VM_FAULT_NOPAGE && i > 0))) break; - else if (unlikely(ret != 0)) { - ret = - (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; + else if (unlikely(ret & VM_FAULT_ERROR)) goto out_io_unlock; - } address += PAGE_SIZE; if (unlikely(++page_offset >= page_last)) diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index dec655894d08..7c2dfd6cc1af 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -27,6 +27,8 @@ #include <drm/spsc_queue.h> #include <linux/dma-fence.h> +#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000) + struct drm_gpu_scheduler; struct drm_sched_rq; @@ -43,13 +45,33 @@ enum drm_sched_priority { }; /** - * drm_sched_entity - A wrapper around a job queue (typically attached - * to the DRM file_priv). + * struct drm_sched_entity - A wrapper around a job queue (typically + * attached to the DRM file_priv). + * + * @list: used to append this struct to the list of entities in the + * runqueue. + * @rq: runqueue to which this entity belongs. + * @rq_lock: lock to modify the runqueue to which this entity belongs. + * @sched: the scheduler instance to which this entity is enqueued. + * @job_queue: the list of jobs of this entity. + * @fence_seq: a linearly increasing seqno incremented with each + * new &drm_sched_fence which is part of the entity. + * @fence_context: a unique context for all the fences which belong + * to this entity. + * The &drm_sched_fence.scheduled uses the + * fence_context but &drm_sched_fence.finished uses + * fence_context + 1. 
+ * @dependency: the dependency fence of the job which is on the top + * of the job queue. + * @cb: callback for the dependency fence above. + * @guilty: points to ctx's guilty. + * @fini_status: contains the exit status in case the process was signalled. + * @last_scheduled: points to the finished fence of the last scheduled job. * * Entities will emit jobs in order to their corresponding hardware * ring, and the scheduler will alternate between entities based on * scheduling policy. -*/ + */ struct drm_sched_entity { struct list_head list; struct drm_sched_rq *rq; @@ -63,47 +85,95 @@ struct drm_sched_entity { struct dma_fence *dependency; struct dma_fence_cb cb; - atomic_t *guilty; /* points to ctx's guilty */ - int fini_status; - struct dma_fence *last_scheduled; + atomic_t *guilty; + struct dma_fence *last_scheduled; }; /** + * struct drm_sched_rq - queue of entities to be scheduled. + * + * @lock: to modify the entities list. + * @entities: list of the entities to be scheduled. + * @current_entity: the entity which is to be scheduled. + * * Run queue is a set of entities scheduling command submissions for * one specific ring. It implements the scheduling policy that selects * the next entity to emit commands from. -*/ + */ struct drm_sched_rq { spinlock_t lock; struct list_head entities; struct drm_sched_entity *current_entity; }; +/** + * struct drm_sched_fence - fences corresponding to the scheduling of a job. + */ struct drm_sched_fence { + /** + * @scheduled: this fence is what will be signaled by the scheduler + * when the job is scheduled. + */ struct dma_fence scheduled; - /* This fence is what will be signaled by the scheduler when - * the job is completed. - * - * When setting up an out fence for the job, you should use - * this, since it's available immediately upon - * drm_sched_job_init(), and the fence returned by the driver - * from run_job() won't be created until the dependencies have - * resolved. 
- */ + /** + * @finished: this fence is what will be signaled by the scheduler + * when the job is completed. + * + * When setting up an out fence for the job, you should use + * this, since it's available immediately upon + * drm_sched_job_init(), and the fence returned by the driver + * from run_job() won't be created until the dependencies have + * resolved. + */ struct dma_fence finished; + /** + * @cb: the callback for the parent fence below. + */ struct dma_fence_cb cb; + /** + * @parent: the fence returned by &drm_sched_backend_ops.run_job + * when scheduling the job on hardware. We signal the + * &drm_sched_fence.finished fence once parent is signalled. + */ struct dma_fence *parent; + /** + * @sched: the scheduler instance to which the job having this struct + * belongs. + */ struct drm_gpu_scheduler *sched; + /** + * @lock: the lock used by the scheduled and the finished fences. + */ spinlock_t lock; + /** + * @owner: job owner for debugging + */ void *owner; }; struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); /** - * drm_sched_job - A job to be run by an entity. + * struct drm_sched_job - A job to be run by an entity. + * + * @queue_node: used to append this struct to the queue of jobs in an entity. + * @sched: the scheduler instance on which this job is scheduled. + * @s_fence: contains the fences for the scheduling of the job. + * @finish_cb: the callback for the finished fence. + * @finish_work: schedules the function @drm_sched_job_finish once the job has + * finished to remove the job from the + * @drm_gpu_scheduler.ring_mirror_list. + * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list. + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout + * interval is over. + * @id: a unique id assigned to each job scheduled on the scheduler. + * @karma: increment on every hang caused by this job. 
If this exceeds the hang + * limit of the scheduler then the job is marked guilty and will not + * be scheduled further. + * @s_priority: the priority of the job. + * @entity: the entity to which this job belongs. * * A job is created by the driver using drm_sched_job_init(), and * should call drm_sched_entity_push_job() once it wants the scheduler @@ -130,38 +200,64 @@ static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job, } /** + * struct drm_sched_backend_ops + * * Define the backend operations called by the scheduler, - * these functions should be implemented in driver side -*/ + * these functions should be implemented in driver side. + */ struct drm_sched_backend_ops { - /* Called when the scheduler is considering scheduling this - * job next, to get another struct dma_fence for this job to + /** + * @dependency: Called when the scheduler is considering scheduling + * this job next, to get another struct dma_fence for this job to * block on. Once it returns NULL, run_job() may be called. */ struct dma_fence *(*dependency)(struct drm_sched_job *sched_job, struct drm_sched_entity *s_entity); - /* Called to execute the job once all of the dependencies have - * been resolved. This may be called multiple times, if + /** + * @run_job: Called to execute the job once all of the dependencies + * have been resolved. This may be called multiple times, if * timedout_job() has happened and drm_sched_job_recovery() * decides to try it again. */ struct dma_fence *(*run_job)(struct drm_sched_job *sched_job); - /* Called when a job has taken too long to execute, to trigger - * GPU recovery. + /** + * @timedout_job: Called when a job has taken too long to execute, + * to trigger GPU recovery. */ void (*timedout_job)(struct drm_sched_job *sched_job); - /* Called once the job's finished fence has been signaled and - * it's time to clean it up. + /** + * @free_job: Called once the job's finished fence has been signaled + * and it's time to clean it up. 
*/ void (*free_job)(struct drm_sched_job *sched_job); }; /** - * One scheduler is implemented for each hardware ring -*/ + * struct drm_gpu_scheduler + * + * @ops: backend operations provided by the driver. + * @hw_submission_limit: the max size of the hardware queue. + * @timeout: the time after which a job is removed from the scheduler. + * @name: name of the ring for which this scheduler is being used. + * @sched_rq: priority wise array of run queues. + * @wake_up_worker: the wait queue on which the scheduler sleeps until a job + * is ready to be scheduled. + * @job_scheduled: once @drm_sched_entity_do_release is called the scheduler + * waits on this wait queue until all the scheduled jobs are + * finished. + * @hw_rq_count: the number of jobs currently in the hardware queue. + * @job_id_count: used to assign a unique id to each job. + * @thread: the kthread on which the scheduler runs. + * @ring_mirror_list: the list of jobs which are currently in the job queue. + * @job_list_lock: lock to protect the ring_mirror_list. + * @hang_limit: once the hangs by a job crosses this limit then it is marked + * guilty and it will not be considered for scheduling further. + * + * One scheduler is implemented for each hardware ring. 
+ */ struct drm_gpu_scheduler { const struct drm_sched_backend_ops *ops; uint32_t hw_submission_limit; @@ -188,8 +284,8 @@ int drm_sched_entity_init(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity, struct drm_sched_rq *rq, atomic_t *guilty); -void drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity); +long drm_sched_entity_do_release(struct drm_gpu_scheduler *sched, + struct drm_sched_entity *entity, long timeout); void drm_sched_entity_cleanup(struct drm_gpu_scheduler *sched, struct drm_sched_entity *entity); void drm_sched_entity_fini(struct drm_gpu_scheduler *sched, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 78b4dd89fcb4..784b0fe470ee 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -72,6 +72,29 @@ extern "C" { #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) #define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) +/** + * DOC: memory domains + * + * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. + * Memory in this pool could be swapped out to disk if there is pressure. + * + * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the + * GPU's virtual address space via gart. Gart memory linearizes non-contiguous + * pages of system memory, allowing the GPU to access system memory in a linearized + * fashion. + * + * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory + * carved out by the BIOS. + * + * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data + * across shader threads. + * + * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the + * execution of all the waves on a device. + * + * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines + * for appending data. 
+ */ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 #define AMDGPU_GEM_DOMAIN_VRAM 0x4 @@ -483,7 +506,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_HW_IP_UVD_ENC 5 #define AMDGPU_HW_IP_VCN_DEC 6 #define AMDGPU_HW_IP_VCN_ENC 7 -#define AMDGPU_HW_IP_NUM 8 +#define AMDGPU_HW_IP_VCN_JPEG 8 +#define AMDGPU_HW_IP_NUM 9 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 |