diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-25 20:48:26 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-12-25 20:48:26 +0100 |
commit | 4971f090aa7f6ce5daa094ce4334f6618f93a7eb (patch) | |
tree | 45d75782b7dedbec76a3ab82d2769f7707668071 /drivers/gpu/drm/amd | |
parent | Merge tag 'platform-drivers-x86-v4.21-1' of git://git.infradead.org/linux-pla... (diff) | |
parent | Merge tag 'vmwgfx-next-2018-12-13' of git://people.freedesktop.org/~thomash/l... (diff) | |
download | linux-4971f090aa7f6ce5daa094ce4334f6618f93a7eb.tar.xz linux-4971f090aa7f6ce5daa094ce4334f6618f93a7eb.zip |
Merge tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Core:
- shared fencing staging removal
- drop transactional atomic helpers and move helpers to new location
- DP/MST atomic cleanup
- Leasing cleanups and drop EXPORT_SYMBOL
- Convert drivers to atomic helpers and generic fbdev.
- removed deprecated obj_ref/unref in favour of get/put
- Improve dumb callback documentation
- MODESET_LOCK_BEGIN/END helpers
panels:
- CDTech panels, Banana Pi Panel, DLC1010GIG,
- Olimex LCD-O-LinuXino, Samsung S6D16D0, Truly NT35597 WQXGA,
- Himax HX8357D, simulated RTSM AEMv8.
- GPD Win2 panel
- AUO G101EVN010
vgem:
- render node support
ttm:
- move global init out of drivers
- fix LRU handling for ghost objects
- Support for simultaneous submissions to multiple engines
scheduler:
- timeout/fault handling changes to help GPU recovery
- helpers for hw with preemption support
i915:
- Scaler/Watermark fixes
- DP MST + powerwell fixes
- PSR fixes
- Break long get/put shmemfs pages
- Icelake fixes
- Icelake DSI video mode enablement
- Engine workaround improvements
amdgpu:
- freesync support
- GPU reset enabled on CI, VI, SOC15 dGPUs
- ABM support in DC
- KFD support for vega12/polaris12
- SDMA paging queue on vega
- More amdkfd code sharing
- DCC scanout on GFX9
- DC kerneldoc
- Updated SMU firmware for GFX8 chips
- XGMI PSP + hive reset support
- GPU reset
- DC trace support
- Powerplay updates for newer Polaris
- Cursor plane update fast path
- kfd dma-buf support
virtio-gpu:
- add EDID support
vmwgfx:
- pageflip with damage support
nouveau:
- Initial Turing TU104/TU106 modesetting support
msm:
- a2xx gpu support for apq8060 and imx5
- a2xx gpummu support
- mdp4 display support for apq8060
- DPU fixes and cleanups
- enhanced profiling support
- debug object naming interface
- get_iova/page pinning decoupling
tegra:
- Tegra194 host1x, VIC and display support enabled
- Audio over HDMI for Tegra186 and Tegra194
exynos:
- DMA/IOMMU refactoring
- plane alpha + blend mode support
- Color format fixes for mixer driver
rcar-du:
- R8A7744 and R8A77470 support
- R8A77965 LVDS support
imx:
- fbdev emulation fix
- multi-tiled scaling fixes
- SPDX identifiers
rockchip:
- dw_hdmi support
- dw-mipi-dsi + dual dsi support
- mailbox read size fix
qxl:
- fix cursor pinning
vc4:
- YUV support (scaling + cursor)
v3d:
- enable TFU (Texture Formatting Unit)
mali-dp:
- add support for linear tiled formats
sun4i:
- Display Engine 3 support
- H6 DE3 mixer 0 support
- H6 display engine support
- dw-hdmi support
- H6 HDMI phy support
- implicit fence waiting
- BGRX8888 support
meson:
- Overlay plane support
- implicit fence waiting
- HDMI 1.4 4k modes
bridge:
- i2c fixes for sii902x"
* tag 'drm-next-2018-12-14' of git://anongit.freedesktop.org/drm/drm: (1403 commits)
drm/amd/display: Add fast path for cursor plane updates
drm/amdgpu: Enable GPU recovery by default for CI
drm/amd/display: Fix duplicating scaling/underscan connector state
drm/amd/display: Fix unintialized max_bpc state values
Revert "drm/amd/display: Set RMX_ASPECT as default"
drm/amdgpu: Fix stub function name
drm/msm/dpu: Fix clock issue after bind failure
drm/msm/dpu: Clean up dpu_media_info.h static inline functions
drm/msm/dpu: Further cleanups for static inline functions
drm/msm/dpu: Cleanup the debugfs functions
drm/msm/dpu: Remove dpu_irq and unused functions
drm/msm: Make irq_postinstall optional
drm/msm/dpu: Cleanup callers of dpu_hw_blk_init
drm/msm/dpu: Remove unused functions
drm/msm/dpu: Remove dpu_crtc_is_enabled()
drm/msm/dpu: Remove dpu_crtc_get_mixer_height
drm/msm/dpu: Remove dpu_dbg
drm/msm: dpu: Remove crtc_lock
drm/msm: dpu: Remove vblank_requested flag from dpu_crtc
drm/msm: dpu: Separate crtc assignment from vblank enable
...
Diffstat (limited to 'drivers/gpu/drm/amd')
236 files changed, 9562 insertions, 6590 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 138cb787d27e..f76bcb9c45e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -53,7 +53,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o + amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -105,6 +105,7 @@ amdgpu-y += \ # add GFX block amdgpu-y += \ amdgpu_gfx.o \ + amdgpu_rlc.o \ gfx_v8_0.o \ gfx_v9_0.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b0fc116296cb..bcef6ea4bcf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -75,11 +75,14 @@ #include "amdgpu_sdma.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" +#include "amdgpu_csa.h" #include "amdgpu_gart.h" #include "amdgpu_debugfs.h" #include "amdgpu_job.h" #include "amdgpu_bo_list.h" #include "amdgpu_gem.h" +#include "amdgpu_doorbell.h" +#include "amdgpu_amdkfd.h" #define MAX_GPU_INSTANCE 16 @@ -161,6 +164,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -360,123 +364,6 @@ int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); /* - * GPU doorbell structures, functions & helpers - */ -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT -{ - AMDGPU_DOORBELL_KIQ = 0x000, - AMDGPU_DOORBELL_HIQ = 0x001, - AMDGPU_DOORBELL_DIQ = 0x002, - AMDGPU_DOORBELL_MEC_RING0 = 0x010, - AMDGPU_DOORBELL_MEC_RING1 = 0x011, - AMDGPU_DOORBELL_MEC_RING2 = 0x012, - 
AMDGPU_DOORBELL_MEC_RING3 = 0x013, - AMDGPU_DOORBELL_MEC_RING4 = 0x014, - AMDGPU_DOORBELL_MEC_RING5 = 0x015, - AMDGPU_DOORBELL_MEC_RING6 = 0x016, - AMDGPU_DOORBELL_MEC_RING7 = 0x017, - AMDGPU_DOORBELL_GFX_RING0 = 0x020, - AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0, - AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1, - AMDGPU_DOORBELL_IH = 0x1E8, - AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF, - AMDGPU_DOORBELL_INVALID = 0xFFFF -} AMDGPU_DOORBELL_ASSIGNMENT; - -struct amdgpu_doorbell { - /* doorbell mmio */ - resource_size_t base; - resource_size_t size; - u32 __iomem *ptr; - u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ -}; - -/* - * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space - */ -typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT -{ - /* - * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in - * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. - * Compute related doorbells are allocated from 0x00 to 0x8a - */ - - - /* kernel scheduling */ - AMDGPU_DOORBELL64_KIQ = 0x00, - - /* HSA interface queue and debug queue */ - AMDGPU_DOORBELL64_HIQ = 0x01, - AMDGPU_DOORBELL64_DIQ = 0x02, - - /* Compute engines */ - AMDGPU_DOORBELL64_MEC_RING0 = 0x03, - AMDGPU_DOORBELL64_MEC_RING1 = 0x04, - AMDGPU_DOORBELL64_MEC_RING2 = 0x05, - AMDGPU_DOORBELL64_MEC_RING3 = 0x06, - AMDGPU_DOORBELL64_MEC_RING4 = 0x07, - AMDGPU_DOORBELL64_MEC_RING5 = 0x08, - AMDGPU_DOORBELL64_MEC_RING6 = 0x09, - AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, - - /* User queue doorbell range (128 doorbells) */ - AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, - AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, - - /* Graphics engine */ - AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, - - /* - * Other graphics doorbells can be allocated here: from 0x8c to 0xdf - * Graphics voltage island aperture 1 - * default non-graphics QWORD index is 0xe0 - 0xFF inclusive - */ - - /* sDMA engines reserved from 0xe0 -oxef */ - 
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1, - AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9, - - /* For vega10 sriov, the sdma doorbell must be fixed as follow - * to keep the same setting with host driver, or it will - * happen conflicts - */ - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0, - AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2, - AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, - - /* Interrupt handler */ - AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ - AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ - AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ - - /* VCN engine use 32 bits doorbell */ - AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ - AMDGPU_DOORBELL64_VCN2_3 = 0xF9, - AMDGPU_DOORBELL64_VCN4_5 = 0xFA, - AMDGPU_DOORBELL64_VCN6_7 = 0xFB, - - /* overlap the doorbell assignment with VCN as they are mutually exclusive - * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD - */ - AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, - AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, - AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, - AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, - - AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, - AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, - AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, - AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, - - AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, - AMDGPU_DOORBELL64_INVALID = 0xFFFF -} AMDGPU_DOORBELL64_ASSIGNMENT; - -/* * IRQS. 
*/ @@ -653,6 +540,8 @@ struct amdgpu_asic_funcs { struct amdgpu_ring *ring); /* check if the asic needs a full reset of if soft reset will work */ bool (*need_full_reset)(struct amdgpu_device *adev); + /* initialize doorbell layout for specific asic*/ + void (*init_doorbell_index)(struct amdgpu_device *adev); }; /* @@ -831,7 +720,6 @@ struct amdgpu_device { bool need_dma32; bool need_swiotlb; bool accel_working; - struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; @@ -976,6 +864,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* KFD */ + struct amdgpu_kfd_dev kfd; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -989,9 +880,6 @@ struct amdgpu_device { atomic64_t visible_pin_size; atomic64_t gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; @@ -1023,6 +911,10 @@ struct amdgpu_device { unsigned long last_mm_index; bool in_gpu_reset; struct mutex lock_reset; + struct amdgpu_doorbell_index doorbell_index; + + int asic_reset_res; + struct work_struct xgmi_reset_work; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1047,11 +939,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); -u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); -void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); -u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); -void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); - bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); @@ -1113,11 
+1000,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) -#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) -#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) -#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) -#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) - #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK @@ -1159,6 +1041,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) +#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1219,12 +1102,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe); long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); - -/* - * functions used by amdgpu_xgmi.c - */ -int amdgpu_xgmi_add_device(struct amdgpu_device *adev); - /* * functions used by amdgpu_encoder.c */ @@ -1252,6 +1129,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); + +void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, + struct amdgpu_dm_backlight_caps *caps); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 7f0afc526419..4376b17ca594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg { }; struct amdgpu_atif_notifications { - bool display_switch; - bool expansion_mode_change; bool thermal_state; bool forced_power_state; bool system_power_state; - bool display_conf_change; - bool px_gfx_switch; bool brightness_change; bool dgpu_display_event; + bool gpu_package_power_limit; }; struct amdgpu_atif_functions { bool system_params; bool sbios_requests; - bool select_active_disp; - bool lid_state; - bool get_tv_standard; - bool set_tv_standard; - bool get_panel_expansion_mode; - bool set_panel_expansion_mode; bool temperature_change; - bool graphics_device_types; + bool query_backlight_transfer_characteristics; + bool ready_to_undock; + bool external_gpu_information; }; struct amdgpu_atif { @@ -72,6 +65,7 @@ struct amdgpu_atif { struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; struct amdgpu_encoder *encoder_for_bl; + struct amdgpu_dm_backlight_caps backlight_caps; }; /* Call the ATIF method @@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, */ static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask) { - n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED; - n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED; n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED; n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED; n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED; - n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED; - n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED; n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED; 
n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED; + n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED; } /** @@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas { f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED; f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED; - f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED; - f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED; - f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED; - f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED; - f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED; - f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED; f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED; - f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED; + f->query_backlight_transfer_characteristics = + mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED; + f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED; + f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED; } /** @@ -311,6 +299,65 @@ out: } /** + * amdgpu_atif_query_backlight_caps - get min and max backlight input signal + * + * @handle: acpi handle + * + * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function + * to determine the acceptable range of backlight values + * + * Backlight_caps.caps_valid will be set to true if the query is successful + * + * The input signals are in range 0-255 + * + * This function assumes the display with backlight is the first LCD + * + * Returns 0 on success, error on failure. 
+ */ +static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif) +{ + union acpi_object *info; + struct atif_qbtc_output characteristics; + struct atif_qbtc_arguments arguments; + struct acpi_buffer params; + size_t size; + int err = 0; + + arguments.size = sizeof(arguments); + arguments.requested_display = ATIF_QBTC_REQUEST_LCD1; + + params.length = sizeof(arguments); + params.pointer = (void *)&arguments; + + info = amdgpu_atif_call(atif, + ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS, + &params); + if (!info) { + err = -EIO; + goto out; + } + + size = *(u16 *) info->buffer.pointer; + if (size < 10) { + err = -EINVAL; + goto out; + } + + memset(&characteristics, 0, sizeof(characteristics)); + size = min(sizeof(characteristics), size); + memcpy(&characteristics, info->buffer.pointer, size); + + atif->backlight_caps.caps_valid = true; + atif->backlight_caps.min_input_signal = + characteristics.min_input_signal; + atif->backlight_caps.max_input_signal = + characteristics.max_input_signal; +out: + kfree(info); + return err; +} + +/** + * amdgpu_atif_get_sbios_requests - get requested sbios event * * @handle: acpi handle @@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } } + if (atif->functions.query_backlight_transfer_characteristics) { + ret = amdgpu_atif_query_backlight_caps(atif); + if (ret) { + DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n", + ret); + atif->backlight_caps.caps_valid = false; + } + } else { + atif->backlight_caps.caps_valid = false; + } + out: adev->acpi_nb.notifier_call = amdgpu_acpi_event; register_acpi_notifier(&adev->acpi_nb); @@ -806,6 +864,18 @@ out: return ret; } +void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, + struct amdgpu_dm_backlight_caps *caps) +{ + if (!adev->atif) { + caps->caps_valid = false; + return; + } + caps->caps_valid = adev->atif->backlight_caps.caps_valid; + caps->min_input_signal = adev->atif->backlight_caps.min_input_signal; + 
caps->max_input_signal = adev->atif->backlight_caps.max_input_signal; +} + /** * amdgpu_acpi_fini - tear down driver acpi support * @@ -816,6 +886,5 @@ out: void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); - if (adev->atif) - kfree(adev->atif); + kfree(adev->atif); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1580ec60b89f..2dfaf158ef07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -26,15 +26,26 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include <linux/module.h> +#include <linux/dma-buf.h> const struct kgd2kfd_calls *kgd2kfd; static const unsigned int compute_vmid_bitmap = 0xFF00; +/* Total memory size in system memory and all GPU VRAM. Used to + * estimate worst case amount of memory to reserve for page tables + */ +uint64_t amdgpu_amdkfd_total_mem_size; + int amdgpu_amdkfd_init(void) { + struct sysinfo si; int ret; + si_meminfo(&si); + amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; + amdgpu_amdkfd_total_mem_size *= si.mem_unit; + #ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) @@ -73,9 +84,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); @@ -85,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); + + if (adev->kfd.dev) + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -126,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i, n; int 
last_valid_bit; - if (adev->kfd) { + + if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, @@ -144,7 +161,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) KGD_MAX_QUEUES); /* remove the KIQ bit as well */ - if (adev->gfx.kiq.ring.ready) + if (adev->gfx.kiq.ring.sched.ready) clear_bit(amdgpu_gfx_queue_to_bit(adev, adev->gfx.kiq.ring.me - 1, adev->gfx.kiq.ring.pipe, @@ -165,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_start_offset); if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); return; } @@ -179,25 +196,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * process in case of 64-bit doorbells so we * can use each doorbell assignment twice. */ - if (adev->asic_type == CHIP_VEGA10) { - gpu_resources.sdma_doorbell[0][i] = - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1); - gpu_resources.sdma_doorbell[0][i+1] = - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1); - gpu_resources.sdma_doorbell[1][i] = - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1); - gpu_resources.sdma_doorbell[1][i+1] = - AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1); - } else { - gpu_resources.sdma_doorbell[0][i] = - AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1); - gpu_resources.sdma_doorbell[0][i+1] = - AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1); - gpu_resources.sdma_doorbell[1][i] = - AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1); - gpu_resources.sdma_doorbell[1][i+1] = - AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1); - } + gpu_resources.sdma_doorbell[0][i] = + adev->doorbell_index.sdma_engine0 + (i >> 1); + gpu_resources.sdma_doorbell[0][i+1] = + adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1); + gpu_resources.sdma_doorbell[1][i] = + adev->doorbell_index.sdma_engine1 + (i >> 1); + 
gpu_resources.sdma_doorbell[1][i+1] = + adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1); } /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for * SDMA, IH and VCN. So don't use them for the CP. @@ -205,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.reserved_doorbell_mask = 0x1e0; gpu_resources.reserved_doorbell_val = 0x0e0; - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (adev->kfd) { - kgd2kfd->device_exit(adev->kfd); - adev->kfd = NULL; + if (adev->kfd.dev) { + kgd2kfd->device_exit(adev->kfd.dev); + adev->kfd.dev = NULL; } } void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (adev->kfd) - kgd2kfd->interrupt(adev->kfd, ih_ring_entry); + if (adev->kfd.dev) + kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); } void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (adev->kfd) - kgd2kfd->suspend(adev->kfd); + if (adev->kfd.dev) + kgd2kfd->suspend(adev->kfd.dev); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->resume(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->resume(adev->kfd.dev); return r; } @@ -244,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->pre_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->pre_reset(adev->kfd.dev); return r; } @@ -254,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->post_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->post_reset(adev->kfd.dev); return r; } @@ -268,9 +274,9 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) amdgpu_device_gpu_recover(adev, NULL); } -int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9) +int amdgpu_amdkfd_alloc_gtt_mem(struct 
kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr, bool mqd_gfx9) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; @@ -340,7 +346,7 @@ allocate_mem_reserve_bo_failed: return r; } -void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; @@ -351,8 +357,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } -void get_local_mem_info(struct kgd_dev *kgd, - struct kfd_local_mem_info *mem_info) +void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : @@ -383,7 +389,7 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->mem_clk_max = 100; } -uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -392,7 +398,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) return 0; } -uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -405,7 +411,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return 100; } -void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_cu_info acu_info = adev->gfx.cu_info; @@ -428,6 +434,62 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dma_buf_kgd, + uint64_t 
*bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + uint64_t metadata_flags; + int r = -EINVAL; + + dma_buf = dma_buf_get(dma_buf_fd); + if (IS_ERR(dma_buf)) + return PTR_ERR(dma_buf); + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + goto out_put; + + obj = dma_buf->priv; + if (obj->dev->driver != adev->ddev->driver) + /* Can't handle buffers from different drivers */ + goto out_put; + + adev = obj->dev->dev_private; + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + goto out_put; + + r = 0; + if (dma_buf_kgd) + *dma_buf_kgd = (struct kgd_dev *)adev; + if (bo_size) + *bo_size = amdgpu_bo_size(bo); + if (metadata_size) + *metadata_size = bo->metadata_size; + if (metadata_buffer) + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, + metadata_size, &metadata_flags); + if (flags) { + *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? 
+ ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + *flags |= ALLOC_MEM_FLAGS_PUBLIC; + } + +out_put: + dma_buf_put(dma_buf); + return r; +} + uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -510,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd) { + if (adev->kfd.dev) { if ((1 << vmid) & compute_vmid_bitmap) return true; } @@ -524,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 8e0d4f7196b4..70429f7aa9a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -35,6 +34,7 @@ #include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; +extern uint64_t amdgpu_amdkfd_total_mem_size; struct amdgpu_device; @@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence { char timeline_name[TASK_COMM_LEN]; }; +struct amdgpu_kfd_dev { + struct kfd_dev *dev; + uint64_t vram_used; +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -134,16 +139,21 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); /* Shared API */ -int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool 
mqd_gfx9); -void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -void get_local_mem_info(struct kgd_dev *kgd, - struct kfd_local_mem_info *mem_info); -uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); - -uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); -void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr, bool mqd_gfx9); +void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); + +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dmabuf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); @@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dmabuf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 244d9834a381..ff7fac7df34b 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -23,6 +23,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -173,13 +174,6 @@ static int get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -200,28 +194,10 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = 
invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 9f149914ad6c..56ea929f524b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -24,6 +24,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -128,13 +129,6 @@ static int get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -157,27 +151,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, - .get_process_page_dir = 
amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 42cb4c4e0929..5c51d4910650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -26,6 +26,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -46,38 +47,9 @@ #include "v9_structs.h" #include "soc15.h" #include "soc15d.h" +#include "mmhub_v1_0.h" +#include "gfxhub_v1_0.h" -/* HACK: MMHUB and GC both have VM-related register with the same - * names but different offsets. Define the MMHUB register we need here - * with a prefix. A proper solution would be to move the functions - * programming these registers into gfx_v9_0.c and mmhub_v1_0.c - * respectively. 
- */ -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 - -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 #define V9_PIPE_PER_MEC (4) #define V9_QUEUES_PER_PIPE_MEC (8) @@ -167,13 +139,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -196,26 +161,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, 
.set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = amdgpu_amdkfd_get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .release_process_vm = amdgpu_amdkfd_gpuvm_release_process_vm, - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; @@ -785,15 +733,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - uint32_t req = (1 << vmid) | - (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; - - mutex_lock(&adev->srbm_mutex); /* Use legacy mode tlb invalidation. 
* @@ -810,34 +749,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) * TODO 2: support range-based invalidation, requires kfg2kgd * interface change */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), - req); - - while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) - cpu_relax(); - - while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) - cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); - + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -876,7 +788,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (adev->in_gpu_reset) return -EIO; - if (ring->ready) + if (ring->sched.ready) return invalidate_tlbs_with_kiq(adev, pasid); for (vmid = 0; vmid < 16; vmid++) { @@ -1016,7 +928,6 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint64_t base = page_table_base | AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", @@ -1028,25 +939,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * now, all processes share the same address space size, like * on GFX8 and older. 
*/ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index df0a059565f9..be1ab43473c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> +#include <linux/dma-buf.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -46,9 +47,9 @@ /* 
Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; - uint64_t max_userptr_mem_limit; + uint64_t max_ttm_mem_limit; int64_t system_mem_used; - int64_t userptr_mem_used; + int64_t ttm_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -90,8 +91,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (kernel) memory - 3/8th System RAM - * Userptr memory - 3/4th System RAM + * System (TTM + userptr) memory - 3/4th System RAM + * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -103,48 +104,61 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); - pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), - (kfd_mem_limit.max_userptr_mem_limit >> 20)); + (kfd_mem_limit.max_ttm_mem_limit >> 20)); } -static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) +static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) { - size_t acc_size; + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); - spin_lock(&kfd_mem_limit.mem_limit_lock); + vram_needed = 0; if (domain == AMDGPU_GEM_DOMAIN_GTT) { - if (kfd_mem_limit.system_mem_used + (acc_size + size) > - kfd_mem_limit.max_system_mem_limit) { - ret = -ENOMEM; - goto err_no_mem; - } - 
kfd_mem_limit.system_mem_used += (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { - if ((kfd_mem_limit.system_mem_used + acc_size > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.userptr_mem_used + (size + acc_size) > - kfd_mem_limit.max_userptr_mem_limit)) { - ret = -ENOMEM; - goto err_no_mem; - } - kfd_mem_limit.system_mem_used += acc_size; - kfd_mem_limit.userptr_mem_used += size; + /* TTM GTT memory */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size + size; + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + /* Userptr */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size; + } else { + /* VRAM and SG */ + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) + vram_needed = size; + } + + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if ((kfd_mem_limit.system_mem_used + system_mem_needed > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) || + (adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - reserved_for_pt)) { + ret = -ENOMEM; + } else { + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + adev->kfd.vram_used += vram_needed; } -err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } -static void unreserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) +static void unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -154,35 +168,39 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.ttm_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + kfd_mem_limit.system_mem_used 
-= (acc_size + size); + kfd_mem_limit.ttm_mem_used -= acc_size; + } else { kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.userptr_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + adev->kfd.vram_used -= size; + WARN_ONCE(adev->kfd.vram_used < 0, + "kfd VRAM memory accounting unbalanced"); + } } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, + "kfd TTM memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { - spin_lock(&kfd_mem_limit.mem_limit_lock); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + u32 domain = bo->preferred_domains; + bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); + domain = AMDGPU_GEM_DOMAIN_CPU; + sg = false; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); - spin_unlock(&kfd_mem_limit.mem_limit_lock); + unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); } @@ -395,23 +413,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return 0; } -static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f) -{ - int ret = amdgpu_sync_fence(adev, sync, f, false); - - /* Sync objects can't handle multiple GPUs (contexts) updating - * 
sync->last_vm_update. Fortunately we don't need it for - * KFD's purposes, so we can just drop that fence. - */ - if (sync->last_vm_update) { - dma_fence_put(sync->last_vm_update); - sync->last_vm_update = NULL; - } - - return ret; -} - static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) { struct amdgpu_bo *pd = vm->root.base.bo; @@ -422,7 +423,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return sync_vm_fence(adev, sync, vm->last_update); + return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } /* add_bo_to_vm - Add a BO to a VM @@ -536,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdgpu_bo *bo = mem->bo; INIT_LIST_HEAD(&entry->head); - entry->shared = true; + entry->num_shared = 1; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) @@ -677,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -741,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -826,7 +827,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, /* Add the eviction fence back */ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); - sync_vm_fence(adev, sync, bo_va->last_pt_update); + amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); return 0; } @@ -851,7 +852,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return sync_vm_fence(adev, sync, bo_va->last_pt_update); + return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); } static int map_bo_to_gpuvm(struct amdgpu_device 
*adev, @@ -886,6 +887,24 @@ update_gpuvm_pte_failed: return ret; } +static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg->sgl->dma_address = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -901,6 +920,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info) return 0; } +static int process_sync_pds_resv(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdgpu_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *pd = peer_vm->root.base.bo; + + ret = amdgpu_sync_resv(NULL, + sync, pd->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (ret) + return ret; + } + + return 0; +} + static int process_update_pds(struct amdkfd_process_info *process_info, struct amdgpu_sync *sync) { @@ -1149,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; @@ -1177,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + bo_type = ttm_bo_type_sg; + alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; } else { return -EINVAL; } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if 
(!*mem) - return -ENOMEM; + if (!*mem) { + ret = -ENOMEM; + goto err; + } INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1199,7 +1252,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( byte_align = (adev->family == AMDGPU_FAMILY_VI && adev->asic_type != CHIP_FIJI && adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11) ? + adev->asic_type != CHIP_POLARIS11 && + adev->asic_type != CHIP_POLARIS12) ? VI_BO_SIZE_ALIGN : 1; mapping_flags = AMDGPU_VM_PAGE_READABLE; @@ -1215,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); if (ret) { pr_debug("Insufficient system memory\n"); - goto err_reserve_system_mem; + goto err_reserve_limit; } pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", @@ -1229,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; - bp.type = ttm_bo_type_device; + bp.type = bo_type; bp.resv = NULL; ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { @@ -1237,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + if (bo_type == ttm_bo_type_sg) { + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + } bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) @@ -1266,12 +1324,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ - goto err_reserve_system_mem; + goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain); -err_reserve_system_mem: + unreserve_mem_limit(adev, size, alloc_domain, !!sg); +err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); +err: + if (sg) { + sg_free_table(sg); + 
kfree(sg); + } return ret; } @@ -1341,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. + */ + if (mem->bo->tbo.sg) { + sg_free_table(mem->bo->tbo.sg); + kfree(mem->bo->tbo.sg); + } + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); @@ -1405,7 +1476,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( * the queues are still stopped and we can leave mapping for * the next restore worker */ - if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && + bo->tbo.mem.mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; if (check_if_add_bo_to_vm(avm, mem)) { @@ -1642,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dma_buf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + return -EINVAL; + + obj = dma_buf->priv; + if (obj->dev->dev_private != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + if (size) + *size = amdgpu_bo_size(bo); + + if (mmap_offset) + *mmap_offset = amdgpu_bo_mmap_offset(bo); + + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | 
AMDGPU_VM_MTYPE_NC; + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -1808,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) validate_list.head) { list_add_tail(&mem->resv_list.head, &resv_list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } /* Reserve all BOs and page tables for validation */ @@ -2027,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) list_add_tail(&mem->resv_list.head, &ctx.list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, @@ -2044,13 +2170,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (ret) goto validate_map_fail; - /* Wait for PD/PTs validate to finish */ - /* FIXME: I think this isn't needed */ - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - struct amdgpu_bo *bo = peer_vm->root.base.bo; - - ttm_bo_wait(&bo->tbo, false, false); + ret = process_sync_pds_resv(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); + goto validate_map_fail; } /* Validate BOs and map them to GPUVM (update VM page tables). 
*/ @@ -2066,7 +2189,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - + ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { ret = update_gpuvm_pte((struct amdgpu_device *) @@ -2087,6 +2214,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } + /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); /* Release old eviction fence and create new one, because fence only @@ -2105,10 +2233,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Wait for validate to finish and attach new eviction fence */ - list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) - ttm_bo_wait(&mem->bo->tbo, false, false); + /* Attach new eviction fence to all BOs */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list.head) amdgpu_bo_fence(mem->bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 14d2982a47cc..5c79da8e1150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &bo->tbo; - entry->tv.shared = !bo->prime_shared_count; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) list->gds_obj = bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0acc8dee2cb8..cf4e190c0a72 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - p->uf_entry.tv.shared = true; + /* One for TTM and one for the CS job */ + p->uf_entry.tv.num_shared = 2; p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + /* One for TTM and one for the CS job */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 2; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); @@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo)); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* Make sure we use the exclusive slot for shared BOs */ + if (bo->prime_shared_count) + e->tv.num_shared = 0; + e->bo_va = amdgpu_vm_bo_find(vm, bo); + } if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; @@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); - if (r) - return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); if (amdgpu_vm_debug) { @@ -1104,7 +1111,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, 0, &fence); + r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); if (r) return r; @@ -1193,7 +1200,7 @@ static void 
amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) int i; for (i = 0; i < p->num_post_dep_syncobjs; ++i) - drm_syncobj_replace_fence(p->post_dep_syncobjs[i], 0, p->fence); + drm_syncobj_replace_fence(p->post_dep_syncobjs[i], p->fence); } static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, @@ -1260,8 +1267,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, return 0; error_abort: - dma_fence_put(&job->base.s_fence->finished); - job->base.s_fence = NULL; + drm_sched_job_cleanup(&job->base); amdgpu_mn_unlock(p->mn); error_unlock: @@ -1285,7 +1291,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, data); if (r) { - DRM_ERROR("Failed to initialize parser !\n"); + DRM_ERROR("Failed to initialize parser %d!\n", r); goto out; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c new file mode 100644 index 000000000000..7e22be7ca68a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -0,0 +1,117 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + + * * Author: Monk.liu@amd.com + */ + +#include "amdgpu.h" + +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) +{ + uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + + addr -= AMDGPU_VA_RESERVED_SIZE; + addr = amdgpu_gmc_sign_extend(addr); + + return addr; +} + +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size) +{ + int r; + void *ptr; + + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + domain, bo, + NULL, &ptr); + if (!*bo) + return -ENOMEM; + + memset(ptr, 0, size); + return 0; +} + +void amdgpu_free_static_csa(struct amdgpu_bo **bo) +{ + amdgpu_bo_free_kernel(bo, NULL, NULL); +} + +/* + * amdgpu_map_static_csa should be called during amdgpu_vm_init + * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command + * submission of GFX should use this virtual address within META_DATA init + * package to support SRIOV gfx preemption. 
+ */ +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t csa_addr, uint32_t size) +{ + struct ww_acquire_ctx ticket; + struct list_head list; + struct amdgpu_bo_list_entry pd; + struct ttm_validate_buffer csa_tv; + int r; + + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&csa_tv.head); + csa_tv.bo = &bo->tbo; + csa_tv.num_shared = 1; + + list_add(&csa_tv.head, &list); + amdgpu_vm_get_pd_bo(vm, &list, &pd); + + r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); + if (r) { + DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + return r; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + ttm_eu_backoff_reservation(&ticket, &list); + DRM_ERROR("failed to create bo_va for static CSA\n"); + return -ENOMEM; + } + + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, + size); + if (r) { + DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); + amdgpu_vm_bo_rmv(adev, *bo_va); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); + + if (r) { + DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); + amdgpu_vm_bo_rmv(adev, *bo_va); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + ttm_eu_backoff_reservation(&ticket, &list); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h new file mode 100644 index 000000000000..524b4437a021 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h @@ -0,0 +1,39 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Monk.liu@amd.com + */ + +#ifndef AMDGPU_CSA_MANAGER_H +#define AMDGPU_CSA_MANAGER_H + +#define AMDGPU_CSA_SIZE (128 * 1024) + +uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev); +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size); +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t csa_addr, uint32_t size); +void amdgpu_free_static_csa(struct amdgpu_bo **bo); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95f4c4139fc6..d85184b5b35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return -ENOMEM; mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL); if (r < 0) { mutex_unlock(&mgr->lock); kfree(ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 30bc345d6fdf..b60afeade50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -59,6 +59,8 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_pm.h" +#include "amdgpu_xgmi.h" + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -513,6 +515,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { + /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.base = 0; @@ -525,15 +528,26 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if 
(pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) return -EINVAL; + amdgpu_asic_init_doorbell_index(adev); + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), - AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); + adev->doorbell_index.max_assignment+1); if (adev->doorbell.num_doorbells == 0) return -EINVAL; + /* For Vega, reserve and map two pages on doorbell BAR since SDMA + * paging queue doorbell use the second page. The + * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the + * doorbells are in the first page. So with paging queue enabled, + * the max num_doorbells should + 1 page (0x400 in dword) + */ + if (adev->asic_type >= CHIP_VEGA10) + adev->doorbell.num_doorbells += 0x400; + adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32)); @@ -1656,7 +1670,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* right after GMC hw init, we create CSA */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_allocate_static_csa(adev); + r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("allocate CSA failed %d\n", r); return r; @@ -1681,7 +1697,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; - amdgpu_xgmi_add_device(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); if (amdgpu_sriov_vf(adev)) @@ -1848,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_device_fini(adev); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); @@ -1890,7 +1910,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { 
amdgpu_ucode_free_bo(adev); - amdgpu_free_static_csa(adev); + amdgpu_free_static_csa(&adev->virt.csa_obj); amdgpu_device_wb_fini(adev); amdgpu_device_vram_scratch_fini(adev); } @@ -2337,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } + +static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) +{ + struct amdgpu_device *adev = + container_of(__work, struct amdgpu_device, xgmi_reset_work); + + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) + DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + adev->asic_reset_res, adev->ddev->unique); +} + + /** * amdgpu_device_init - initialize the driver * @@ -2435,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? 
true : false; @@ -2455,9 +2490,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - /* doorbell bar mapping */ - amdgpu_device_doorbell_init(adev); - /* io port mapping */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { @@ -2476,6 +2508,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* doorbell bar mapping and doorbell index init*/ + amdgpu_device_doorbell_init(adev); + /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ @@ -3148,86 +3183,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough - * - * @adev: amdgpu device pointer - * - * attempt to do soft-reset or full-reset and reinitialize Asic - * return 0 means succeeded otherwise failed - */ -static int amdgpu_device_reset(struct amdgpu_device *adev) -{ - bool need_full_reset, vram_lost = 0; - int r; - - need_full_reset = amdgpu_device_ip_need_full_reset(adev); - - if (!need_full_reset) { - amdgpu_device_ip_pre_soft_reset(adev); - r = amdgpu_device_ip_soft_reset(adev); - amdgpu_device_ip_post_soft_reset(adev); - if (r || amdgpu_device_ip_check_soft_reset(adev)) { - DRM_INFO("soft reset failed, will fallback to full reset!\n"); - need_full_reset = true; - } - } - - if (need_full_reset) { - r = amdgpu_device_ip_suspend(adev); - -retry: - r = amdgpu_asic_reset(adev); - /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); - - if (!r) { - dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_device_ip_resume_phase1(adev); - if (r) - goto out; - - vram_lost = amdgpu_device_check_vram_lost(adev); - if (vram_lost) { - DRM_ERROR("VRAM is lost!\n"); - 
atomic_inc(&adev->vram_lost_counter); - } - - r = amdgpu_gtt_mgr_recover( - &adev->mman.bdev.man[TTM_PL_TT]); - if (r) - goto out; - - r = amdgpu_device_fw_loading(adev); - if (r) - return r; - - r = amdgpu_device_ip_resume_phase2(adev); - if (r) - goto out; - - if (vram_lost) - amdgpu_device_fill_reset_magic(adev); - } - } - -out: - if (!r) { - amdgpu_irq_gpu_reset_resume_helper(adev); - r = amdgpu_ib_ring_tests(adev); - if (r) { - dev_err(adev->dev, "ib ring test failed (%d).\n", r); - r = amdgpu_device_ip_suspend(adev); - need_full_reset = true; - goto retry; - } - } - - if (!r) - r = amdgpu_device_recover_vram(adev); - - return r; -} /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf @@ -3295,40 +3250,46 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) return false; } - if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && - !amdgpu_sriov_vf(adev))) { - DRM_INFO("GPU recovery disabled.\n"); - return false; - } + if (amdgpu_gpu_recovery == 0) + goto disabled; - return true; -} + if (amdgpu_sriov_vf(adev)) + return true; -/** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler - * - * @adev: amdgpu device pointer - * @job: which job trigger hang - * - * Attempt to reset the GPU if it has hung (all asics). - * Returns 0 for success or an error on failure. 
- */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job) -{ - int i, r, resched; + if (amdgpu_gpu_recovery == -1) { + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_VEGA20: + case CHIP_VEGA10: + case CHIP_VEGA12: + break; + default: + goto disabled; + } + } - dev_info(adev->dev, "GPU reset begin!\n"); + return true; - mutex_lock(&adev->lock_reset); - atomic_inc(&adev->gpu_reset_counter); - adev->in_gpu_reset = 1; +disabled: + DRM_INFO("GPU recovery disabled.\n"); + return false; +} - /* Block kfd */ - amdgpu_amdkfd_pre_reset(adev); - /* block TTM */ - resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); +static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, + struct amdgpu_job *job, + bool *need_full_reset_arg) +{ + int i, r = 0; + bool need_full_reset = *need_full_reset_arg; /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3348,10 +3309,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fence_driver_force_completion(ring); } - if (amdgpu_sriov_vf(adev)) - r = amdgpu_device_reset_sriov(adev, job ? 
false : true); - else - r = amdgpu_device_reset(adev); + + + if (!amdgpu_sriov_vf(adev)) { + + if (!need_full_reset) + need_full_reset = amdgpu_device_ip_need_full_reset(adev); + + if (!need_full_reset) { + amdgpu_device_ip_pre_soft_reset(adev); + r = amdgpu_device_ip_soft_reset(adev); + amdgpu_device_ip_post_soft_reset(adev); + if (r || amdgpu_device_ip_check_soft_reset(adev)) { + DRM_INFO("soft reset failed, will fallback to full reset!\n"); + need_full_reset = true; + } + } + + if (need_full_reset) + r = amdgpu_device_ip_suspend(adev); + + *need_full_reset_arg = need_full_reset; + } + + return r; +} + +static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, + struct list_head *device_list_handle, + bool *need_full_reset_arg) +{ + struct amdgpu_device *tmp_adev = NULL; + bool need_full_reset = *need_full_reset_arg, vram_lost = false; + int r = 0; + + /* + * ASIC reset has to be done on all HGMI hive nodes ASAP + * to allow proper links negotiation in FW (within 1 sec) + */ + if (need_full_reset) { + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", + r, tmp_adev->ddev->unique); + break; + } + } + + /* For XGMI wait for all PSP resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } + } + } + + + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + if (need_full_reset) { + /* post card */ + if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) + DRM_WARN("asic atom init failed!"); + + if 
(!r) { + dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_device_ip_resume_phase1(tmp_adev); + if (r) + goto out; + + vram_lost = amdgpu_device_check_vram_lost(tmp_adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&tmp_adev->vram_lost_counter); + } + + r = amdgpu_gtt_mgr_recover( + &tmp_adev->mman.bdev.man[TTM_PL_TT]); + if (r) + goto out; + + r = amdgpu_device_fw_loading(tmp_adev); + if (r) + return r; + + r = amdgpu_device_ip_resume_phase2(tmp_adev); + if (r) + goto out; + + if (vram_lost) + amdgpu_device_fill_reset_magic(tmp_adev); + + /* Update PSP FW topology after reset */ + if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) + r = amdgpu_xgmi_update_topology(hive, tmp_adev); + } + } + + +out: + if (!r) { + amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + r = amdgpu_ib_ring_tests(tmp_adev); + if (r) { + dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); + r = amdgpu_device_ip_suspend(tmp_adev); + need_full_reset = true; + r = -EAGAIN; + goto end; + } + } + + if (!r) + r = amdgpu_device_recover_vram(tmp_adev); + else + tmp_adev->asic_reset_res = r; + } + +end: + *need_full_reset_arg = need_full_reset; + return r; +} + +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int i; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -3363,7 +3458,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * or all rings (in the case @job is NULL) * after above amdgpu_reset accomplished */ - if ((!job || job->base.sched == &ring->sched) && !r) + if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res) drm_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); @@ -3373,21 +3468,142 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_helper_resume_force_mode(adev->ddev); } - ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + adev->asic_reset_res = 0; +} - if (r) { - /* 
bad news, how to tell it to userspace ? */ - dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); - } else { - dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); - } +static void amdgpu_device_lock_adev(struct amdgpu_device *adev) +{ + mutex_lock(&adev->lock_reset); + atomic_inc(&adev->gpu_reset_counter); + adev->in_gpu_reset = 1; + /* Block kfd */ + amdgpu_amdkfd_pre_reset(adev); +} +static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) +{ /*unlock kfd */ amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); +} + + +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu device pointer + * @job: which job trigger hang + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int r; + struct amdgpu_hive_info *hive = NULL; + bool need_full_reset = false; + struct amdgpu_device *tmp_adev = NULL; + struct list_head device_list, *device_list_handle = NULL; + + INIT_LIST_HEAD(&device_list); + + dev_info(adev->dev, "GPU reset begin!\n"); + + /* + * In case of XGMI hive disallow concurrent resets to be triggered + * by different nodes. No point also since the one node already executing + * reset will also reset all the other nodes in the hive. 
+ */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && + !mutex_trylock(&hive->hive_lock)) + return 0; + + /* Start with adev pre asic reset first for soft reset check.*/ + amdgpu_device_lock_adev(adev); + r = amdgpu_device_pre_asic_reset(adev, + job, + &need_full_reset); + if (r) { + /*TODO Should we stop ?*/ + DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", + r, adev->ddev->unique); + adev->asic_reset_res = r; + } + + /* Build list of devices to reset */ + if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) { + if (!hive) { + amdgpu_device_unlock_adev(adev); + return -ENODEV; + } + + /* + * In case we are in XGMI hive mode device reset is done for all the + * nodes in the hive to retrain all XGMI links and hence the reset + * sequence is executed in loop on all nodes. + */ + device_list_handle = &hive->device_list; + } else { + list_add_tail(&adev->gmc.xgmi.head, &device_list); + device_list_handle = &device_list; + } + +retry: /* Rest of adevs pre asic reset from XGMI hive. */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + + if (tmp_adev == adev) + continue; + + amdgpu_device_lock_adev(tmp_adev); + r = amdgpu_device_pre_asic_reset(tmp_adev, + NULL, + &need_full_reset); + /*TODO Should we stop ?*/ + if (r) { + DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", + r, tmp_adev->ddev->unique); + tmp_adev->asic_reset_res = r; + } + } + + /* Actual ASIC resets if needed.*/ + /* TODO Implement XGMI hive reset logic for SRIOV */ + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_device_reset_sriov(adev, job ? false : true); + if (r) + adev->asic_reset_res = r; + } else { + r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); + if (r && r == -EAGAIN) + goto retry; + } + + /* Post ASIC reset for all devs .*/ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? 
job : NULL); + + if (r) { + /* bad news, how to tell it to userspace ? */ + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + } else { + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); + } + + amdgpu_device_unlock_adev(tmp_adev); + } + + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + mutex_unlock(&hive->hive_lock); + + if (r) + dev_info(adev->dev, "GPU reset end with ret = %d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 686a26de50f9..15ce7e681d67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -631,6 +631,11 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); if (!adev->mode_info.max_bpc_property) return -ENOMEM; + adev->mode_info.abm_level_property = + drm_property_create_range(adev->ddev, 0, + "abm level", 0, 4); + if (!adev->mode_info.abm_level_property) + return -ENOMEM; } return 0; @@ -857,7 +862,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, /* Inside "upper part" of vblank area? Apply corrective offset if so: */ if (in_vbl && (*vpos >= vbl_start)) { vtotal = mode->crtc_vtotal; - *vpos = *vpos - vtotal; + + /* With variable refresh rate displays the vpos can exceed + * the vtotal value. Clamp to 0 to return -vbl_end instead + * of guessing the remaining number of lines until scanout. + */ + *vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0; } /* Correct for shifted end of vbl at vbl_end. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h new file mode 100644 index 000000000000..be620b29f4aa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -0,0 +1,243 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * GPU doorbell structures, functions & helpers + */ +struct amdgpu_doorbell { + /* doorbell mmio */ + resource_size_t base; + resource_size_t size; + u32 __iomem *ptr; + u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ +}; + +/* Reserved doorbells for amdgpu (including multimedia). + * KFD can use all the rest in the 2M doorbell bar. + * For asic before vega10, doorbell is 32-bit, so the + * index/offset is in dword. For vega10 and after, doorbell + * can be 64-bit, so the index defined is in qword. 
+ */ +struct amdgpu_doorbell_index { + uint32_t kiq; + uint32_t mec_ring0; + uint32_t mec_ring1; + uint32_t mec_ring2; + uint32_t mec_ring3; + uint32_t mec_ring4; + uint32_t mec_ring5; + uint32_t mec_ring6; + uint32_t mec_ring7; + uint32_t userqueue_start; + uint32_t userqueue_end; + uint32_t gfx_ring0; + uint32_t sdma_engine0; + uint32_t sdma_engine1; + uint32_t sdma_engine2; + uint32_t sdma_engine3; + uint32_t sdma_engine4; + uint32_t sdma_engine5; + uint32_t sdma_engine6; + uint32_t sdma_engine7; + uint32_t ih; + union { + struct { + uint32_t vcn_ring0_1; + uint32_t vcn_ring2_3; + uint32_t vcn_ring4_5; + uint32_t vcn_ring6_7; + } vcn; + struct { + uint32_t uvd_ring0_1; + uint32_t uvd_ring2_3; + uint32_t uvd_ring4_5; + uint32_t uvd_ring6_7; + uint32_t vce_ring0_1; + uint32_t vce_ring2_3; + uint32_t vce_ring4_5; + uint32_t vce_ring6_7; + } uvd_vce; + }; + uint32_t max_assignment; +}; + +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT +{ + AMDGPU_DOORBELL_KIQ = 0x000, + AMDGPU_DOORBELL_HIQ = 0x001, + AMDGPU_DOORBELL_DIQ = 0x002, + AMDGPU_DOORBELL_MEC_RING0 = 0x010, + AMDGPU_DOORBELL_MEC_RING1 = 0x011, + AMDGPU_DOORBELL_MEC_RING2 = 0x012, + AMDGPU_DOORBELL_MEC_RING3 = 0x013, + AMDGPU_DOORBELL_MEC_RING4 = 0x014, + AMDGPU_DOORBELL_MEC_RING5 = 0x015, + AMDGPU_DOORBELL_MEC_RING6 = 0x016, + AMDGPU_DOORBELL_MEC_RING7 = 0x017, + AMDGPU_DOORBELL_GFX_RING0 = 0x020, + AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0, + AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1, + AMDGPU_DOORBELL_IH = 0x1E8, + AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF, + AMDGPU_DOORBELL_INVALID = 0xFFFF +} AMDGPU_DOORBELL_ASSIGNMENT; + +typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT +{ + /* Compute + GFX: 0~255 */ + AMDGPU_VEGA20_DOORBELL_KIQ = 0x000, + AMDGPU_VEGA20_DOORBELL_HIQ = 0x001, + AMDGPU_VEGA20_DOORBELL_DIQ = 0x002, + AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003, + AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004, + AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005, + AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006, + AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 
0x007, + AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008, + AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009, + AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A, + AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B, + /* SDMA:256~335*/ + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146, + /* IH: 376~391 */ + AMDGPU_VEGA20_DOORBELL_IH = 0x178, + /* MMSCH: 392~407 + * overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189, + AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A, + AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B, + + AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188, + AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189, + AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A, + AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B, + + AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C, + AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D, + AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E, + AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F, + AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, + AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF +} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; + +/* + * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space + */ +typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT +{ + /* + * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in + * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. 
+ * Compute related doorbells are allocated from 0x00 to 0x8a + */ + + + /* kernel scheduling */ + AMDGPU_DOORBELL64_KIQ = 0x00, + + /* HSA interface queue and debug queue */ + AMDGPU_DOORBELL64_HIQ = 0x01, + AMDGPU_DOORBELL64_DIQ = 0x02, + + /* Compute engines */ + AMDGPU_DOORBELL64_MEC_RING0 = 0x03, + AMDGPU_DOORBELL64_MEC_RING1 = 0x04, + AMDGPU_DOORBELL64_MEC_RING2 = 0x05, + AMDGPU_DOORBELL64_MEC_RING3 = 0x06, + AMDGPU_DOORBELL64_MEC_RING4 = 0x07, + AMDGPU_DOORBELL64_MEC_RING5 = 0x08, + AMDGPU_DOORBELL64_MEC_RING6 = 0x09, + AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, + + /* User queue doorbell range (128 doorbells) */ + AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, + AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, + + /* Graphics engine */ + AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, + + /* + * Other graphics doorbells can be allocated here: from 0x8c to 0xdf + * Graphics voltage island aperture 1 + * default non-graphics QWORD index is 0xe0 - 0xFF inclusive + */ + + /* For vega10 sriov, the sdma doorbell must be fixed as follow + * to keep the same setting with host driver, or it will + * happen conflicts + */ + AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, + AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, + + /* Interrupt handler */ + AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ + AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ + AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ + + /* VCN engine use 32 bits doorbell */ + AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_DOORBELL64_VCN2_3 = 0xF9, + AMDGPU_DOORBELL64_VCN4_5 = 0xFA, + AMDGPU_DOORBELL64_VCN6_7 = 0xFB, + + /* overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, + 
AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, + + AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, + + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, + AMDGPU_DOORBELL64_INVALID = 0xFFFF +} AMDGPU_DOORBELL64_ASSIGNMENT; + +u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); + +#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) +#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) +#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) +#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 74b611e8a1b1..9c77eaa45982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -454,9 +454,10 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); /** * DOC: param_buf_per_se (int) - * Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx). + * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. + * The default is 0 (depending on gfx). 
*/ -MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); /** @@ -1227,9 +1228,6 @@ static struct drm_driver kms_driver = { .patchlevel = KMS_DRIVER_PATCHLEVEL, }; -static struct drm_driver *driver; -static struct pci_driver *pdriver; - static struct pci_driver amdgpu_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, @@ -1259,16 +1257,14 @@ static int __init amdgpu_init(void) goto error_fence; DRM_INFO("amdgpu kernel modesetting enabled.\n"); - driver = &kms_driver; - pdriver = &amdgpu_kms_pci_driver; - driver->num_ioctls = amdgpu_max_kms_ioctl; + kms_driver.num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ amdgpu_amdkfd_init(); /* let modprobe override vga console setting */ - return pci_register_driver(pdriver); + return pci_register_driver(&amdgpu_kms_pci_driver); error_fence: amdgpu_sync_fini(); @@ -1280,7 +1276,7 @@ error_sync: static void __exit amdgpu_exit(void) { amdgpu_amdkfd_fini(); - pci_unregister_driver(pdriver); + pci_unregister_driver(&amdgpu_kms_pci_driver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 5448cf27654e..ee47c11e92ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -398,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " - "cpu addr 0x%p\n", ring->idx, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + 
DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " + "0x%016llx, cpu addr 0x%p\n", ring->name, + ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 11fea28f8ad3..6d11e1721147 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -248,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); return 0; } @@ -259,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, * @offset: offset into the GPU's gart aperture * @pages: number of pages to bind * @dma_addr: DMA addresses of pages + * @flags: page table entry flags + * @dst: CPU address of the gart table * * Map the dma_addresses into GART entries (all asics). * Returns 0 for success, -EINVAL for failure. 
@@ -331,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 9ff62887e4e3..afa2e2877d87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -41,6 +41,7 @@ struct amdgpu_bo; struct amdgpu_gart { struct amdgpu_bo *bo; + /* CPU kmapped address of gart table */ void *ptr; unsigned num_gpu_pages; unsigned num_cpu_pages; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7b3d1ebda9df..f4f00217546e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; - tv.shared = true; + tv.num_shared = 1; list_add(&tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); @@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); + if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + tv.num_shared = 1; + else + tv.num_shared = 0; list_add(&tv.head, &list); } else { gobj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index d63daba9b17c..f1ddfc50bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + /* * GEM objects. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1a656b8657f7..97a60da62004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -25,6 +25,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_rlc.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -249,7 +250,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + ring->doorbell_index = adev->doorbell_index.kiq; r = amdgpu_gfx_kiq_acquire(adev, ring); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index b61b5c11aead..f790e15bcd08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -29,6 +29,7 @@ */ #include "clearstate_defs.h" #include "amdgpu_ring.h" +#include "amdgpu_rlc.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -37,59 +38,6 @@ #define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L - -struct amdgpu_rlc_funcs { - void (*enter_safe_mode)(struct amdgpu_device *adev); - void (*exit_safe_mode)(struct amdgpu_device *adev); -}; - -struct amdgpu_rlc { - /* for power gating */ - struct amdgpu_bo *save_restore_obj; - uint64_t save_restore_gpu_addr; - volatile uint32_t *sr_ptr; - const u32 *reg_list; - u32 reg_list_size; - /* for clear state */ - struct amdgpu_bo *clear_state_obj; - uint64_t clear_state_gpu_addr; - volatile uint32_t *cs_ptr; - const struct cs_section_def *cs_data; - u32 clear_state_size; - /* for cp tables */ - struct amdgpu_bo *cp_table_obj; - uint64_t cp_table_gpu_addr; - volatile uint32_t *cp_table_ptr; - u32 cp_table_size; - - /* safe mode for updating CG/PG state */ - bool in_safe_mode; - const struct 
amdgpu_rlc_funcs *funcs; - - /* for firmware data */ - u32 save_and_restore_offset; - u32 clear_state_descriptor_offset; - u32 avail_scratch_ram_locations; - u32 reg_restore_list_size; - u32 reg_list_format_start; - u32 reg_list_format_separate_start; - u32 starting_offsets_start; - u32 reg_list_format_size_bytes; - u32 reg_list_size_bytes; - u32 reg_list_format_direct_reg_list_length; - u32 save_restore_list_cntl_size_bytes; - u32 save_restore_list_gpm_size_bytes; - u32 save_restore_list_srm_size_bytes; - - u32 *register_list_format; - u32 *register_restore; - u8 *save_restore_list_cntl; - u8 *save_restore_list_gpm; - u8 *save_restore_list_srm; - - bool is_rlc_v2_1; -}; - #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES struct amdgpu_mec { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6fa7ef446e46..81e6070d255b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -64,7 +64,7 @@ struct amdgpu_vmhub { struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid); + uint32_t vmid, uint32_t flush_type); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -89,7 +89,7 @@ struct amdgpu_gmc_funcs { struct amdgpu_xgmi { /* from psp */ - u64 device_id; + u64 node_id; u64 hive_id; /* fixed per family */ u64 node_segment_size; @@ -99,6 +99,7 @@ struct amdgpu_xgmi { unsigned num_physical_nodes; /* gpu list in the same hive */ struct list_head head; + bool supported; }; struct amdgpu_gmc { @@ -151,7 +152,7 @@ struct amdgpu_gmc { struct amdgpu_xgmi xgmi; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
(r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index b8963b725dfa..c48207b377bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, fence_ctx = 0; } - if (!ring->ready) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } @@ -221,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, - need_ctx_switch); + amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch); need_ctx_switch = false; } @@ -347,19 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + for (i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; long tmo; - if (!ring || !ring->ready) - continue; - - /* skip IB tests for KIQ in general for the below reasons: - * 1. We never submit IBs to the KIQ - * 2. KIQ doesn't use the EOP interrupts, - * we use some other CP interrupt. + /* KIQ rings don't have an IB test because we never submit IBs + * to them and they have no interrupt support. 
*/ - if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + if (!ring->sched.ready || !ring->funcs->test_ib) continue; /* MM engine need more time */ @@ -374,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) tmo = tmo_gfx; r = amdgpu_ring_test_ib(ring, tmo); - if (r) { - ring->ready = false; - - if (ring == &adev->gfx.gfx_ring[0]) { - /* oh, oh, that's really bad */ - DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r); - adev->accel_working = false; - return r; - - } else { - /* still not good, but we can live with it */ - DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); - ret = r; - } + if (!r) { + DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", + ring->name); + continue; + } + + ring->sched.ready = false; + DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n", + ring->name, r); + + if (ring == &adev->gfx.gfx_ring[0]) { + /* oh, oh, that's really bad */ + adev->accel_working = false; + return r; + + } else { + ret = r; } } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 9ce8c93ec19b..f877bb78d10a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -51,14 +51,12 @@ struct amdgpu_ih_ring { struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ u32 (*get_wptr)(struct amdgpu_device *adev); - bool (*prescreen_iv)(struct amdgpu_device *adev); void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); void (*set_rptr)(struct amdgpu_device *adev); }; #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) -#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 
52c17f6219a7..b7968f426862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -94,23 +94,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) } /** - * amdgpu_irq_reset_work_func - execute GPU reset - * - * @work: work struct pointer - * - * Execute scheduled GPU reset (Cayman+). - * This function is called when the IRQ handler thinks we need a GPU reset. - */ -static void amdgpu_irq_reset_work_func(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - reset_work); - - if (!amdgpu_sriov_vf(adev) && amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover(adev, NULL); -} - -/** * amdgpu_irq_disable_all - disable *all* interrupts * * @adev: amdgpu device pointer @@ -162,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev, u32 ring_index = ih->rptr >> 2; struct amdgpu_iv_entry entry; - /* Prescreening of high-frequency interrupts */ - if (!amdgpu_ih_prescreen_iv(adev)) - return; - - /* Before dispatching irq to IP blocks, send it to amdkfd */ - amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]); - entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); @@ -262,15 +238,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev) amdgpu_hotplug_work_func); } - INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); - adev->irq.installed = true; r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); return r; } adev->ddev->max_vblank_count = 0x00ffffff; @@ -299,7 +272,6 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) pci_disable_msi(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); } for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { @@ -392,39 +364,38 @@ 
void amdgpu_irq_dispatch(struct amdgpu_device *adev, unsigned client_id = entry->client_id; unsigned src_id = entry->src_id; struct amdgpu_irq_src *src; + bool handled = false; int r; trace_amdgpu_iv(entry); if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); - return; - } - if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { + } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - return; - } - if (adev->irq.virq[src_id]) { + } else if (adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); - } else { - if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); - return; - } - src = adev->irq.client[client_id].sources[src_id]; - if (!src) { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); - return; - } + } else if (!adev->irq.client[client_id].sources) { + DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); + } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, entry); - if (r) + if (r < 0) DRM_ERROR("error processing interrupt (%d)\n", r); + else if (r) + handled = true; + + } else { + DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); } + + /* Send it to amdkfd as well if it isn't already handled */ + if (!handled) + amdgpu_amdkfd_interrupt(adev, entry->iv_entry); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 755f733bf0d9..e0af44fd6a0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -112,6 +112,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); + drm_sched_job_cleanup(s_job); + amdgpu_ring_priority_put(ring, s_job->s_priority); dma_fence_put(job->fence); 
amdgpu_sync_free(&job->sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 57cfe78a262b..e1b46a6703de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -33,6 +33,8 @@ #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) +#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0) + struct amdgpu_fence; struct amdgpu_job { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 8f3d44e5e787..bc62bf41b7e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_GFX: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_gfx_rings; i++) - if (adev->gfx.gfx_ring[i].ready) + if (adev->gfx.gfx_ring[i].sched.ready) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_COMPUTE: type = AMD_IP_BLOCK_TYPE_GFX; for (i = 0; i < adev->gfx.num_compute_rings; i++) - if (adev->gfx.compute_ring[i].ready) + if (adev->gfx.compute_ring[i].sched.ready) ++num_rings; ib_start_alignment = 32; ib_size_alignment = 32; @@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_DMA: type = AMD_IP_BLOCK_TYPE_SDMA; for (i = 0; i < adev->sdma.num_instances; i++) - if (adev->sdma.instance[i].ring.ready) + if (adev->sdma.instance[i].ring.sched.ready) ++num_rings; ib_start_alignment = 256; ib_size_alignment = 4; @@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.harvest_config & (1 << i)) continue; - if (adev->uvd.inst[i].ring.ready) + if (adev->uvd.inst[i].ring.sched.ready) ++num_rings; } ib_start_alignment = 64; @@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case 
AMDGPU_HW_IP_VCE: type = AMD_IP_BLOCK_TYPE_VCE; for (i = 0; i < adev->vce.num_rings; i++) - if (adev->vce.ring[i].ready) + if (adev->vce.ring[i].sched.ready) ++num_rings; ib_start_alignment = 4; ib_size_alignment = 1; @@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, continue; for (j = 0; j < adev->uvd.num_enc_rings; j++) - if (adev->uvd.inst[i].ring_enc[j].ready) + if (adev->uvd.inst[i].ring_enc[j].sched.ready) ++num_rings; } ib_start_alignment = 64; @@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_dec.ready) + if (adev->vcn.ring_dec.sched.ready) ++num_rings; ib_start_alignment = 16; ib_size_alignment = 16; @@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.ring_enc[i].ready) + if (adev->vcn.ring_enc[i].sched.ready) ++num_rings; ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_jpeg.ready) + if (adev->vcn.ring_jpeg.sched.ready) ++num_rings; ib_start_alignment = 16; ib_size_alignment = 16; @@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; + + r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, + &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE); if (r) goto error_vm; } @@ -1048,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pasid = fpriv->vm.pasid; pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); - amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index d1b4d9b6aae0..aadd0fa42e43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -38,7 +38,6 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_plane_helper.h> -#include <drm/drm_fb_helper.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> #include <linux/hrtimer.h> @@ -57,7 +56,6 @@ struct amdgpu_hpd; #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) -#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base) #define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); @@ -295,13 +293,6 @@ struct amdgpu_display_funcs { uint16_t connector_object_id, struct amdgpu_hpd *hpd, struct amdgpu_router *router); - /* it is used to enter or exit into free sync mode */ - int (*notify_freesync)(struct drm_device *dev, void *data, - struct drm_file *filp); - /* it is used to allow enablement of freesync mode */ - int (*set_freesync_property)(struct drm_connector *connector, - struct drm_property *property, - uint64_t val); }; @@ -325,7 +316,7 @@ struct amdgpu_mode_info { struct card_info *atom_card_info; bool mode_config_initialized; struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; - struct amdgpu_plane *planes[AMDGPU_MAX_PLANES]; + struct drm_plane *planes[AMDGPU_MAX_PLANES]; struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; /* DVI-I properties */ struct drm_property *coherent_mode_property; @@ -341,6 +332,8 @@ struct amdgpu_mode_info { struct drm_property *dither_property; /* maximum number of bits per channel for monitor color */ struct drm_property *max_bpc_property; + /* Adaptive Backlight Modulation (power feature) */ + struct drm_property *abm_level_property; /* hardcoded DFP edid 
from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; @@ -436,11 +429,6 @@ struct amdgpu_crtc { struct drm_pending_vblank_event *event; }; -struct amdgpu_plane { - struct drm_plane base; - enum drm_plane_type plane_type; -}; - struct amdgpu_encoder_atom_dig { bool linkb; /* atom dig */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 904014dc5915..fd271f9746a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_subtract_pin_size(bo); if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_amdkfd_unreserve_memory_limit(bo); amdgpu_bo_kunmap(bo); @@ -608,53 +608,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, } /** - * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: &amdgpu_bo buffer to be backed up - * @resv: reservation object with embedded fence - * @fence: dma_fence associated with the operation - * @direct: whether to submit the job directly - * - * Copies an &amdgpu_bo buffer object to its shadow object. - * Not used for now. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct) - -{ - struct amdgpu_bo *shadow = bo->shadow; - uint64_t bo_addr, shadow_addr; - int r; - - if (!shadow) - return -EINVAL; - - bo_addr = amdgpu_bo_gpu_offset(bo); - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); - - r = reservation_object_reserve_shared(bo->tbo.resv); - if (r) - goto err; - - r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, - amdgpu_bo_size(bo), resv, fence, - direct, false); - if (!r) - amdgpu_bo_fence(bo, *fence, true); - -err: - return r; -} - -/** * amdgpu_bo_validate - validate an &amdgpu_bo buffer object * @bo: pointer to the buffer object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7d3312d0da11..9291c2f837e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, bool direct); int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 59cc678de8c1..1f61ed95727c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -33,6 +33,8 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> +#include "hwmgr.h" +#define WIDTH_4K 3840 static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev); @@ -1642,6 +1644,19 @@ 
static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; + /* Skip fan attributes on APU */ + if ((adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_target.dev_attr.attr || + attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) + return 0; + /* Skip limit attributes if DPM is not enabled */ if (!adev->pm.dpm_enabled && (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr || @@ -1956,6 +1971,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); mutex_unlock(&adev->pm.mutex); } + /* enable/disable Low Memory PState for UVD (4k videos) */ + if (adev->asic_type == CHIP_STONEY && + adev->uvd.decode_image_width >= WIDTH_4K) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (hwmgr && hwmgr->hwmgr_func && + hwmgr->hwmgr_func->update_nbdpm_pstate) + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, + !enable, + true); + } } void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) @@ -2129,7 +2155,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->ready) + if (ring && ring->sched.ready) amdgpu_fence_wait_empty(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index e45e929aaab5..71913a18d142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -39,8 +39,6 @@ #include <drm/amdgpu_drm.h> #include 
<linux/dma-buf.h> -static const struct dma_buf_ops amdgpu_dmabuf_ops; - /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table * implementation @@ -332,15 +330,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, return ret; } -static const struct dma_buf_ops amdgpu_dmabuf_ops = { +const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_gem_map_attach, .detach = amdgpu_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_gem_begin_cpu_access, - .map = drm_gem_dmabuf_kmap, - .unmap = drm_gem_dmabuf_kunmap, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 25d2f3e757f1..6759d898b3ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -90,6 +90,8 @@ static int psp_sw_fini(void *handle) adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); adev->psp.asd_fw = NULL; + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; return 0; } @@ -118,21 +120,25 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, - int index) + struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr) { int ret; + int index; memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + index = atomic_inc_return(&psp->fence_value); ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, fence_mc_addr, index); + if (ret) { + atomic_dec(&psp->fence_value); + return ret; + } - while (*((unsigned int *)psp->fence_buf) != index) { + while (*((unsigned int *)psp->fence_buf) != index) msleep(1); - } /* the status field must be 0 after FW is loaded */ if 
(ucode && psp->cmd_buf_mem->resp.status) { @@ -149,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } -static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, +bool psp_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + else + return false; +} + +static void psp_prep_tmr_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { - cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + if (psp_support_vmr_ring(psp)) + cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; + else + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; @@ -186,12 +204,12 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", PSP_TMR_SIZE, psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr, 1); + psp->fence_buf_mc_addr); if (ret) goto failed; @@ -258,13 +276,194 @@ static int psp_asd_load(struct psp_context *psp) psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr, 2); + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, + uint32_t xgmi_ta_size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc); + cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = 
upper_32_bits(xgmi_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_xgmi_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for xgmi ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->xgmi_context.xgmi_shared_bo, + &psp->xgmi_context.xgmi_shared_mc_addr, + &psp->xgmi_context.xgmi_shared_buf); + + return ret; +} + +static int psp_xgmi_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); + + psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->xgmi_context.xgmi_shared_mc_addr, + psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + if (!ret) { + psp->xgmi_context.initialized = 1; + psp->xgmi_context.session_id = cmd->resp.session_id; + } + + kfree(cmd); + + return ret; +} + +static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t xgmi_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id; +} + +static int psp_xgmi_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); kfree(cmd); return ret; } +static void 
psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t xgmi_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->xgmi_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_xgmi_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->xgmi_context.initialized) + return 0; + + ret = psp_xgmi_unload(psp); + if (ret) + return ret; + + psp->xgmi_context.initialized = 0; + + /* free xgmi shared memory */ + amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, + &psp->xgmi_context.xgmi_shared_mc_addr, + &psp->xgmi_context.xgmi_shared_buf); + + return 0; +} + +static int psp_xgmi_initialize(struct psp_context *psp) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + if (!psp->xgmi_context.initialized) { + ret = psp_xgmi_init_shared_buf(psp); + if (ret) + return ret; + } + + /* Load XGMI TA */ + ret = psp_xgmi_load(psp); + if (ret) + return ret; + + /* Initialize XGMI session */ + xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -292,6 +491,15 @@ static int psp_hw_start(struct psp_context *psp) if (ret) 
return ret; + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } return 0; } @@ -321,7 +529,7 @@ static int psp_np_fw_load(struct psp_context *psp) return ret; ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr, i + 3); + psp->fence_buf_mc_addr); if (ret) return ret; @@ -340,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0) + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { + psp_ring_destroy(psp, PSP_RING_TYPE__KM); goto skip_memalloc; + } psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) @@ -452,6 +662,10 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) + psp_xgmi_terminate(psp); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -479,6 +693,15 @@ static int psp_suspend(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) { + ret = psp_xgmi_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate xgmi ta\n"); + return ret; + } + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 8b8720e9c3f0..10decf70c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -27,14 +27,17 @@ #include "amdgpu.h" #include "psp_gfx_if.h" +#include "ta_xgmi_if.h" #define PSP_FENCE_BUFFER_SIZE 0x1000 
#define PSP_CMD_BUFFER_SIZE 0x1000 -#define PSP_ASD_SHARED_MEM_SIZE 0x4000 +#define PSP_ASD_SHARED_MEM_SIZE 0x4000 +#define PSP_XGMI_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 #define PSP_TMR_SIZE 0x400000 struct psp_context; +struct psp_xgmi_node_info; struct psp_xgmi_topology_info; enum psp_ring_type @@ -80,12 +83,20 @@ struct psp_funcs enum AMDGPU_UCODE_ID ucode_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); - uint64_t (*xgmi_get_device_id)(struct psp_context *psp); + uint64_t (*xgmi_get_node_id)(struct psp_context *psp); uint64_t (*xgmi_get_hive_id)(struct psp_context *psp); int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); + struct psp_xgmi_topology_info *topology); int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, - struct psp_xgmi_topology_info *topology); + struct psp_xgmi_topology_info *topology); +}; + +struct psp_xgmi_context { + uint8_t initialized; + uint32_t session_id; + struct amdgpu_bo *xgmi_shared_bo; + uint64_t xgmi_shared_mc_addr; + void *xgmi_shared_buf; }; struct psp_context @@ -96,7 +107,7 @@ struct psp_context const struct psp_funcs *funcs; - /* fence buffer */ + /* firmware buffer */ struct amdgpu_bo *fw_pri_bo; uint64_t fw_pri_mc_addr; void *fw_pri_buf; @@ -134,6 +145,16 @@ struct psp_context struct amdgpu_bo *cmd_buf_bo; uint64_t cmd_buf_mc_addr; struct psp_gfx_cmd_resp *cmd_buf_mem; + + /* fence value associated with cmd buffer */ + atomic_t fence_value; + + /* xgmi ta firmware and buffer */ + const struct firmware *ta_fw; + uint32_t ta_xgmi_ucode_version; + uint32_t ta_xgmi_ucode_size; + uint8_t *ta_xgmi_start_addr; + struct psp_xgmi_context xgmi_context; }; struct amdgpu_psp_funcs { @@ -141,21 +162,17 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t 
is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + struct psp_xgmi_topology_info { - /* Generated by PSP to identify the GPU instance within xgmi connection */ - uint64_t device_id; - /* - * If all bits set to 0 , driver indicates it wants to retrieve the xgmi - * connection vector topology, but not access enable the connections - * if some or all bits are set to 1, driver indicates it want to retrieve the - * current xgmi topology and access enable the link to GPU[i] associated - * with the bit position in the vector. - * On return,: bits indicated which xgmi links are present/active depending - * on the value passed in. The relative bit offset for the relative GPU index - * within the hive is always marked active. - */ - uint32_t connection_mask; - uint32_t reserved; /* must be 0 */ + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; }; #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) @@ -177,8 +194,8 @@ struct psp_xgmi_topology_info { ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) -#define psp_xgmi_get_device_id(psp) \ - ((psp)->funcs->xgmi_get_device_id ? (psp)->funcs->xgmi_get_device_id((psp)) : 0) +#define psp_xgmi_get_node_id(psp) \ + ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0) #define psp_xgmi_get_hive_id(psp) \ ((psp)->funcs->xgmi_get_hive_id ? 
(psp)->funcs->xgmi_get_hive_id((psp)) : 0) #define psp_xgmi_get_topology_info(psp, num_device, topology) \ @@ -199,6 +216,9 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); +int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +bool psp_support_vmr_ring(struct psp_context *psp); + extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index b70e85ec147d..335a0edf114b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ void amdgpu_ring_fini(struct amdgpu_ring *ring) { - ring->ready = false; + ring->sched.ready = false; /* Not to finish a ring which is not initialized */ if (!(ring->adev) || !(ring->adev->rings[ring->idx])) @@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!ring->funcs->soft_recovery) + if (!ring->funcs->soft_recovery || !fence) return false; atomic_inc(&ring->adev->gpu_reset_counter); @@ -500,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring) debugfs_remove(ring->ent); #endif } + +/** + * amdgpu_ring_test_helper - tests ring and set sched readiness status + * + * @ring: ring to try the recovery on + * + * Tests ring and set sched readiness status + * + * Returns 0 on success, error on failure. 
+ */ +int amdgpu_ring_test_helper(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + r = amdgpu_ring_test_ring(ring); + if (r) + DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n", + ring->name, r); + else + DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n", + ring->name); + + ring->sched.ready = !r; + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4caa301ce454..0beb01fef83f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -129,8 +129,9 @@ struct amdgpu_ring_funcs { unsigned emit_ib_size; /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch); + bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -189,7 +190,6 @@ struct amdgpu_ring { uint64_t gpu_addr; uint64_t ptr_mask; uint32_t buf_mask; - bool ready; u32 idx; u32 me; u32 pipe; @@ -229,7 +229,7 @@ struct amdgpu_ring { #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) +#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c))) #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) @@ -313,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +int amdgpu_ring_test_helper(struct amdgpu_ring *ring); + #endif diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c new file mode 100644 index 000000000000..c8793e6cc3c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -0,0 +1,282 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_rlc.h" + +/** + * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode + * + * @adev: amdgpu_device pointer + * + * Set RLC enter into safe mode if RLC is enabled and haven't in safe mode. 
+ */ +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) +{ + if (adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + adev->gfx.rlc.funcs->set_safe_mode(adev); + adev->gfx.rlc.in_safe_mode = true; + } +} + +/** + * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode + * + * @adev: amdgpu_device pointer + * + * Set RLC exit safe mode if RLC is enabled and have entered into safe mode. + */ +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) +{ + if (!(adev->gfx.rlc.in_safe_mode)) + return; + + /* if RLC is not enabled, do nothing */ + if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + adev->gfx.rlc.funcs->unset_safe_mode(adev); + adev->gfx.rlc.in_safe_mode = false; + } +} + +/** + * amdgpu_gfx_rlc_init_sr - Init save restore block + * + * @adev: amdgpu_device pointer + * @dws: the size of save restore block + * + * Allocate and setup value to save restore block of rlc. + * Returns 0 on succeess or negative error code if allocate failed. 
+ */ +int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) +{ + const u32 *src_ptr; + volatile u32 *dst_ptr; + u32 i; + int r; + + /* allocate save restore block */ + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); + if (r) { + dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* write the sr buffer */ + src_ptr = adev->gfx.rlc.reg_list; + dst_ptr = adev->gfx.rlc.sr_ptr; + for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) + dst_ptr[i] = cpu_to_le32(src_ptr[i]); + amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_init_csb - Init clear state block + * + * @adev: amdgpu_device pointer + * + * Allocate and setup value to clear state block of rlc. + * Returns 0 on succeess or negative error code if allocate failed. 
+ */ +int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + int r; + + /* allocate clear state block */ + adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_init_cpt - Init cp table + * + * @adev: amdgpu_device pointer + * + * Allocate and setup value to cp table of rlc. + * Returns 0 on succeess or negative error code if allocate failed. + */ +int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create cp table bo\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* set up the cp table */ + amdgpu_gfx_rlc_setup_cp_table(adev); + amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_setup_cp_table - setup cp the buffer of cp table + * + * @adev: amdgpu_device pointer + * + * Write cp firmware data into cp table. 
+ */ +void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me; + u32 bo_offset = 0; + u32 table_offset, table_size; + + max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev); + + /* write the cp table buffer */ + dst_ptr = adev->gfx.rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 1) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 2) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 3) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 4) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec2_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); 
+ table_size = le32_to_cpu(hdr->jt_size); + } + + for (i = 0; i < table_size; i ++) { + dst_ptr[bo_offset + i] = + cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); + } + + bo_offset += table_size; + } +} + +/** + * amdgpu_gfx_rlc_fini - Free BO which used for RLC + * + * @adev: amdgpu_device pointer + * + * Free three BO which is used for rlc_save_restore_block, rlc_clear_state_block + * and rlc_jump_table_block. + */ +void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) +{ + /* save restore block */ + if (adev->gfx.rlc.save_restore_obj) { + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); + } + + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h new file mode 100644 index 000000000000..49a8ab52113b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -0,0 +1,98 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RLC_H__ +#define __AMDGPU_RLC_H__ + +#include "clearstate_defs.h" + +struct amdgpu_rlc_funcs { + bool (*is_rlc_enabled)(struct amdgpu_device *adev); + void (*set_safe_mode)(struct amdgpu_device *adev); + void (*unset_safe_mode)(struct amdgpu_device *adev); + int (*init)(struct amdgpu_device *adev); + u32 (*get_csb_size)(struct amdgpu_device *adev); + void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); + int (*get_cp_table_num)(struct amdgpu_device *adev); + int (*resume)(struct amdgpu_device *adev); + void (*stop)(struct amdgpu_device *adev); + void (*reset)(struct amdgpu_device *adev); + void (*start)(struct amdgpu_device *adev); +}; + +struct amdgpu_rlc { + /* for power gating */ + struct amdgpu_bo *save_restore_obj; + uint64_t save_restore_gpu_addr; + volatile uint32_t *sr_ptr; + const u32 *reg_list; + u32 reg_list_size; + /* for clear state */ + struct amdgpu_bo *clear_state_obj; + uint64_t clear_state_gpu_addr; + volatile uint32_t *cs_ptr; + const struct cs_section_def *cs_data; + u32 clear_state_size; + /* for cp tables */ + struct amdgpu_bo *cp_table_obj; + uint64_t cp_table_gpu_addr; + volatile uint32_t *cp_table_ptr; + u32 cp_table_size; + + /* safe mode for updating CG/PG state */ + bool in_safe_mode; + const struct amdgpu_rlc_funcs *funcs; + + /* for firmware data */ + u32 save_and_restore_offset; + u32 clear_state_descriptor_offset; + u32 avail_scratch_ram_locations; + u32 reg_restore_list_size; + u32 
reg_list_format_start; + u32 reg_list_format_separate_start; + u32 starting_offsets_start; + u32 reg_list_format_size_bytes; + u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; + + u32 *register_list_format; + u32 *register_restore; + u8 *save_restore_list_cntl; + u8 *save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; +}; + +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); +int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); +int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); +int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index bc9244b429ef..115bb0c99b0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -28,17 +28,31 @@ * GPU SDMA IP block helpers function. 
*/ -struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring) +struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; int i; for (i = 0; i < adev->sdma.num_instances; i++) - if (&adev->sdma.instance[i].ring == ring) - break; + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) + return &adev->sdma.instance[i]; - if (i < AMDGPU_MAX_SDMA_INSTANCES) - return &adev->sdma.instance[i]; - else - return NULL; + return NULL; +} + +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) { + *index = i; + return 0; + } + } + + return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 500113ec65ca..16b1a6ae5ba6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -41,6 +41,7 @@ struct amdgpu_sdma_instance { uint32_t feature_version; struct amdgpu_ring ring; + struct amdgpu_ring page; bool burst_nop; }; @@ -50,6 +51,7 @@ struct amdgpu_sdma { struct amdgpu_irq_src illegal_inst_irq; int num_instances; uint32_t srbm_soft_reset; + bool has_page_queue; }; /* @@ -92,6 +94,7 @@ struct amdgpu_buffer_funcs { #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) struct amdgpu_sdma_instance * -amdgpu_get_sdma_instance(struct amdgpu_ring *ring); +amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index e9bf70e2ac51..626abca770a0 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_ARGS(vm, ring, job), TP_STRUCT__entry( __field(u32, pasid) + __string(ring, ring->name) __field(u32, ring) __field(u32, vmid) __field(u32, vm_hub) @@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->pasid = vm->pasid; - __entry->ring = ring->idx; + __assign_str(ring, ring->name) __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), - TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->pasid, __entry->ring, __entry->vmid, + TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", + __entry->pasid, __get_str(ring), __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush, uint64_t pd_addr), TP_ARGS(ring, vmid, pd_addr), TP_STRUCT__entry( - __field(u32, ring) + __string(ring, ring->name) __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) ), TP_fast_assign( - __entry->ring = ring->idx; + __assign_str(ring, ring->name) __entry->vmid = vmid; __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; ), - TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", - __entry->ring, __entry->vmid, + TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", + __get_str(ring), __entry->vmid, __entry->vm_hub,__entry->pd_addr) ); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a44fc12ae1f9..c91ec3101d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -61,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); -/* - * Global memory. 
- */ - -/** - * amdgpu_ttm_mem_global_init - Initialize and acquire reference to - * memory object - * - * @ref: Object for initialization. - * - * This is called by drm_global_item_ref() when an object is being - * initialized. - */ -static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) -{ - return ttm_mem_global_init(ref->object); -} - -/** - * amdgpu_ttm_mem_global_release - Drop reference to a memory object - * - * @ref: Object being removed - * - * This is called by drm_global_item_unref() when an object is being - * released. - */ -static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) -{ - ttm_mem_global_release(ref->object); -} - -/** - * amdgpu_ttm_global_init - Initialize global TTM memory reference structures. - * - * @adev: AMDGPU device for which the global structures need to be registered. - * - * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() - * during bring up. - */ -static int amdgpu_ttm_global_init(struct amdgpu_device *adev) -{ - struct drm_global_reference *global_ref; - int r; - - /* ensure reference is false in case init fails */ - adev->mman.mem_global_referenced = false; - - global_ref = &adev->mman.mem_global_ref; - global_ref->global_type = DRM_GLOBAL_TTM_MEM; - global_ref->size = sizeof(struct ttm_mem_global); - global_ref->init = &amdgpu_ttm_mem_global_init; - global_ref->release = &amdgpu_ttm_mem_global_release; - r = drm_global_item_ref(global_ref); - if (r) { - DRM_ERROR("Failed setting up TTM memory accounting " - "subsystem.\n"); - goto error_mem; - } - - adev->mman.bo_global_ref.mem_glob = - adev->mman.mem_global_ref.object; - global_ref = &adev->mman.bo_global_ref.ref; - global_ref->global_type = DRM_GLOBAL_TTM_BO; - global_ref->size = sizeof(struct ttm_bo_global); - global_ref->init = &ttm_bo_global_init; - global_ref->release = &ttm_bo_global_release; - r = drm_global_item_ref(global_ref); - if (r) { - DRM_ERROR("Failed setting up TTM BO subsystem.\n"); - goto 
error_bo; - } - - mutex_init(&adev->mman.gtt_window_lock); - - adev->mman.mem_global_referenced = true; - - return 0; - -error_bo: - drm_global_item_unref(&adev->mman.mem_global_ref); -error_mem: - return r; -} - -static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) -{ - if (adev->mman.mem_global_referenced) { - mutex_destroy(&adev->mman.gtt_window_lock); - drm_global_item_unref(&adev->mman.bo_global_ref.ref); - drm_global_item_unref(&adev->mman.mem_global_ref); - adev->mman.mem_global_referenced = false; - } -} - static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) { return 0; @@ -1758,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) int r; u64 vis_vram_limit; - /* initialize global references for vram/gtt */ - r = amdgpu_ttm_global_init(adev); - if (r) { - return r; - } + mutex_init(&adev->mman.gtt_window_lock); + /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&adev->mman.bdev, - adev->mman.bo_global_ref.ref.object, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, DRM_FILE_PAGE_OFFSET, @@ -1922,7 +1824,6 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); - amdgpu_ttm_global_fini(adev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); } @@ -2069,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; - if (direct_submit && !ring->ready) { + if (direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index fe8f276e9811..b5b2d101f7db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,8 +39,6 @@ #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 struct amdgpu_mman { - struct 
ttm_bo_global_ref bo_global_ref; - struct drm_global_reference mem_global_ref; struct ttm_bo_device bdev; bool mem_global_referenced; bool initialized; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index aa6641b944a0..7ac25a1c7853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -58,6 +58,17 @@ struct psp_firmware_header_v1_0 { }; /* version_major=1, version_minor=0 */ +struct ta_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t ta_xgmi_ucode_version; + uint32_t ta_xgmi_offset_bytes; + uint32_t ta_xgmi_size_bytes; + uint32_t ta_ras_ucode_version; + uint32_t ta_ras_offset_bytes; + uint32_t ta_ras_size_bytes; +}; + +/* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; uint32_t ucode_feature_version; @@ -170,6 +181,7 @@ union amdgpu_firmware_header { struct mc_firmware_header_v1_0 mc; struct smc_firmware_header_v1_0 smc; struct psp_firmware_header_v1_0 psp; + struct ta_firmware_header_v1_0 ta; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e5a6db6beab7..4e5d13e41f6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -692,6 +692,8 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, buf_sizes[0x1] = dpb_size; buf_sizes[0x2] = image_size; buf_sizes[0x4] = min_ctx_size; + /* store image width to adjust nb memory pstate */ + adev->uvd.decode_image_width = width; return 0; } @@ -1243,30 +1245,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence; long r; - uint32_t ip_instance = ring->me; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get create msg 
(%ld).\n", ip_instance, r); + if (r) goto error; - } r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); - } else { - DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); + else if (r > 0) r = 0; - } dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a3ab1a41060f..5eb63288d157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -65,6 +65,8 @@ struct amdgpu_uvd { struct drm_sched_entity entity; struct delayed_work idle_work; unsigned harvest_config; + /* store image width to adjust nb memory state */ + unsigned decode_image_width; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 5f3f54073818..98a1b2ce2b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -1032,8 +1032,10 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) return 0; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, VCE_CMD_END); 
amdgpu_ring_commit(ring); @@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= timeout) r = -ETIMEDOUT; - } return r; } @@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) return 0; r = amdgpu_vce_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index a1f209eed4c4..50293652af14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int 
amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 27da13df2f11..e2e42e3fbcf3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -425,11 +425,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -441,14 +439,9 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -570,30 +563,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } dma_fence_put(fence); - error: return r; } @@ -606,11 +589,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: vcn enc failed to lock ring %d 
(%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, VCN_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -620,14 +601,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -742,27 +717,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; @@ -778,11 +745,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); @@ -796,14 +760,8 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; return r; } @@ -856,21 +814,18 @@ int 
amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r = 0; r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; goto error; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto error; - } else + } else { r = 0; + } for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); @@ -879,15 +834,10 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) DRM_UDELAY(1); } - if (i < adev->usec_timeout) - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); - else { - DRM_ERROR("ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; dma_fence_put(fence); - error: return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index f2f358aa0597..462a04e0f5e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -23,16 +23,6 @@ #include "amdgpu.h" -uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) -{ - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; - - addr -= AMDGPU_VA_RESERVED_SIZE; - addr = amdgpu_gmc_sign_extend(addr); - - return addr; -} - bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { /* By now all MMIO pages except mailbox are blocked */ @@ -41,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) return RREG32_NO_KIQ(0xc040) == 0xffffffff; } -int amdgpu_allocate_static_csa(struct amdgpu_device *adev) -{ - int r; - void *ptr; - - r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj, - &adev->virt.csa_vmid0_addr, &ptr); - if (r) - return r; - - memset(ptr, 0, AMDGPU_CSA_SIZE); - return 0; -} - -void 
amdgpu_free_static_csa(struct amdgpu_device *adev) { - amdgpu_bo_free_kernel(&adev->virt.csa_obj, - &adev->virt.csa_vmid0_addr, - NULL); -} - -/* - * amdgpu_map_static_csa should be called during amdgpu_vm_init - * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command - * submission of GFX should use this virtual address within META_DATA init - * package to support SRIOV gfx preemption. - */ -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va) -{ - uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; - int r; - - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - csa_tv.bo = &adev->virt.csa_obj->tbo; - csa_tv.shared = true; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); - return r; - } - - *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); - if (!*bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - DRM_ERROR("failed to create bo_va for static CSA\n"); - return -ENOMEM; - } - - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - AMDGPU_CSA_SIZE); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - - ttm_eu_backoff_reservation(&ticket, &list); - return 0; -} - void amdgpu_virt_init_setting(struct amdgpu_device *adev) { /* enable virtual display */ 
@@ -162,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_read; - if (in_interrupt()) - might_sleep(); - + might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -210,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; - if (in_interrupt()) - might_sleep(); - + might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); @@ -228,6 +132,46 @@ failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } +void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return; + +failed_kiq: + pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); +} + /** * amdgpu_virt_request_full_gpu() - request full gpu access * @amdgpu: amdgpu device. 
@@ -390,7 +334,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) if (adev->fw_vram_usage.va != NULL) { adev->virt.fw_reserve.p_pf2vf = - (struct amdgim_pf2vf_info_header *)( + (struct amd_sriov_msg_pf2vf_info_header *)( adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 880ac113a3a9..722deefc0a7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -63,8 +63,8 @@ struct amdgpu_virt_ops { * Firmware Reserve Frame buffer */ struct amdgpu_virt_fw_reserve { - struct amdgim_pf2vf_info_header *p_pf2vf; - struct amdgim_vf2pf_info_header *p_vf2pf; + struct amd_sriov_msg_pf2vf_info_header *p_pf2vf; + struct amd_sriov_msg_vf2pf_info_header *p_vf2pf; unsigned int checksum_key; }; /* @@ -85,15 +85,17 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, }; -struct amdgim_pf2vf_info_header { +struct amd_sriov_msg_pf2vf_info_header { /* the total structure size in byte. */ uint32_t size; /* version of this structure, written by the GIM */ uint32_t version; + /* reserved */ + uint32_t reserved[2]; } __aligned(4); struct amdgim_pf2vf_info_v1 { /* header contains size and version */ - struct amdgim_pf2vf_info_header header; + struct amd_sriov_msg_pf2vf_info_header header; /* max_width * max_height */ unsigned int uvd_enc_max_pixels_count; /* 16x16 pixels/sec, codec independent */ @@ -112,7 +114,7 @@ struct amdgim_pf2vf_info_v1 { struct amdgim_pf2vf_info_v2 { /* header contains size and version */ - struct amdgim_pf2vf_info_header header; + struct amd_sriov_msg_pf2vf_info_header header; /* use private key from mailbox 2 to create chueksum */ uint32_t checksum; /* The features flags of the GIM driver supports. 
*/ @@ -137,20 +139,22 @@ struct amdgim_pf2vf_info_v2 { uint64_t vcefw_kboffset; /* VCE FW size in KB */ uint32_t vcefw_ksize; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)]; } __aligned(4); -struct amdgim_vf2pf_info_header { +struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte. */ uint32_t size; /*version of this structure, written by the guest */ uint32_t version; + /* reserved */ + uint32_t reserved[2]; } __aligned(4); struct amdgim_vf2pf_info_v1 { /* header contains size and version */ - struct amdgim_vf2pf_info_header header; + struct amd_sriov_msg_vf2pf_info_header header; /* driver version */ char driver_version[64]; /* driver certification, 1=WHQL, 0=None */ @@ -180,7 +184,7 @@ struct amdgim_vf2pf_info_v1 { struct amdgim_vf2pf_info_v2 { /* header contains size and version */ - struct amdgim_vf2pf_info_header header; + struct amd_sriov_msg_vf2pf_info_header header; uint32_t checksum; /* driver version */ uint8_t driver_version[64]; @@ -206,7 +210,7 @@ struct amdgim_vf2pf_info_v2 { uint32_t uvd_enc_usage; /* guest uvd engine usage percentage. 0xffff means N/A. 
*/ uint32_t uvd_enc_health; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)]; } __aligned(4); #define AMDGPU_FW_VRAM_VF2PF_VER 2 @@ -238,7 +242,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; struct amdgpu_virt { uint32_t caps; struct amdgpu_bo *csa_obj; - uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; @@ -251,8 +254,6 @@ struct amdgpu_virt { uint32_t gim_feature; }; -#define AMDGPU_CSA_SIZE (8 * 1024) - #define amdgpu_sriov_enabled(adev) \ ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) @@ -277,17 +278,13 @@ static inline bool is_virtual_machine(void) #endif } -struct amdgpu_vm; - -uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); -int amdgpu_allocate_static_csa(struct amdgpu_device *adev); -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va); -void amdgpu_free_static_csa(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t rreg1, + uint32_t ref, uint32_t mask); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0877ff9a9594..e73d152659a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - entry->tv.shared = true; + /* One for the VM updates, one for TTM and one for the CS job */ + entry->tv.num_shared = 3; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = reservation_object_reserve_shared(bo->tbo.resv); - if (r) - return r; - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto error; @@ -1844,10 +1841,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -3028,6 +3021,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = reservation_object_reserve_shared(root->tbo.resv, 1); + if (r) + goto error_unreserve; + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); @@ -3057,7 +3054,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } INIT_KFIFO(vm->faults); - vm->fault_credit = 16; return 0; @@ -3270,42 +3266,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID - * - * @adev: amdgpu_device pointer - * @pasid: PASID do identify the VM - * - * This function is expected to be called in interrupt context. 
- * - * Returns: - * True if there was fault credit, false otherwise - */ -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid) -{ - struct amdgpu_vm *vm; - - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (!vm) { - /* VM not found, can't track fault credit */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) { - /* Too many faults in this VM */ - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - vm->fault_credit--; - spin_unlock(&adev->vm_manager.pasid_lock); - return true; -} - -/** * amdgpu_vm_manager_init - init the VM manager * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2a8898d19c8b..e8dcfd59fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -229,9 +229,6 @@ struct amdgpu_vm { /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); - /* Limit non-retry fault storms */ - unsigned int fault_credit; - /* Points to the KFD process VM info */ struct amdkfd_process_info *process_info; @@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 897afbb348c1..0b263a9857c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -23,7 +23,7 @@ */ #include <linux/list.h> #include "amdgpu.h" -#include "amdgpu_psp.h" +#include "amdgpu_xgmi.h" static DEFINE_MUTEX(xgmi_mutex); @@ -31,15 +31,16 @@ static DEFINE_MUTEX(xgmi_mutex); #define AMDGPU_MAX_XGMI_HIVE 8 #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 -struct amdgpu_hive_info { - uint64_t hive_id; - struct list_head device_list; -}; - static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; static unsigned hive_count = 0; -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) + +void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) +{ + return &hive->device_list; +} + +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) { int i; struct amdgpu_hive_info *tmp; @@ -58,62 +59,99 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) tmp = &xgmi_hives[hive_count++]; tmp->hive_id = adev->gmc.xgmi.hive_id; INIT_LIST_HEAD(&tmp->device_list); + mutex_init(&tmp->hive_lock); + return tmp; } +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) +{ + int ret = -EINVAL; + + /* Each psp need to set the latest topology */ + ret = psp_xgmi_set_topology_info(&adev->psp, + hive->number_devices, + &hive->topology_info); + if (ret) + dev_err(adev->dev, + "XGMI: Set topology failure on device %llx, hive %llx, ret %d", + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + else + dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n", + adev->gmc.xgmi.physical_node_id, + adev->gmc.xgmi.hive_id); + + return ret; +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { - struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE]; + struct psp_xgmi_topology_info *hive_topology; struct amdgpu_hive_info *hive; struct amdgpu_xgmi *entry; - struct amdgpu_device *tmp_adev; + struct amdgpu_device *tmp_adev = NULL; int count = 0, ret = -EINVAL; - if 
((adev->asic_type < CHIP_VEGA20) || - (adev->flags & AMD_IS_APU) ) + if (!adev->gmc.xgmi.supported) return 0; - adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp); + + adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); - memset(&tmp_topology[0], 0, sizeof(tmp_topology)); mutex_lock(&xgmi_mutex); hive = amdgpu_get_xgmi_hive(adev); if (!hive) goto exit; + hive_topology = &hive->topology_info; + list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); list_for_each_entry(entry, &hive->device_list, head) - tmp_topology[count++].device_id = entry->device_id; + hive_topology->nodes[count++].node_id = entry->node_id; + hive->number_devices = count; - ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology); - if (ret) { - dev_err(adev->dev, - "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - adev->gmc.xgmi.device_id, - adev->gmc.xgmi.hive_id, ret); - goto exit; - } - /* Each psp need to set the latest topology */ + /* Each psp need to get the latest topology */ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology); + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); if (ret) { dev_err(tmp_adev->dev, - "XGMI: Set topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.device_id, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ + /* To do : continue with some node failed or disable the whole hive */ break; } } - if (!ret) - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", - adev->gmc.xgmi.physical_node_id, - adev->gmc.xgmi.hive_id); + + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + break; + } exit: 
mutex_unlock(&xgmi_mutex); return ret; } +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive; + if (!adev->gmc.xgmi.supported) + return; + + mutex_lock(&xgmi_mutex); + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + goto exit; + + if (!(hive->number_devices--)) + mutex_destroy(&hive->hive_lock); + +exit: + mutex_unlock(&xgmi_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h new file mode 100644 index 000000000000..6151eb9c8ad3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -0,0 +1,40 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __AMDGPU_XGMI_H__ +#define __AMDGPU_XGMI_H__ + +#include "amdgpu_psp.h" + +struct amdgpu_hive_info { + uint64_t hive_id; + struct list_head device_list; + struct psp_xgmi_topology_info topology_info; + int number_devices; + struct mutex hive_lock; +}; + +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); +int amdgpu_xgmi_add_device(struct amdgpu_device *adev); +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 79220a91abe3..86e14c754dd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable) if (pi->caps_sq_ramping || pi->caps_db_ramping || pi->caps_td_ramping || pi->caps_tcp_ramping) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { ret = ci_program_pt_config_registers(adev, didt_config_ci); if (ret) { - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return ret; } } ci_do_enable_didt(adev, enable); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index f41f5f57e9f3..71c50d8900e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1755,6 +1755,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .flush_hdp = &cik_flush_hdp, .invalidate_hdp = &cik_invalidate_hdp, .need_full_reset = &cik_need_full_reset, + .init_doorbell_index = &legacy_doorbell_index_init, }; static int cik_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index e49c6f15a0a0..54c625a2e570 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -30,4 +30,5 @@ void cik_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int cik_set_ip_blocks(struct amdgpu_device *adev); +void legacy_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index b5775c6a857b..8a8b4967a101 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * [127:96] - reserved */ -/** - * cik_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - /** * cik_ih_decode_iv - decode an interrupt vector * @@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { static const struct amdgpu_ih_funcs cik_ih_funcs = { .get_wptr = cik_ih_get_wptr, - .prescreen_iv = cik_ih_prescreen_iv, .decode_iv = cik_ih_decode_iv, .set_rptr = cik_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index b918c8886b75..45795191de1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = 
amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ @@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } cik_sdma_enable(adev, true); for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; + amdgpu_ring_write(ring, 
SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); @@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); @@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct 
amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index df5ac4d85a00..9d3ea298e116 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -208,34 +208,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) } /** - * cz_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. 
- */ -static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * cz_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { static const struct amdgpu_ih_funcs cz_ih_funcs = { .get_wptr = cz_ih_get_wptr, - .prescreen_iv = cz_ih_prescreen_iv, .decode_iv = cz_ih_decode_iv, .set_rptr = cz_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index d76eb27945dc..1dc3013ea1d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + 
+ if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ @@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START)); ib.ptr[2] = 0xDEADBEEF; @@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) CP_ME_CNTL__CE_HALT_MASK)); WREG32(mmSCRATCH_UMSK, 0); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; for (i = 0; i < 
adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; + adev->gfx.compute_ring[i].sched.ready = false; } udelay(50); } @@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the rings */ gfx_v6_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } return 0; } @@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_RB2_CNTL, tmp); WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); - adev->gfx.compute_ring[0].ready = false; - adev->gfx.compute_ring[1].ready = false; for (i = 0; i < 2; i++) { - r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]); + r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]); if (r) return r; - adev->gfx.compute_ring[i].ready = true; } return 0; @@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); -} - static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; volatile u32 *dst_ptr; - u32 dws, i; + u32 dws; u64 reg_list_mc_addr; const struct cs_section_def *cs_data; int r; @@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (src_ptr) { - /* save restore block */ - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_obj, - &adev->gfx.rlc.save_restore_gpu_addr, - (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", - r); - gfx_v6_0_rlc_fini(adev); + /* init save restore block */ + r = 
amdgpu_gfx_rlc_init_sr(adev, dws); + if (r) return r; - } - - /* write the sr buffer */ - dst_ptr = adev->gfx.rlc.sr_ptr; - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) - dst_ptr[i] = cpu_to_le32(src_ptr[i]); - - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } if (cs_data) { @@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.cs_ptr); if (r) { dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); return r; } @@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) if (!adev->gfx.rlc_fw) return -EINVAL; - gfx_v6_0_rlc_stop(adev); - gfx_v6_0_rlc_reset(adev); + adev->gfx.rlc.funcs->stop(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v6_0_init_pg(adev); gfx_v6_0_init_cg(adev); @@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_UCODE_ADDR, 0); gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); - gfx_v6_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q }; +static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = { + .init = gfx_v6_0_rlc_init, + .resume = gfx_v6_0_rlc_resume, + .stop = gfx_v6_0_rlc_stop, + .reset = gfx_v6_0_rlc_reset, + .start = gfx_v6_0_rlc_start +}; + static int gfx_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle) adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v6_0_gfx_funcs; + adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs; gfx_v6_0_set_ring_funcs(adev); gfx_v6_0_set_irq_funcs(adev); @@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle) return r; } - r = 
gfx_v6_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - gfx_v6_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); return 0; } @@ -3177,7 +3146,7 @@ static int gfx_v6_0_hw_init(void *handle) gfx_v6_0_constants_init(adev); - r = gfx_v6_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfx_v6_0_cp_enable(adev, false); - gfx_v6_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v6_0_fini_pg(adev); return 0; @@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v6_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_ring *ring; + + switch (entry->ring_id) { + case 0: + ring = &adev->gfx.gfx_ring[0]; + break; + case 1: + case 2: + ring = &adev->gfx.compute_ring[entry->ring_id - 1]; + break; + default: + return; + } + drm_sched_fault(&ring->sched); +} + static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v6_0_fault(adev, entry); return 0; } @@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v6_0_fault(adev, entry); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 0e72bc09939a..3a9fb6018c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -882,7 +882,6 @@ static const u32 
kalindi_rlc_save_restore_register_list[] = static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); @@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. 
*/ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ @@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); @@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); ib.ptr[2] = 0xDEADBEEF; @@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -2403,7 +2392,7 @@ static void 
gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } udelay(50); } @@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v7_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } return 0; } @@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; + adev->gfx.compute_ring[i].sched.ready = false; } udelay(50); } @@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) * GFX7_MEC_HPD_SIZE * 2; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + amdgpu_ring_test_helper(ring); } return 0; @@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, * The RLC is a multi-purpose microengine that handles a * variety of functions. 
*/ -static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); -} - static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; - volatile u32 *dst_ptr; - u32 dws, i; + u32 dws; const struct cs_section_def *cs_data; int r; @@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (src_ptr) { - /* save restore block */ - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_obj, - &adev->gfx.rlc.save_restore_gpu_addr, - (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init save restore block */ + r = amdgpu_gfx_rlc_init_sr(adev, dws); + if (r) return r; - } - - /* write the sr buffer */ - dst_ptr = adev->gfx.rlc.sr_ptr; - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) - dst_ptr[i] = cpu_to_le32(src_ptr[i]); - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); - - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v7_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if 
(adev->gfx.rlc.cp_table_size) { - - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - gfx_v7_0_init_cp_pg_table(adev); - - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - } return 0; @@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev) return orig; } -static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + return true; +} + +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) { u32 tmp, i, mask; @@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) { u32 tmp; @@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) adev->gfx.rlc_feature_version = le32_to_cpu( hdr->ucode_feature_version); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); - gfx_v7_0_rlc_reset(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v7_0_init_pg(adev); @@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) if (adev->asic_type == CHIP_BONAIRE) WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); - gfx_v7_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable) WREG32(mmRLC_PG_CNTL, data); } -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) +static int 
gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 4; - u32 bo_offset = 0; - u32 table_offset, table_size; - if (adev->asic_type == CHIP_KAVERI) - max_me = 5; - - if (adev->gfx.rlc.cp_table_ptr == NULL) - return; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = 
le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } + return 5; + else + return 4; } static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, @@ -4288,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { }; static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { - .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, + .set_safe_mode = gfx_v7_0_set_safe_mode, + .unset_safe_mode = gfx_v7_0_unset_safe_mode, + .init = gfx_v7_0_rlc_init, + .get_csb_size = gfx_v7_0_get_csb_size, + .get_csb_buffer = gfx_v7_0_get_csb_buffer, + .get_cp_table_num = gfx_v7_0_cp_pg_table_num, + .resume = gfx_v7_0_rlc_resume, + .stop = gfx_v7_0_rlc_stop, + .reset = gfx_v7_0_rlc_reset, + .start = gfx_v7_0_rlc_start }; static int gfx_v7_0_early_init(void *handle) @@ -4477,7 +4363,7 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4540,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - r = gfx_v7_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -4604,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); gfx_v7_0_cp_compute_fini(adev); - gfx_v7_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); gfx_v7_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -4627,7 +4513,7 @@ static int 
gfx_v7_0_hw_init(void *handle) gfx_v7_0_constants_init(adev); /* init rlc */ - r = gfx_v7_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4645,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); gfx_v7_0_cp_enable(adev, false); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v7_0_fini_pg(adev); return 0; @@ -4730,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle) gfx_v7_0_update_cg(adev, false); /* stop the rlc */ - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* Disable GFX parsing/prefetching */ WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); @@ -4959,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v7_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_ring *ring; + u8 me_id, pipe_id; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if ((ring->me == me_id) && (ring->pipe == pipe_id)) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } @@ -4974,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, { DRM_ERROR("Illegal instruction in command stream\n"); // XXX soft reset the gfx block only - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 617b0c8908a3..381f593b0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -44,7 +44,6 @@ #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" -#include "gca/gfx_8_0_enum.h" #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" @@ -54,7 +53,7 @@ #include "ivsrcid/ivsrcid_vislands30.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_MEC_HPD_SIZE 2048 +#define GFX8_MEC_HPD_SIZE 4096 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -839,18 +838,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -862,14 +857,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -886,19 +878,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + 
(index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 16, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; ib.ptr[2] = lower_32_bits(gpu_addr); @@ -912,22 +901,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = adev->wb.wb[index]; - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("ib test on ring %d failed\n", ring->idx); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -1298,81 +1282,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(0); } -static void cz_init_cp_jump_table(struct amdgpu_device *adev) +static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 4; - u32 bo_offset = 0; - u32 table_offset, table_size; - if (adev->asic_type == CHIP_CARRIZO) - max_me = 5; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - 
(adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 4) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } -} - -static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); + return 5; + else + return 4; } static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; - u32 dws; const struct cs_section_def *cs_data; int r; @@ -1381,44 +1300,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); - - r = amdgpu_bo_create_reserved(adev, dws * 4, 
PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v8_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v8_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY)) { adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - cz_init_cp_jump_table(adev); - - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); } return 0; @@ -1443,7 +1336,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -1629,7 +1522,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; /* bail if the compute ring is not ready */ - if (!ring->ready) + if (!ring->sched.ready) return 0; tmp = RREG32(mmGB_EDC_MODE); @@ -1997,7 +1890,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + 
ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); @@ -2088,7 +1981,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = gfx_v8_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -2108,7 +2001,7 @@ static int gfx_v8_0_sw_init(void *handle) /* no gfx doorbells on iceland */ if (adev->asic_type != CHIP_TOPAZ) { ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; + ring->doorbell_index = adev->doorbell_index.gfx_ring0; } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, @@ -2181,7 +2074,7 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); - gfx_v8_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, (void **)&adev->gfx.rlc.cs_ptr); @@ -4175,10 +4068,15 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) { - gfx_v8_0_rlc_stop(adev); - gfx_v8_0_rlc_reset(adev); + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_init_csb(adev); + return 0; + } + + adev->gfx.rlc.funcs->stop(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v8_0_init_pg(adev); - gfx_v8_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -4197,7 +4095,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } WREG32(mmCP_ME_CNTL, tmp); udelay(50); @@ -4322,7 +4220,7 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu tmp = REG_SET_FIELD(0, 
CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); + adev->doorbell_index.gfx_ring0); WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, @@ -4379,10 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + ring->sched.ready = true; + r = amdgpu_ring_test_helper(ring); return r; } @@ -4396,8 +4292,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; - adev->gfx.kiq.ring.ready = false; + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; } udelay(50); } @@ -4473,11 +4369,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - r = amdgpu_ring_test_ring(kiq_ring); - if (r) { + r = amdgpu_ring_test_helper(kiq_ring); + if (r) DRM_ERROR("KCQ enable failed\n"); - kiq_ring->ready = false; - } return r; } @@ -4755,8 +4649,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) { if (adev->asic_type > CHIP_TONGA) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2); } /* enable doorbells */ WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); @@ -4781,7 +4675,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; 
amdgpu_bo_unreserve(ring->mqd_obj); - ring->ready = true; + ring->sched.ready = true; return 0; } @@ -4820,10 +4714,7 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) */ for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { ring = &adev->gfx.compute_ring[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + r = amdgpu_ring_test_helper(ring); } done: @@ -4867,7 +4758,7 @@ static int gfx_v8_0_hw_init(void *handle) gfx_v8_0_init_golden_registers(adev); gfx_v8_0_constants_init(adev); - r = gfx_v8_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4899,7 +4790,7 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); } - r = amdgpu_ring_test_ring(kiq_ring); + r = amdgpu_ring_test_helper(kiq_ring); if (r) DRM_ERROR("KCQ disable failed\n"); @@ -4973,16 +4864,16 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (!gfx_v8_0_wait_for_idle(adev)) gfx_v8_0_cp_enable(adev, false); else pr_err("cp is busy, skip halt cp\n"); if (!gfx_v8_0_wait_for_rlc_idle(adev)) - gfx_v8_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); else pr_err("rlc is busy, skip halt rlc\n"); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -5061,17 +4952,16 @@ static bool gfx_v8_0_check_soft_reset(void *handle) static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; /* stop the rlc */ - gfx_v8_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); if 
(REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) @@ -5165,14 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle) static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || @@ -5197,7 +5086,7 @@ static int gfx_v8_0_post_soft_reset(void *handle) REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) gfx_v8_0_cp_gfx_resume(adev); - gfx_v8_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -5445,7 +5334,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5499,7 +5388,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -5593,57 +5482,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e -static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) { - u32 data; - unsigned i; + uint32_t rlc_setting; - data = RREG32(mmRLC_CNTL); - if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; + rlc_setting = RREG32(mmRLC_CNTL); + if (!(rlc_setting & 
RLC_CNTL__RLC_ENABLE_F32_MASK)) + return false; - if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - data |= RLC_SAFE_MODE__CMD_MASK; - data &= ~RLC_SAFE_MODE__MESSAGE_MASK; - data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32(mmRLC_SAFE_MODE, data); + return true; +} - for (i = 0; i < adev->usec_timeout; i++) { - if ((RREG32(mmRLC_GPM_STAT) & - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) - break; - udelay(1); - } +static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + data = RREG32(mmRLC_CNTL); + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(mmRLC_SAFE_MODE, data); - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) - break; - udelay(1); - } - adev->gfx.rlc.in_safe_mode = true; + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); } } -static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) { - u32 data = 0; + uint32_t data; unsigned i; data = RREG32(mmRLC_CNTL); - if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; - - if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - if (adev->gfx.rlc.in_safe_mode) { - data |= RLC_SAFE_MODE__CMD_MASK; - data &= ~RLC_SAFE_MODE__MESSAGE_MASK; - WREG32(mmRLC_SAFE_MODE, data); - adev->gfx.rlc.in_safe_mode = false; - } - } + 
data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + WREG32(mmRLC_SAFE_MODE, data); for (i = 0; i < adev->usec_timeout; i++) { if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) @@ -5653,8 +5538,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) } static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { - .enter_safe_mode = iceland_enter_rlc_safe_mode, - .exit_safe_mode = iceland_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v8_0_is_rlc_enabled, + .set_safe_mode = gfx_v8_0_set_safe_mode, + .unset_safe_mode = gfx_v8_0_unset_safe_mode, + .init = gfx_v8_0_rlc_init, + .get_csb_size = gfx_v8_0_get_csb_size, + .get_csb_buffer = gfx_v8_0_get_csb_buffer, + .get_cp_table_num = gfx_v8_0_cp_jump_table_num, + .resume = gfx_v8_0_rlc_resume, + .stop = gfx_v8_0_rlc_stop, + .reset = gfx_v8_0_rlc_reset, + .start = gfx_v8_0_rlc_start }; static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -5662,7 +5556,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -5758,7 +5652,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5768,7 +5662,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); @@ -5851,7 +5745,7 @@ static void 
gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) @@ -6131,9 +6025,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; if (ib->flags & AMDGPU_IB_FLAG_CE) @@ -6161,9 +6057,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); @@ -6738,12 +6636,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v8_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - 
schedule_work(&adev->reset_work); + gfx_v8_0_fault(adev, entry); return 0; } @@ -6752,7 +6677,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v8_0_fault(adev, entry); return 0; } @@ -6976,10 +6901,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ - .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_kiq, .test_ring = gfx_v8_0_ring_test_ring, - .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v8_0_ring_emit_rreg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 21363b2b2ee5..7556716038d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -41,7 +41,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" #define GFX9_NUM_GFX_RINGS 1 -#define GFX9_MEC_HPD_SIZE 2048 +#define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); @@ -396,18 +397,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed 
to lock ring %d (%d).\n", - ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -419,14 +416,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -443,19 +437,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 16, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; ib.ptr[2] = lower_32_bits(gpu_addr); @@ -469,22 +460,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); - r = -ETIMEDOUT; - goto err2; + r = -ETIMEDOUT; + goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - goto err2; + goto err2; } tmp = adev->wb.wb[index]; - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); - r = 0; - } else { - DRM_ERROR("ib test on ring %d failed\n", ring->idx); - r = -EINVAL; - } + if (tmp == 0xDEADBEEF) + r = 0; + else 
+ r = -EINVAL; err2: amdgpu_ib_free(adev, &ib, NULL); @@ -660,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + /* + * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin + * instead of picasso_rlc.bin. + * Judgment method: + * PCO AM4: revision >= 0xC8 && revision <= 0xCF + * or revision >= 0xD8 && revision <= 0xDF + * otherwise is PCO FP5 + */ + if (!strcmp(chip_name, "picasso") && + (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || + ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; @@ -1065,85 +1064,13 @@ static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); } -static void rv_init_cp_jump_table(struct amdgpu_device *adev) +static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 5; - u32 bo_offset = 0; - u32 table_offset, table_size; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 4) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = 
le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } -} - -static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) -{ - /* clear state block */ - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - - /* jump table block */ - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); + return 5; } static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; - u32 dws; const struct cs_section_def *cs_data; int r; @@ -1152,45 +1079,18 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", - r); - gfx_v9_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v9_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if (adev->asic_type == CHIP_RAVEN) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void 
**)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_err(adev->dev, - "(%d) failed to create cp table bo\n", r); - gfx_v9_0_rlc_fini(adev); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - rv_init_cp_jump_table(adev); - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); } switch (adev->asic_type) { @@ -1264,7 +1164,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -1635,8 +1535,8 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) /* Clear GDS reserved memory */ r = amdgpu_ring_alloc(ring, 17); if (r) { - DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", + ring->name, r); return r; } @@ -1680,7 +1580,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); @@ -1748,7 +1648,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - r = gfx_v9_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -1769,7 +1669,7 @@ static int gfx_v9_0_sw_init(void *handle) else sprintf(ring->name, "gfx_%d", i); ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); if 
(r) @@ -2499,12 +2399,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return 0; } - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - gfx_v9_0_rlc_reset(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v9_0_init_pg(adev); @@ -2515,15 +2415,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN || - adev->asic_type == CHIP_VEGA20) { - if (amdgpu_lbpw != 0) + switch (adev->asic_type) { + case CHIP_RAVEN: + if (amdgpu_lbpw == 0) + gfx_v9_0_enable_lbpw(adev, false); + else + gfx_v9_0_enable_lbpw(adev, true); + break; + case CHIP_VEGA20: + if (amdgpu_lbpw > 0) gfx_v9_0_enable_lbpw(adev, true); else gfx_v9_0_enable_lbpw(adev, false); + break; + default: + break; } - gfx_v9_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -2538,7 +2447,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 
0 : 1); if (!enable) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); @@ -2728,7 +2637,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v9_0_cp_gfx_start(adev); - ring->ready = true; + ring->sched.ready = true; return 0; } @@ -2743,8 +2652,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; - adev->gfx.kiq.ring.ready = false; + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; } udelay(50); } @@ -2867,11 +2776,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - r = amdgpu_ring_test_ring(kiq_ring); - if (r) { + r = amdgpu_ring_test_helper(kiq_ring); + if (r) DRM_ERROR("KCQ enable failed\n"); - kiq_ring->ready = false; - } return r; } @@ -3089,9 +2996,9 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* enable the doorbell if requested */ if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, - (AMDGPU_DOORBELL64_KIQ *2) << 2); + (adev->doorbell_index.kiq * 2) << 2); WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, @@ -3250,7 +3157,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->ready = true; + ring->sched.ready = true; return 0; } @@ -3315,19 +3222,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return r; ring = &adev->gfx.gfx_ring[0]; 
- r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + amdgpu_ring_test_helper(ring); } gfx_v9_0_enable_gui_idle_interrupt(adev, true); @@ -3354,7 +3255,7 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - r = gfx_v9_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3392,7 +3293,7 @@ static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, 0); amdgpu_ring_write(kiq_ring, 0); } - r = amdgpu_ring_test_ring(kiq_ring); + r = amdgpu_ring_test_helper(kiq_ring); if (r) DRM_ERROR("KCQ disable failed\n"); @@ -3434,7 +3335,7 @@ static int gfx_v9_0_hw_fini(void *handle) } gfx_v9_0_cp_enable(adev, false); - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v9_0_csb_vram_unpin(adev); @@ -3509,7 +3410,7 @@ static int gfx_v9_0_soft_reset(void *handle) if (grbm_soft_reset) { /* stop the rlc */ - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* Disable GFX parsing/prefetching */ gfx_v9_0_cp_gfx_enable(adev, false); @@ -3608,64 +3509,47 @@ static int gfx_v9_0_late_init(void *handle) return 0; } -static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) { - uint32_t rlc_setting, data; - unsigned i; - - if (adev->gfx.rlc.in_safe_mode) - return; + uint32_t rlc_setting; /* if RLC is not enabled, do nothing */ rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; - - if (adev->cg_flags & - (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | - AMD_CG_SUPPORT_GFX_3D_CGCG)) { - data = RLC_SAFE_MODE__CMD_MASK; - data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + return 
false; - /* wait for RLC_SAFE_MODE */ - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) - break; - udelay(1); - } - adev->gfx.rlc.in_safe_mode = true; - } + return true; } -static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) { - uint32_t rlc_setting, data; + uint32_t data; + unsigned i; - if (!adev->gfx.rlc.in_safe_mode) - return; + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); - /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); - if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; - - if (adev->cg_flags & - (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - /* - * Try to exit safe mode only if it is already in safe - * mode. - */ - data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); - adev->gfx.rlc.in_safe_mode = false; + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); } } +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); +} + static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_enter_rlc_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3676,7 +3560,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - gfx_v9_0_exit_rlc_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3774,7 
+3658,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { @@ -3814,7 +3698,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -3822,7 +3706,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); @@ -3862,7 +3746,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, @@ -3891,8 +3775,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, } static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { - .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, + .set_safe_mode = gfx_v9_0_set_safe_mode, + .unset_safe_mode = gfx_v9_0_unset_safe_mode, + .init = gfx_v9_0_rlc_init, + .get_csb_size = gfx_v9_0_get_csb_size, + .get_csb_buffer = gfx_v9_0_get_csb_buffer, + .get_cp_table_num = gfx_v9_0_cp_jump_table_num, + .resume = gfx_v9_0_rlc_resume, + .stop = gfx_v9_0_rlc_stop, + .reset = gfx_v9_0_rlc_reset, + .start = gfx_v9_0_rlc_start }; static int gfx_v9_0_set_powergating_state(void *handle, @@ -4073,9 +3966,11 @@ 
static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; if (ib->flags & AMDGPU_IB_FLAG_CE) @@ -4104,20 +3999,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ - amdgpu_ring_write(ring, + amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN - (2 << 0) | + (2 << 0) | #endif - lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, control); + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); } static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, @@ -4696,12 +4593,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v9_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = 
&adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v9_0_fault(adev, entry); return 0; } @@ -4710,7 +4634,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v9_0_fault(adev, entry); return 0; } @@ -4837,10 +4761,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ - .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence_kiq, .test_ring = gfx_v9_0_ring_test_ring, - .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v9_0_ring_emit_rreg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index bfa317ad20a9..f5edddf3b29d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -35,20 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; } -static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ + int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - 
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - gfxhub_v1_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 206e29cad753..92d3a70cd9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v1_0_init(struct amdgpu_device *adev); u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 73ad02aea2b2..9fc3296592fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -359,7 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -581,7 +582,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 
0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 910c4ce19cb3..761dcfb2fec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -430,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -698,7 +699,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 747c068379dc..1ad7e6b8ed1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -633,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * Flush the TLB for the requested page table (CIK). 
*/ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) + uint32_t vmid, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -942,7 +942,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f35d7a554ad5..ce150de723c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } +/** + * vega10_ih_prescreen_iv - prescreen an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Returns true if the interrupt vector should be further processed. + */ +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + uint64_t addr) +{ + struct amdgpu_vm *vm; + u64 key; + int r; + + /* No PASID, can't identify faulting process */ + if (!entry->pasid) + return true; + + /* Not a retry fault */ + if (!(entry->src_data[1] & 0x80)) + return true; + + /* Track retry faults in per-VM fault FIFO. 
*/ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); + if (!vm) { + /* VM not found, process it normally */ + spin_unlock(&adev->vm_manager.pasid_lock); + return true; + } + + key = AMDGPU_VM_FAULT(entry->pasid, addr); + r = amdgpu_vm_add_fault(vm->fault_hash, key); + + /* Hash table is full or the fault is already being processed, + * ignore further page faults + */ + if (r != 0) { + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + /* No locking required with single writer and single reader */ + r = kfifo_put(&vm->faults, key); + if (!r) { + /* FIFO is full. Ignore it until there is space */ + amdgpu_vm_clear_fault(vm->fault_hash, key); + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + + spin_unlock(&adev->vm_manager.pasid_lock); + /* It's the first fault for this address, process it normally */ + return true; +} + static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); @@ -293,14 +352,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) { u32 req = 0; - /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 
flush_type); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); @@ -312,48 +371,6 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) return req; } -static signed long amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) -{ - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - struct amdgpu_ring *ring = &kiq->ring; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, - ref, mask); - amdgpu_fence_emit_polling(ring, &seq); - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for IRQ context */ - if (r < 1 && in_interrupt()) - goto failed_kiq; - - might_sleep(); - - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq; - - return 0; - -failed_kiq: - pr_err("failed to invalidate tlb with kiq\n"); - return r; -} - /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -362,64 +379,50 @@ failed_kiq: */ /** - * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback + * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @flush_type: the flush type * - * Flush the TLB for the requested page table. + * Flush the TLB for the requested page table using certain type. 
*/ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) + uint32_t vmid, uint32_t flush_type) { - /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - int r; for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid); + u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); - if (adev->gfx.kiq.ring.ready && + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && !adev->in_gpu_reset) { - r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng, - hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid); - if (!r) - continue; - } - - spin_lock(&adev->gmc.invalidate_lock); - - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; - /* Busy wait for ACK.*/ - for (j = 0; j < 100; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - tmp &= 1 << vmid; - if (tmp) - break; - cpu_relax(); - } - if (j < 100) { - spin_unlock(&adev->gmc.invalidate_lock); + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + 1 << vmid); continue; } - /* Wait for ACK with a delay.*/ + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); for (j = 0; j < adev->usec_timeout; j++) { tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - tmp &= 1 << vmid; - if (tmp) + if (tmp & (1 << vmid)) break; udelay(1); } - if (j < adev->usec_timeout) { - spin_unlock(&adev->gmc.invalidate_lock); - continue; - } spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) + continue; + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } } @@ -429,7 +432,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = 
&adev->vmhub[ring->funcs->vmhub]; - uint32_t req = gmc_v9_0_get_invalidate_req(vmid); + uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), @@ -739,9 +742,8 @@ static int gmc_v9_0_late_init(void *handle) unsigned vmhub = ring->funcs->vmhub; ring->vm_inv_eng = vm_inv_eng[vmhub]++; - dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", - ring->idx, ring->name, ring->vm_inv_eng, - ring->funcs->vmhub); + dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", + ring->name, ring->vm_inv_eng, ring->funcs->vmhub); } /* Engine 16 is used for KFD and 17 for GART flushes */ @@ -959,6 +961,9 @@ static int gmc_v9_0_sw_init(void *handle) /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -991,7 +996,7 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); - if (adev->asic_type == CHIP_VEGA20) { + if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); if (r) return r; @@ -1122,7 +1127,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gfxhub_v1_0_set_fault_enable_default(adev, value); mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0); + gmc_v9_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cf0fc61aebe6..a3984d10b604 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -208,34 +208,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) } /** - * iceland_ih_prescreen_iv - prescreen an interrupt vector 
- * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * iceland_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -440,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { static const struct amdgpu_ih_funcs iceland_ih_funcs = { .get_wptr = iceland_ih_get_wptr, - .prescreen_iv = iceland_ih_prescreen_iv, .decode_iv = iceland_ih_decode_iv, .set_rptr = iceland_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index d0e478f43443..0c9a2c03504e 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable) pi->caps_db_ramping || pi->caps_td_ramping || pi->caps_tcp_ramping) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { ret = kv_program_pt_config_registers(adev, didt_config_kv); if (ret) { - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return ret; } } kv_do_enable_didt(adev, enable); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index a0db67adc34c..d0d966d6080a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -52,20 +52,25 @@ u64 
mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) return base; } -static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value = amdgpu_gmc_pd_addr(adev->gart.bo); + /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ + int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - mmhub_v1_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index bef3d0c0c117..0de0fdf98c00 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -34,5 +34,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, bool enable); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 64e875d528dd..6a0fcd67662a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ 
b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -37,7 +37,6 @@ #include "gmc/gmc_8_2_sh_mask.h" #include "oss/oss_3_0_d.h" #include "oss/oss_3_0_sh_mask.h" -#include "gca/gfx_8_0_sh_mask.h" #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" #include "smu/smu_7_1_3_d.h" diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 882bd83a28c4..0de00fbe9233 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; @@ -89,7 +91,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ - + GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ + GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 295c2205485a..d78b4306a36f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -240,12 +240,9 @@ static int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - struct psp_ring *ring; unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - ring = &psp->km_ring; - /* Write the ring destroy command to C2PMSG_64 */ psp_ring_reg = 3 << 16; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); diff --git 
a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 3f3fac2d50cd..6c9a1b748ca7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -34,6 +34,7 @@ #include "nbio/nbio_7_4_offset.h" MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -98,7 +99,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) const char *chip_name; char fw_name[30]; int err = 0; - const struct psp_firmware_header_v1_0 *hdr; + const struct psp_firmware_header_v1_0 *sos_hdr; + const struct ta_firmware_header_v1_0 *ta_hdr; DRM_DEBUG("\n"); @@ -119,16 +121,32 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) if (err) goto out; - hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; - adev->psp.sos_fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->psp.sos_feature_version = le32_to_cpu(hdr->ucode_feature_version); - adev->psp.sos_bin_size = le32_to_cpu(hdr->sos_size_bytes); - adev->psp.sys_bin_size = le32_to_cpu(hdr->header.ucode_size_bytes) - - le32_to_cpu(hdr->sos_size_bytes); - adev->psp.sys_start_addr = (uint8_t *)hdr + - le32_to_cpu(hdr->header.ucode_array_offset_bytes); + sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) - + le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + - le32_to_cpu(hdr->sos_offset_bytes); + le32_to_cpu(sos_hdr->sos_offset_bytes); + + snprintf(fw_name, sizeof(fw_name), 
"amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + return 0; out: if (err) { @@ -153,8 +171,11 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) * are already been loaded. */ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; + } /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), @@ -167,7 +188,7 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) /* Copy PSP System Driver binary to memory */ memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); - /* Provide the sys driver to bootrom */ + /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 1 << 16; @@ -208,7 +229,7 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* Copy Secure OS binary to PSP memory */ memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); - /* Provide the PSP secure OS to bootrom */ + /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 2 << 16; @@ -278,26 +299,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct 
amdgpu_device *adev = psp->adev; - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + if (psp_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + 
WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } return ret; } @@ -308,15 +350,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, int ret = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* Write the ring destroy command*/ + if (psp_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware which needs delay */ mdelay(20); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + /* Wait for response flag (bit 31) */ + if (psp_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); return ret; } @@ -355,7 +406,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_support_vmr_ring(psp)) + psp_write_ptr_reg = 
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -384,7 +438,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -529,7 +587,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) /*send the mode 1 reset command*/ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); @@ -552,24 +610,110 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology) { + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + struct ta_xgmi_cmd_get_topology_info_output *topology_info_output; + int i; + int ret; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + /* Fill in the shared memory with topology information as input */ + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = 
topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to get the topology information */ + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + if (ret) + return ret; + + /* Read the output topology information from the shared memory */ + topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; + topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; + for (i = 0; i < topology->num_nodes; i++) { + topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; + topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; + topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; + topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; + } + return 0; } static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, int number_devices, struct psp_xgmi_topology_info *topology) { - return 0; + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + int i; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + 
topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to set topology information */ + return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO); } static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp) { - u64 hive_id = 0; + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); - /* Remove me when we can get correct hive_id through PSP */ - if (psp->adev->gmc.xgmi.num_physical_nodes) - hive_id = 0x123456789abcdef; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; - return hive_id; + /* Invoke xgmi ta to get hive id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return 0; + else + return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; +} + +static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; + + /* Invoke xgmi ta to get the node id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return 0; + else + return xgmi_cmd->xgmi_out_message.get_node_id.node_id; } static const struct psp_funcs psp_v11_0_funcs = { @@ -587,6 +731,7 @@ static const struct psp_funcs psp_v11_0_funcs = { .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, + .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, }; void psp_v11_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index e1ebf770c303..7357fd56e614 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -194,7 +194,7 @@ 
static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Copy PSP System Driver binary to memory */ memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); - /* Provide the sys driver to bootrom */ + /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 1 << 16; @@ -254,7 +254,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Copy Secure OS binary to PSP memory */ memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); - /* Provide the PSP secure OS to bootrom */ + /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 2 << 16; @@ -356,12 +356,9 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - struct psp_ring *ring; unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - ring = &psp->km_ring; - /* Write the ring destroy command to C2PMSG_64 */ psp_ring_reg = 3 << 16; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); @@ -593,9 +590,9 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) } /*send the mode 1 reset command*/ - WREG32(offset, 0x70000); + WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2d4770e173dd..9f3cb2aec7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; 
i++) @@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } sdma_v2_4_enable(adev, true); for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -550,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -581,15 
+577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -612,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -644,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -760,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ 
-1105,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id, queue_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + queue_id = (entry->ring_id & 0xc) >> 2; + + if (instance_id <= 1 && queue_id == 0) + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 6fb3edaba0ec..1bccc5fe2d9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -399,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -419,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). 
*/ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -523,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -739,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } /* unhalt the MEs */ @@ -749,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -822,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -853,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) 
DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -884,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -916,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -1031,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1163,7 +1146,7 @@ static int sdma_v3_0_sw_init(void 
*handle) if (!amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; ring->doorbell_index = (i == 0) ? - AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1; } else { ring->use_pollmem = true; } @@ -1440,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id, queue_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + queue_id = (entry->ring_id & 0xc) >> 2; + + if (instance_id <= 1 && queue_id == 0) + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7a8c9172d30a..4b6d3e5c821f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L +#define WREG32_SDMA(instance, offset, value) \ + WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value) +#define RREG32_SDMA(instance, offset) \ + RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset))) + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -367,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(adev, 
ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - wptr = highbit; + wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); wptr = wptr << 32; - wptr |= lowbit; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", + ring->me, wptr); } return wptr >> 2; @@ -417,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), ring->me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR, + lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI, + upper_32_bits(ring->wptr << 2)); + } +} + +/** + * sdma_v4_0_page_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (VEGA10+). + */ +static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + } else { + wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI); + wptr = wptr << 32; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR); + } + + return wptr >> 2; +} + +/** + * sdma_v4_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (VEGA10+). 
+ */ +static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + + /* XXX check if swapping is necessary on BE */ + WRITE_ONCE(*wb, (ring->wptr << 2)); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + uint64_t wptr = ring->wptr << 2; + + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR, + lower_32_bits(wptr)); + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI, + upper_32_bits(wptr)); } } static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -444,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VEGA10). */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -568,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - 
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -593,6 +649,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) } /** + * sdma_v4_0_page_stop - stop the page async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the page async dma ring buffers (VEGA10). + */ +static void sdma_v4_0_page_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, + IB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; +} + +/** * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer @@ -630,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 
1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), - phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl); } } @@ -662,156 +748,215 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) if (enable == false) { sdma_v4_0_gfx_stop(adev); sdma_v4_0_rlc_stop(adev); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_stop(adev); } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl); } } /** + * sdma_v4_0_rb_cntl - get parameters for rb_cntl + */ +static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) +{ + /* Set ring buffer size in dwords */ + uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + return rb_cntl; +} + +/** * sdma_v4_0_gfx_resume - setup and start the async dma engines * * @adev: amdgpu_device pointer + * @i: instance to resume * * Set up the gfx DMA ring buffers and enable them (VEGA10). * Returns 0 for success, error for failure. 
*/ -static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) { - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; - u32 rb_bufsz; u32 wb_offset; u32 doorbell; u32 doorbell_offset; - u32 temp; u64 wptr_gpu_addr; - int i, r; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - wb_offset = (ring->rptr_offs * 4); + wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); -#ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); -#endif - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0); - /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 
0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + ring->wptr = 0; - ring->wptr = 0; + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1); - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); - - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET); - doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); - - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + doorbell = 
REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA0_GFX_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); - } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); + + sdma_v4_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - if (amdgpu_sriov_vf(adev)) - sdma_v4_0_ring_set_wptr(ring); + ring->sched.ready = true; +} - /* set minor_ptr_update to 0 after wptr programed */ - 
WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); +/** + * sdma_v4_0_page_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @i: instance to resume + * + * Set up the page DMA ring buffers and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) +{ + struct amdgpu_ring *ring = &adev->sdma.instance[i].page; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u64 wptr_gpu_addr; - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + wb_offset = (ring->rptr_offs * 4); - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); - } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); - /* setup the wptr shadow polling */ - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* 
Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); -#ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); -#endif - /* enable DMA IBs */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - ring->ready = true; + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v4_0_ctx_switch_enable(adev, true); - sdma_v4_0_enable(adev, true); - } + ring->wptr = 0; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - return r; - } + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1); - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET); - } + doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = 
REG_SET_FIELD(doorbell_offset, + SDMA0_PAGE_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset); + + /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */ + sdma_v4_0_page_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_PAGE_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - return 0; + ring->sched.ready = true; } static void @@ -922,12 +1067,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32_SDMA(i, mmSDMA0_UCODE_DATA, + le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 
adev->sdma.instance[i].fw_version); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, + adev->sdma.instance[i].fw_version); } return 0; @@ -943,33 +1090,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) */ static int sdma_v4_0_start(struct amdgpu_device *adev) { - int r = 0; + struct amdgpu_ring *ring; + int i, r; if (amdgpu_sriov_vf(adev)) { sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); + } else { - /* set RB registers */ - r = sdma_v4_0_gfx_resume(adev); - return r; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + r = sdma_v4_0_load_microcode(adev); + if (r) + return r; + } + + /* unhalt the MEs */ + sdma_v4_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v4_0_ctx_switch_enable(adev, true); } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - r = sdma_v4_0_load_microcode(adev); + /* start the gfx rings and rlc compute queues */ + for (i = 0; i < adev->sdma.num_instances; i++) { + uint32_t temp; + + WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0); + sdma_v4_0_gfx_resume(adev, i); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_resume(adev, i); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32_SDMA(i, mmSDMA0_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_CNTL, temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp); + } + } + + if (amdgpu_sriov_vf(adev)) { + sdma_v4_0_ctx_switch_enable(adev, true); + sdma_v4_0_enable(adev, true); + } else { + r = sdma_v4_0_rlc_resume(adev); if (r) return r; } - /* unhalt the MEs */ - sdma_v4_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v4_0_ctx_switch_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; - /* start the gfx 
rings and rlc compute queues */ - r = sdma_v4_0_gfx_resume(adev); - if (r) - return r; - r = sdma_v4_0_rlc_resume(adev); + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page = &adev->sdma.instance[i].page; + + r = amdgpu_ring_test_helper(page); + if (r) + return r; + + if (adev->mman.buffer_funcs_ring == page) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } return r; } @@ -993,21 +1185,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -1024,15 +1211,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -1055,20 +1238,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, 
NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -1087,21 +1266,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } + err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -1206,7 +1381,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1272,15 +1447,46 @@ static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } +static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) +{ + uint fw_version = adev->sdma.instance[0].fw_version; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return fw_version >= 430; + case CHIP_VEGA12: + /*return fw_version >= 31;*/ + return false; + case CHIP_VEGA20: + /*return fw_version >= 115;*/ + return false; + default: + return false; + } +} + static int sdma_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; if (adev->asic_type == CHIP_RAVEN) adev->sdma.num_instances = 1; else adev->sdma.num_instances = 2; + r = sdma_v4_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed 
to load sdma firmware!\n"); + return r; + } + + /* TODO: Page queue breaks driver reload under SRIOV */ + if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev))) + adev->sdma.has_page_queue = false; + else if (sdma_v4_0_fw_support_paging_queue(adev)) + adev->sdma.has_page_queue = true; + sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); @@ -1289,7 +1495,6 @@ static int sdma_v4_0_early_init(void *handle) return 0; } - static int sdma_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1308,12 +1513,6 @@ static int sdma_v4_0_sw_init(void *handle) if (r) return r; - r = sdma_v4_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1322,15 +1521,10 @@ static int sdma_v4_0_sw_init(void *handle) DRM_INFO("use_doorbell being set to: [%s]\n", ring->use_doorbell?"true":"false"); - if (adev->asic_type == CHIP_VEGA10) - ring->doorbell_index = (i == 0) ? - (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset - else - ring->doorbell_index = (i == 0) ? - (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset - + /* doorbell size is 2 dwords, get DWORD offset */ + ring->doorbell_index = (i == 0) ? 
+ (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1340,6 +1534,29 @@ static int sdma_v4_0_sw_init(void *handle) AMDGPU_SDMA_IRQ_TRAP1); if (r) return r; + + if (adev->sdma.has_page_queue) { + ring = &adev->sdma.instance[i].page; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + /* paging queue use same doorbell index/routing as gfx queue + * with 0x400 (4096 dwords) offset on second doorbell page + */ + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); + ring->doorbell_index += 0x400; + + sprintf(ring->name, "page%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? + AMDGPU_SDMA_IRQ_TRAP0 : + AMDGPU_SDMA_IRQ_TRAP1); + if (r) + return r; + } } return r; @@ -1350,8 +1567,11 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); + if (adev->sdma.has_page_queue) + amdgpu_ring_fini(&adev->sdma.instance[i].page); + } for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); @@ -1414,7 +1634,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1430,8 +1650,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + sdma0 = 
RREG32_SDMA(0, mmSDMA0_STATUS_REG); + sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1452,16 +1672,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { + unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; u32 sdma_cntl; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? - sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); - - sdma_cntl = RREG32(reg_offset); + sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_cntl); + WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); return 0; } @@ -1470,39 +1687,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t instance; + DRM_DEBUG("IH: SDMA trap\n"); switch (entry->client_id) { case SOC15_IH_CLIENTID_SDMA0: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[0].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 0; break; case SOC15_IH_CLIENTID_SDMA1: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[1].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[instance].ring); + break; + case 1: + /* XXX compute */ + break; + case 2: + /* XXX compute */ + break; + case 3: + amdgpu_fence_process(&adev->sdma.instance[instance].page); break; } return 0; @@ -1512,12 +1722,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device 
*adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + int instance; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + instance = 0; + break; + case SOC15_IH_CLIENTID_SDMA1: + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + drm_sched_fault(&adev->sdma.instance[instance].ring.sched); + break; + } return 0; } - static void sdma_v4_0_update_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) @@ -1730,6 +1957,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = 
amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1737,6 +1996,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; + if (adev->sdma.has_page_queue) { + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + adev->sdma.instance[i].page.me = i; + } } } @@ -1818,7 +2081,10 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + if (adev->sdma.has_page_queue) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; + else + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { @@ -1836,7 +2102,10 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; + if (adev->sdma.has_page_queue) + sched = &adev->sdma.instance[i].page.sched; + else + sched = &adev->sdma.instance[i].ring.sched; adev->vm_manager.vm_pte_rqs[i] = &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index adbaea6da0d7..b6e473134e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* The indirect buffer packet must end 
on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ @@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, false); - ring->ready = false; + ring->sched.ready = false; } } @@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); - ring->ready = true; + ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 4); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); @@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = 
amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); ib.ptr[1] = lower_32_bits(gpu_addr); @@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -658,15 +640,6 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, return 0; } -static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); - return 0; -} - static int si_dma_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -781,15 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { .process = si_dma_process_trap_irq, }; -static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = { - .process = si_dma_process_illegal_inst_irq, -}; - static void si_dma_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; - adev->sdma.illegal_inst_irq.funcs = 
&si_dma_illegal_inst_irq_funcs; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index b3d7d9f83202..2938fb9f17cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * si_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool si_ih_prescreen_iv(struct amdgpu_device *adev) -{ - /* Process all interrupts */ - return true; -} - static void si_ih_decode_iv(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { @@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { static const struct amdgpu_ih_funcs si_ih_funcs = { .get_wptr = si_ih_get_wptr, - .prescreen_iv = si_ih_prescreen_iv, .decode_iv = si_ih_decode_iv, .set_rptr = si_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4cc0dcb1a187..8849b74078d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -507,6 +507,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } + if (adev->asic_type == CHIP_VEGA20) + adev->gmc.xgmi.supported = true; + if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; else if (adev->asic_type == CHIP_VEGA20) @@ -613,6 +616,24 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, + .init_doorbell_index = &vega10_doorbell_index_init, +}; + +static const struct amdgpu_asic_funcs vega20_asic_funcs = +{ + .read_disabled_bios = &soc15_read_disabled_bios, + .read_bios_from_rom = &soc15_read_bios_from_rom, + .read_register = &soc15_read_register, + .reset = &soc15_asic_reset, + .set_vga_state = &soc15_vga_set_state, 
+ .get_xclk = &soc15_get_xclk, + .set_uvd_clocks = &soc15_set_uvd_clocks, + .set_vce_clocks = &soc15_set_vce_clocks, + .get_config_memsize = &soc15_get_config_memsize, + .flush_hdp = &soc15_flush_hdp, + .invalidate_hdp = &soc15_invalidate_hdp, + .need_full_reset = &soc15_need_full_reset, + .init_doorbell_index = &vega20_doorbell_index_init, }; static int soc15_common_early_init(void *handle) @@ -632,11 +653,11 @@ static int soc15_common_early_init(void *handle) adev->se_cac_rreg = &soc15_se_cac_rreg; adev->se_cac_wreg = &soc15_se_cac_wreg; - adev->asic_funcs = &soc15_asic_funcs; adev->external_rev_id = 0xFF; switch (adev->asic_type) { case CHIP_VEGA10: + adev->asic_funcs = &soc15_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_RLC_LS | @@ -660,6 +681,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = 0x1; break; case CHIP_VEGA12: + adev->asic_funcs = &soc15_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | @@ -682,6 +704,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x14; break; case CHIP_VEGA20: + adev->asic_funcs = &vega20_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | @@ -704,6 +727,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x28; break; case CHIP_RAVEN: + adev->asic_funcs = &soc15_asic_funcs; if (adev->rev_id >= 0x8) adev->external_rev_id = adev->rev_id + 0x81; else if (adev->pdev->device == 0x15d8) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index f8ad7804dc40..a66c8bfbbaa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -58,4 +58,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct 
amdgpu_device *adev); +void vega10_doorbell_index_init(struct amdgpu_device *adev); +void vega20_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h new file mode 100644 index 000000000000..ac2c27b7630c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -0,0 +1,130 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef _TA_XGMI_IF_H +#define _TA_XGMI_IF_H + +/* Responses have bit 31 set */ +#define RSP_ID_MASK (1U << 31) +#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) + +enum ta_command_xgmi { + TA_COMMAND_XGMI__INITIALIZE = 0x00, + TA_COMMAND_XGMI__GET_NODE_ID = 0x01, + TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, + TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, + TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 +}; + +/* XGMI related enumerations */ +/**********************************************************/; +enum ta_xgmi_connected_nodes { + TA_XGMI__MAX_CONNECTED_NODES = 64 +}; + +enum ta_xgmi_status { + TA_XGMI_STATUS__SUCCESS = 0x00, + TA_XGMI_STATUS__GENERIC_FAILURE = 0x01, + TA_XGMI_STATUS__NULL_POINTER = 0x02, + TA_XGMI_STATUS__INVALID_PARAMETER = 0x03, + TA_XGMI_STATUS__NOT_INITIALIZED = 0x04, + TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05, + TA_XGMI_STATUS__INVALID_NODE_ID = 0x06, + TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07, + TA_XGMI_STATUS__FAILED_ID_GEN = 0x08, + TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09, + TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A +}; + +enum ta_xgmi_assigned_sdma_engine { + TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5 +}; + +/* input/output structures for XGMI commands */ +/**********************************************************/ +struct ta_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct ta_xgmi_cmd_initialize_output { + uint32_t status; +}; + +struct ta_xgmi_cmd_get_node_id_output { + uint64_t node_id; +}; + +struct ta_xgmi_cmd_get_hive_id_output { + uint64_t hive_id; +}; + +struct ta_xgmi_cmd_get_topology_info_input { + uint32_t num_nodes; + struct ta_xgmi_node_info 
nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +struct ta_xgmi_cmd_get_topology_info_output { + uint32_t num_nodes; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +struct ta_xgmi_cmd_set_topology_info_input { + uint32_t num_nodes; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +/**********************************************************/ +/* Common input structure for XGMI callbacks */ +union ta_xgmi_cmd_input { + struct ta_xgmi_cmd_get_topology_info_input get_topology_info; + struct ta_xgmi_cmd_set_topology_info_input set_topology_info; +}; + +/* Common output structure for XGMI callbacks */ +union ta_xgmi_cmd_output { + struct ta_xgmi_cmd_initialize_output initialize; + struct ta_xgmi_cmd_get_node_id_output get_node_id; + struct ta_xgmi_cmd_get_hive_id_output get_hive_id; + struct ta_xgmi_cmd_get_topology_info_output get_topology_info; +}; +/**********************************************************/ + +struct ta_xgmi_shared_memory { + uint32_t cmd_id; + uint32_t resp_id; + enum ta_xgmi_status xgmi_status; + uint32_t reserved; + union ta_xgmi_cmd_input xgmi_in_message; + union ta_xgmi_cmd_output xgmi_out_message; +}; + +#endif //_TA_XGMI_IF_H diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 3abffd06b5c7..15da06ddeb75 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,34 +219,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) } /** - * tonga_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. 
- */ -static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * tonga_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -322,7 +294,7 @@ static int tonga_ih_sw_init(void *handle) return r; adev->irq.ih.use_doorbell = true; - adev->irq.ih.doorbell_index = AMDGPU_DOORBELL_IH; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih; r = amdgpu_irq_init(adev); @@ -506,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { static const struct amdgpu_ih_funcs tonga_ih_funcs = { .get_wptr = tonga_ih_get_wptr, - .prescreen_iv = tonga_ih_prescreen_iv, .decode_iv = tonga_ih_decode_iv, .set_rptr = tonga_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1fc17bf39fed..d69c8f6daaf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -116,16 +116,16 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); return r; @@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle) uvd_v4_2_enable_mgcg(adev, true); amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ 
-218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index fde6ad5ac9ab..ee8cd06ddc38 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -113,16 +113,16 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); return r; @@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle) 
uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); uvd_v5_0_enable_mgcg(adev, true); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 7a5b40275e8e..d4f4a66f8324 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_ring_alloc(ring, 16); - if (r) { - 
DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -336,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; } + static int uvd_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -416,16 +400,16 @@ static int uvd_v6_0_sw_init(void *handle) DRM_INFO("UVD ENC is disabled\n"); } - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; @@ -476,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle) uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); uvd_v6_0_enable_mgcg(adev, 
true); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -513,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle) if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } } @@ -548,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -969,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -984,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -1004,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); amdgpu_ring_write(ring, vmid); @@ -1027,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc 
ring commands to execute the indirect buffer */ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 58b39afcfb86..089645e78f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) return 0; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", - ring->me, ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", - ring->me, ring->idx, i); - } else { - DRM_ERROR("amdgpu: (%d)ring %d test failed\n", - ring->me, ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -343,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); + if (r) goto error; - } r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", 
ring->me, r); - } else { - DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; @@ -447,10 +430,6 @@ static int uvd_v7_0_sw_init(void *handle) DRM_INFO("PSP loading UVD firmware\n"); } - r = amdgpu_uvd_resume(adev); - if (r) - return r; - for (j = 0; j < adev->uvd.num_uvd_inst; j++) { if (adev->uvd.harvest_config & (1 << j)) continue; @@ -472,9 +451,9 @@ static int uvd_v7_0_sw_init(void *handle) * sriov, so set unused location for other unused rings. */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); if (r) @@ -482,6 +461,10 @@ static int uvd_v7_0_sw_init(void *handle) } } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); if (r) return r; @@ -540,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle) ring = &adev->uvd.inst[j].ring; if (!amdgpu_sriov_vf(adev)) { - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -582,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } } done: @@ -619,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle) for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { if (adev->uvd.harvest_config & (1 << i)) continue; - adev->uvd.inst[i].ring.ready = false; + adev->uvd.inst[i].ring.sched.ready = false; } return 0; @@ -1235,11 +1212,9 @@ static 
int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", - ring->me, ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -1251,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", - ring->me, ring->idx, i); - } else { - DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", - ring->me, ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -1300,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, * Write ring commands to execute the indirect buffer */ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); @@ -1329,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index ea28828360d3..bed78a778e3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle) amdgpu_asic_set_vce_clocks(adev, 10000, 10000); vce_v2_0_enable_mgcg(adev, true, false); - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6dbd39730070..2668effadd27 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -37,7 +37,6 @@ #include "gca/gfx_8_0_d.h" #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -#include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_sh_mask.h" #include "ivsrcid/ivsrcid_vislands30.h" @@ -474,15 +473,10 @@ static int vce_v3_0_hw_init(void *handle) amdgpu_asic_set_vce_clocks(adev, 10000, 10000); - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; - for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); @@ -838,8 +832,12 @@ out: } static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCE_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 1c9471890bf7..9fb34b7d8e03 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,9 +466,9 @@ static int vce_v4_0_sw_init(void *handle) * so set unused location for other unused rings. */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) @@ -519,15 +519,10 @@ static int vce_v4_0_hw_init(void *handle) if (r) return r; - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; - for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); @@ -549,7 +544,7 @@ static int vce_v4_0_hw_fini(void *handle) } for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; + adev->vce.ring[i].sched.ready = false; return 0; } @@ -951,9 +946,11 @@ static int vce_v4_0_set_powergating_state(void *handle, } #endif -static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) +static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCE_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 322e09b5b448..4f8352044563 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -177,30 +177,22 @@ static int vcn_v1_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.ring_dec; int i, r; - ring->ready = true; - r = 
amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + ring->sched.ready = true; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } ring = &adev->vcn.ring_jpeg; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } done: if (!r) @@ -225,7 +217,7 @@ static int vcn_v1_0_hw_fini(void *handle) if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -1367,10 +1359,12 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * Write ring commands to execute the indirect buffer */ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); @@ -1525,8 +1519,12 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * Write enc ring commands to execute the indirect buffer */ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCN_ENC_CMD_IB); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1726,10 +1724,12 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 * Write ring commands to execute the indirect 
buffer. */ static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index a0fda6f9252a..2c250b01a903 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -220,90 +220,6 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) } /** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u32 dw0, dw3, dw4, dw5; - u16 pasid; - u64 addr, key; - struct amdgpu_vm *vm; - int r; - - dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); - dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); - - /* Filter retry page faults, let only the first one pass. If - * there are too many outstanding faults, ignore them until - * some faults get cleared. - */ - switch (dw0 & 0xff) { - case SOC15_IH_CLIENTID_VMC: - case SOC15_IH_CLIENTID_UTCL2: - break; - default: - /* Not a VM fault */ - return true; - } - - pasid = dw3 & 0xffff; - /* No PASID, can't identify faulting process */ - if (!pasid) - return true; - - /* Not a retry fault, check fault credit */ - if (!(dw5 & 0x80)) { - if (!amdgpu_vm_pasid_fault_credit(adev, pasid)) - goto ignore_iv; - return true; - } - - /* Track retry faults in per-VM fault FIFO. 
*/ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12); - key = AMDGPU_VM_FAULT(pasid, addr); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } else { - r = amdgpu_vm_add_fault(vm->fault_hash, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) { - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. Ignore it until there is space */ - amdgpu_vm_clear_fault(vm->fault_hash, key); - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - - spin_unlock(&adev->vm_manager.pasid_lock); - /* It's the first fault for this address, process it normally */ - return true; - -ignore_iv: - adev->irq.ih.rptr += 32; - return false; -} - -/** * vega10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -385,7 +301,7 @@ static int vega10_ih_sw_init(void *handle) return r; adev->irq.ih.use_doorbell = true; - adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; r = amdgpu_irq_init(adev); @@ -487,7 +403,6 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, - .prescreen_iv = vega10_ih_prescreen_iv, .decode_iv = vega10_ih_decode_iv, .set_rptr = vega10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index c5c9b2bc190d..422674bb3cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -56,4 +56,32 @@ int vega10_reg_base_init(struct amdgpu_device *adev) return 0; } +void 
vega10_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_DOORBELL64_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL64_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL64_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL64_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL64_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL64_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL64_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL64_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_DOORBELL64_IH; + adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_DOORBELL64_UVD_RING0_1; + adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_DOORBELL64_UVD_RING2_3; + adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_DOORBELL64_UVD_RING4_5; + adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_DOORBELL64_UVD_RING6_7; + adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_DOORBELL64_VCE_RING0_1; + adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; + adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; + adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + /* In unit of dword doorbell */ + adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index d13fc4fcb517..edce413fda9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ 
-54,4 +54,37 @@ int vega20_reg_base_init(struct amdgpu_device *adev) return 0; } +void vega20_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_VEGA20_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_VEGA20_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_VEGA20_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_VEGA20_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_VEGA20_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_VEGA20_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine2 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2; + adev->doorbell_index.sdma_engine3 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3; + adev->doorbell_index.sdma_engine4 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4; + adev->doorbell_index.sdma_engine5 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5; + adev->doorbell_index.sdma_engine6 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6; + adev->doorbell_index.sdma_engine7 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7; + adev->doorbell_index.ih = AMDGPU_VEGA20_DOORBELL_IH; + adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1; + adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3; + adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5; + adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7; + 
adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1; + adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; + adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; + adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 07880d35e9de..ff2906c215fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -955,6 +955,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .flush_hdp = &vi_flush_hdp, .invalidate_hdp = &vi_invalidate_hdp, .need_full_reset = &vi_need_full_reset, + .init_doorbell_index = &legacy_doorbell_index_init, }; #define CZ_REV_BRISTOL(rev) \ @@ -1712,3 +1713,21 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) return 0; } + +void legacy_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL_MEC_RING7; + adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_DOORBELL_IH; + adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_MAX_ASSIGNMENT; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 
0429fe332269..8de0772f986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -30,4 +30,5 @@ void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int vi_set_ip_blocks(struct amdgpu_device *adev); +void legacy_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 5d2475d5392c..177d1e5329a5 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -23,6 +23,7 @@ #include "kfd_priv.h" #include "kfd_events.h" #include "cik_int.h" +#include "amdgpu_amdkfd.h" static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -107,7 +108,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, kfd_process_vm_fault(dev->dqm, pasid); memset(&info, 0, sizeof(info)); - dev->kfd2kgd->get_vm_fault_info(dev->kgd, &info); + amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info); if (!info.page_addr && !info.status) return; diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 37ce6dd65391..8e2a1663c4db 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -68,6 +68,4 @@ #define GRBM_GFX_INDEX 0x30800 -#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) - #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 14d5b5fa822d..3623538baf6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -33,10 +33,12 @@ #include <linux/time.h> #include <linux/mm.h> #include <linux/mman.h> +#include <linux/dma-buf.h> #include <asm/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" +#include "amdgpu_amdkfd.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int 
kfd_open(struct inode *, struct file *); @@ -834,8 +836,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, dev = kfd_device_by_id(args->gpu_id); if (dev) /* Reading GPU clock counter from KGD */ - args->gpu_clock_counter = - dev->kfd2kgd->get_gpu_clock_counter(dev->kgd); + args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd); else /* Node without GPU resource */ args->gpu_clock_counter = 0; @@ -1042,7 +1043,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, } mutex_unlock(&p->mutex); - err = kfd->kfd2kgd->map_gtt_bo_to_kernel(kfd->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, mem, &kern_addr, &size); if (err) { pr_err("Failed to map event page to kernel\n"); @@ -1240,7 +1241,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) if (dev->device_info->needs_iommu_device) return false; - dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info); if (mem_info.local_mem_size_private == 0 && mem_info.local_mem_size_public > 0) return true; @@ -1273,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return -EINVAL; } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { + if (args->size != kfd_doorbell_process_slice(dev)) + return -EINVAL; + offset = kfd_get_process_doorbells(dev, p); + } + mutex_lock(&p->mutex); pdd = kfd_bind_process_to_device(dev, p); @@ -1281,7 +1288,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } - err = dev->kfd2kgd->alloc_memory_of_gpu( + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->kgd, args->va_addr, args->size, pdd->vm, (struct kgd_mem **) &mem, &offset, flags); @@ -1303,7 +1310,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return 0; err_free: - dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); err_unlock: mutex_unlock(&p->mutex); return err; 
@@ -1338,7 +1345,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, goto err_unlock; } - ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); + ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, + (struct kgd_mem *)mem); /* If freeing the buffer failed, leave the handle in place for * clean-up during process tear-down. @@ -1418,7 +1426,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, err = PTR_ERR(peer_pdd); goto get_mem_obj_from_handle_failed; } - err = peer->kfd2kgd->map_memory_to_gpu( + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); if (err) { pr_err("Failed to map to gpu %d/%d\n", @@ -1430,7 +1438,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, mutex_unlock(&p->mutex); - err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; @@ -1525,7 +1533,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, err = -ENODEV; goto get_mem_obj_from_handle_failed; } - err = dev->kfd2kgd->unmap_memory_to_gpu( + err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); if (err) { pr_err("Failed to unmap from gpu %d/%d\n", @@ -1549,6 +1557,115 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_get_dmabuf_info(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_dmabuf_info_args *args = data; + struct kfd_dev *dev = NULL; + struct kgd_dev *dma_buf_kgd; + void *metadata_buffer = NULL; + uint32_t flags; + unsigned int i; + int r; + + /* Find a KFD GPU device that supports the get_dmabuf_info query */ + for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++) + if (dev) + break; + if (!dev) + return -EINVAL; + + if (args->metadata_ptr) { + metadata_buffer = 
kzalloc(args->metadata_size, GFP_KERNEL); + if (!metadata_buffer) + return -ENOMEM; + } + + /* Get dmabuf info from KGD */ + r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, + &dma_buf_kgd, &args->size, + metadata_buffer, args->metadata_size, + &args->metadata_size, &flags); + if (r) + goto exit; + + /* Reverse-lookup gpu_id from kgd pointer */ + dev = kfd_device_by_kgd(dma_buf_kgd); + if (!dev) { + r = -EINVAL; + goto exit; + } + args->gpu_id = dev->id; + args->flags = flags; + + /* Copy metadata buffer to user mode */ + if (metadata_buffer) { + r = copy_to_user((void __user *)args->metadata_ptr, + metadata_buffer, args->metadata_size); + if (r != 0) + r = -EFAULT; + } + +exit: + kfree(metadata_buffer); + + return r; +} + +static int kfd_ioctl_import_dmabuf(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_import_dmabuf_args *args = data; + struct kfd_process_device *pdd; + struct dma_buf *dmabuf; + struct kfd_dev *dev; + int idr_handle; + uint64_t size; + void *mem; + int r; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) + return -EINVAL; + + dmabuf = dma_buf_get(args->dmabuf_fd); + if (!dmabuf) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + r = PTR_ERR(pdd); + goto err_unlock; + } + + r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, + args->va_addr, pdd->vm, + (struct kgd_mem **)&mem, &size, + NULL); + if (r) + goto err_unlock; + + idr_handle = kfd_process_device_create_obj_handle(pdd, mem); + if (idr_handle < 0) { + r = -EFAULT; + goto err_free; + } + + mutex_unlock(&p->mutex); + + args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); + + return 0; + +err_free: + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); +err_unlock: + mutex_unlock(&p->mutex); + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ 
-1634,7 +1751,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_set_cu_mask, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, - kfd_ioctl_get_queue_wave_state, 0) + kfd_ioctl_get_queue_wave_state, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO, + kfd_ioctl_get_dmabuf_info, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, + kfd_ioctl_import_dmabuf, 0), }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 56412b0e7e1c..c02adbbeef2a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -26,6 +26,7 @@ #include "kfd_priv.h" #include "kfd_topology.h" #include "kfd_iommu.h" +#include "amdgpu_amdkfd.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. @@ -132,6 +133,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define fiji_cache_info carrizo_cache_info #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info +#define polaris12_cache_info carrizo_cache_info /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info @@ -646,7 +648,12 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; + case CHIP_POLARIS12: + pcache_info = polaris12_cache_info; + num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); + break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: pcache_info = vega10_cache_info; num_of_cache_types = ARRAY_SIZE(vega10_cache_info); @@ -753,12 +760,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) return -ENODATA; } - pcrat_image = kmalloc(crat_table->length, GFP_KERNEL); + pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; - memcpy(pcrat_image, crat_table, 
crat_table->length); - *crat_image = pcrat_image; *size = crat_table->length; @@ -1161,7 +1166,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT; cu->proximity_domain = proximity_domain; - kdev->kfd2kgd->get_cu_info(kdev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); cu->num_simd_per_cu = cu_info.simd_per_cu; cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number; cu->max_waves_simd = cu_info.max_waves_per_simd; @@ -1192,7 +1197,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, * report the total FB size (public+private) as a single * private heap. */ - kdev->kfd2kgd->get_local_mem_info(kdev->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e4ded890b1cb..8be9677c0c07 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -28,6 +28,7 @@ #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler.h" #include "kfd_iommu.h" +#include "amdgpu_amdkfd.h" #define MQD_SIZE_ALIGNED 768 @@ -204,6 +205,22 @@ static const struct kfd_device_info polaris11_device_info = { .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info polaris12_device_info = { + .asic_family = CHIP_POLARIS12, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, +}; + static const struct kfd_device_info vega10_device_info = { .asic_family = CHIP_VEGA10, .max_pasid_bits = 16, @@ -236,6 +253,22 @@ static 
const struct kfd_device_info vega10_vf_device_info = { .num_sdma_queues_per_engine = 2, }; +static const struct kfd_device_info vega12_device_info = { + .asic_family = CHIP_VEGA12, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_sdma_queues_per_engine = 2, +}; + static const struct kfd_device_info vega20_device_info = { .asic_family = CHIP_VEGA20, .max_pasid_bits = 16, @@ -330,6 +363,14 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6980, &polaris12_device_info }, /* Polaris12 */ + { 0x6981, &polaris12_device_info }, /* Polaris12 */ + { 0x6985, &polaris12_device_info }, /* Polaris12 */ + { 0x6986, &polaris12_device_info }, /* Polaris12 */ + { 0x6987, &polaris12_device_info }, /* Polaris12 */ + { 0x6995, &polaris12_device_info }, /* Polaris12 */ + { 0x6997, &polaris12_device_info }, /* Polaris12 */ + { 0x699F, &polaris12_device_info }, /* Polaris12 */ { 0x6860, &vega10_device_info }, /* Vega10 */ { 0x6861, &vega10_device_info }, /* Vega10 */ { 0x6862, &vega10_device_info }, /* Vega10 */ @@ -345,6 +386,11 @@ static const struct kfd_deviceid supported_devices[] = { { 0x686E, &vega10_device_info }, /* Vega10 */ { 0x686F, &vega10_device_info }, /* Vega10 */ { 0x687F, &vega10_device_info }, /* Vega10 */ + { 0x69A0, &vega12_device_info }, /* Vega12 */ + { 0x69A1, &vega12_device_info }, /* Vega12 */ + { 0x69A2, &vega12_device_info }, /* Vega12 */ + { 0x69A3, &vega12_device_info }, /* Vega12 */ + { 0x69AF, &vega12_device_info }, /* Vega12 */ { 0x66a0, &vega20_device_info }, /* Vega20 */ { 0x66a1, 
&vega20_device_info }, /* Vega20 */ { 0x66a2, &vega20_device_info }, /* Vega20 */ @@ -485,7 +531,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, /* add another 512KB for all other allocations on gart (HPD, fences) */ size += 512 * 1024; - if (kfd->kfd2kgd->init_gtt_mem_allocation( + if (amdgpu_amdkfd_alloc_gtt_mem( kfd->kgd, size, &kfd->gtt_mem, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { @@ -559,7 +605,7 @@ kfd_topology_add_device_error: kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: - kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -576,7 +622,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); - kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); } kfree(kfd); @@ -688,6 +734,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; bool is_patched = false; + unsigned long flags; if (!kfd->init_complete) return; @@ -697,7 +744,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) return; } - spin_lock(&kfd->interrupt_lock); + spin_lock_irqsave(&kfd->interrupt_lock, flags); if (kfd->interrupts_active && interrupt_is_wanted(kfd, ih_ring_entry, @@ -706,7 +753,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) is_patched ? 
patched_ihre : ih_ring_entry)) queue_work(kfd->ih_wq, &kfd->interrupt_work); - spin_unlock(&kfd->interrupt_lock); + spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } int kgd2kfd_quiesce_mm(struct mm_struct *mm) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index a3b933967171..8372556b52eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -33,6 +33,7 @@ #include "kfd_mqd_manager.h" #include "cik_regs.h" #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h" /* Size of the per-pipe EOP queue */ #define CIK_HPD_EOP_BYTES_LOG2 11 @@ -219,7 +220,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, if (ret) return ret; - return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, + return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, qpd->ib_base, (uint32_t *)qpd->ib_kaddr, pmf->release_mem_size / sizeof(uint32_t)); } @@ -672,7 +673,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -743,7 +744,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ @@ -793,7 +794,7 @@ static int register_process(struct device_queue_manager *dqm, pdd = qpd_to_pdd(qpd); /* Retrieve PD base */ - pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm); + pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); dqm_lock(dqm); 
list_add(&n->list, &dqm->queues); @@ -805,7 +806,7 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); if (dqm->processes_count++ == 0) - dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false); + amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false); dqm_unlock(dqm); @@ -829,7 +830,7 @@ static int unregister_process(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); if (--dqm->processes_count == 0) - dqm->dev->kfd2kgd->set_compute_idle( + amdgpu_amdkfd_set_compute_idle( dqm->dev->kgd, true); goto out; } @@ -845,15 +846,8 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, unsigned int vmid) { - uint32_t pasid_mapping; - - pasid_mapping = (pasid == 0) ? 0 : - (uint32_t)pasid | - ATC_VMID_PASID_MAPPING_VALID; - return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( - dqm->dev->kgd, pasid_mapping, - vmid); + dqm->dev->kgd, pasid, vmid); } static void init_interrupts(struct device_queue_manager *dqm) @@ -1553,7 +1547,7 @@ static int get_wave_state(struct device_queue_manager *dqm, u32 *ctl_stack_used_size, u32 *save_area_used_size) { - struct mqd_manager *mqd; + struct mqd_manager *mqd_mgr; int r; dqm_lock(dqm); @@ -1564,19 +1558,19 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd) { + mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd_mgr) { r = -ENOMEM; goto dqm_unlock; } - if (!mqd->get_wave_state) { + if (!mqd_mgr->get_wave_state) { r = -EINVAL; goto dqm_unlock; } - r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size, - save_area_used_size); + r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, + ctl_stack_used_size, save_area_used_size); dqm_unlock: dqm_unlock(dqm); @@ -1747,10 +1741,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_FIJI: case CHIP_POLARIS10: case 
CHIP_POLARIS11: + case CHIP_POLARIS12: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: device_queue_manager_init_v9(&dqm->asic_ops); @@ -1796,7 +1792,7 @@ static void kfd_process_hw_exception(struct work_struct *work) { struct device_queue_manager *dqm = container_of(work, struct device_queue_manager, hw_exception_work); - dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd); + amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index fd60a116be37..c3a5dcfe877a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -24,7 +24,6 @@ #include "kfd_device_queue_manager.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" -#include "gca/gfx_8_0_enum.h" #include "oss/oss_3_0_sh_mask.h" static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 3d66cec414af..213ea5454d11 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -397,9 +397,11 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: kfd_init_apertures_vi(pdd, id); break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: kfd_init_apertures_v9(pdd, id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index f836897bbf58..a85904ad0d5f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -23,7 +23,7 @@ #include "kfd_priv.h" #include "kfd_events.h" #include "soc15_int.h" - +#include 
"kfd_device_queue_manager.h" static bool event_interrupt_isr_v9(struct kfd_dev *dev, const uint32_t *ih_ring_entry, @@ -39,20 +39,39 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, vmid > dev->vm_info.last_vmid_kfd) return 0; - /* If there is no valid PASID, it's likely a firmware bug */ - pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); - if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) - return 0; - source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + + /* This is a known issue for gfx9. Under non HWS, pasid is not set + * in the interrupt payload, so we need to find out the pasid on our + * own. + */ + if (!pasid && dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + const uint32_t pasid_mask = 0xffff; - pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", - client_id, source_id, pasid); + *patched_flag = true; + memcpy(patched_ihre, ih_ring_entry, + dev->device_info->ih_ring_entry_size); + + pasid = dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid( + dev->kgd, vmid); + + /* Patch the pasid field */ + patched_ihre[3] = cpu_to_le32((le32_to_cpu(patched_ihre[3]) + & ~pasid_mask) | pasid); + } + + pr_debug("client id 0x%x, source id %d, vmid %d, pasid 0x%x. raw data:\n", + client_id, source_id, vmid, pasid); pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]); + /* If there is no valid PASID, it's likely a bug */ + if (WARN_ONCE(pasid == 0, "Bug: No PASID in KFD interrupt")) + return 0; + /* Interrupt types we care about: various signals and faults. * They will be forwarded to a work queue (see below). 
*/ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 6c31f7370193..f1596881f20a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -313,6 +313,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: kernel_queue_init_vi(&kq->ops_asic_specific); break; @@ -322,6 +323,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: kernel_queue_init_v9(&kq->ops_asic_specific); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index e33019a7a883..aed9b9b82213 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -22,6 +22,7 @@ */ #include "kfd_mqd_manager.h" +#include "amdgpu_amdkfd.h" struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev) @@ -37,8 +38,10 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: return mqd_manager_init_vi_tonga(type, dev); case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: return mqd_manager_init_v9(type, dev); @@ -58,7 +61,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, uint32_t cu_per_sh[4] = {0}; int i, se, cu = 0; - mm->dev->kfd2kgd->get_cu_info(mm->dev->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); if (cu_mask_count > cu_info.cu_active_number) cu_mask_count = cu_info.cu_active_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index f381c1cb27bd..9dbba609450e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -30,6 +30,7 @@ 
#include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" #include "sdma0/sdma0_4_0_sh_mask.h" +#include "amdgpu_amdkfd.h" static inline struct v9_mqd *get_mqd(void *mqd) { @@ -83,7 +84,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!*mqd_mem_obj) return -ENOMEM; - retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, + retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, ALIGN(q->ctl_stack_size, PAGE_SIZE) + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), &((*mqd_mem_obj)->gtt_mem), @@ -250,7 +251,7 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, struct kfd_dev *kfd = mm->dev; if (mqd_mem_obj->gtt_mem) { - kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); kfree(mqd_mem_obj); } else { kfd_gtt_sa_free(mm->dev, mqd_mem_obj); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index c6080ed3b6a7..045a229436a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -226,9 +226,11 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: pm->pmf = &kfd_vi_pm_funcs; break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: pm->pmf = &kfd_v9_pm_funcs; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 15fff4420e53..33b08ff00b50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -22,6 +22,7 @@ #include <linux/types.h> #include "kfd_priv.h" +#include "amdgpu_ids.h" static unsigned int pasid_bits = 16; static const struct kfd2kgd_calls *kfd2kgd; @@ -71,7 +72,7 @@ unsigned int kfd_pasid_alloc(void) return false; } - r = kfd2kgd->alloc_pasid(pasid_bits); + r = 
amdgpu_pasid_alloc(pasid_bits); return r > 0 ? r : 0; } @@ -79,5 +80,5 @@ unsigned int kfd_pasid_alloc(void) void kfd_pasid_free(unsigned int pasid) { if (kfd2kgd) - kfd2kgd->free_pasid(pasid); + amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 53ff86d45d91..0689d4ccbbc0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -507,6 +507,7 @@ struct qcm_process_device { * All the memory management data should be here too */ uint64_t gds_context_area; + /* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */ uint64_t page_table_base; uint32_t sh_mem_config; uint32_t sh_mem_bases; @@ -792,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain( struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); +struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev); int kfd_numa_node_to_apic_id(int numa_node_id); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 0039e451d9af..80b36e860a0a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -31,6 +31,7 @@ #include <linux/compat.h> #include <linux/mman.h> #include <linux/file.h> +#include "amdgpu_amdkfd.h" struct mm_struct; @@ -100,8 +101,8 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, { struct kfd_dev *dev = pdd->dev; - dev->kfd2kgd->unmap_memory_to_gpu(dev->kgd, mem, pdd->vm); - dev->kfd2kgd->free_memory_of_gpu(dev->kgd, mem); + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem); } /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process @@ -119,16 +120,16 @@ static int 
kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, int handle; int err; - err = kdev->kfd2kgd->alloc_memory_of_gpu(kdev->kgd, gpu_va, size, + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, pdd->vm, &mem, NULL, flags); if (err) goto err_alloc_mem; - err = kdev->kfd2kgd->map_memory_to_gpu(kdev->kgd, mem, pdd->vm); + err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->vm); if (err) goto err_map_mem; - err = kdev->kfd2kgd->sync_memory(kdev->kgd, mem, true); + err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true); if (err) { pr_debug("Sync memory failed, wait interrupted by user signal\n"); goto sync_memory_failed; @@ -147,7 +148,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, } if (kptr) { - err = kdev->kfd2kgd->map_gtt_bo_to_kernel(kdev->kgd, + err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, (struct kgd_mem *)mem, kptr, NULL); if (err) { pr_debug("Map GTT BO to kernel failed\n"); @@ -165,7 +166,7 @@ sync_memory_failed: return err; err_map_mem: - kdev->kfd2kgd->free_memory_of_gpu(kdev->kgd, mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem); err_alloc_mem: *kptr = NULL; return err; @@ -296,11 +297,11 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) per_device_list) { if (!peer_pdd->vm) continue; - peer_pdd->dev->kfd2kgd->unmap_memory_to_gpu( + amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( peer_pdd->dev->kgd, mem, peer_pdd->vm); } - pdd->dev->kfd2kgd->free_memory_of_gpu(pdd->dev->kgd, mem); + amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem); kfd_process_device_remove_obj_handle(pdd, id); } } @@ -323,11 +324,12 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->dev->id, p->pasid); if (pdd->drm_file) { - pdd->dev->kfd2kgd->release_process_vm(pdd->dev->kgd, pdd->vm); + amdgpu_amdkfd_gpuvm_release_process_vm( + pdd->dev->kgd, pdd->vm); fput(pdd->drm_file); } else if (pdd->vm) - pdd->dev->kfd2kgd->destroy_process_vm( + 
amdgpu_amdkfd_gpuvm_destroy_process_vm( pdd->dev->kgd, pdd->vm); list_del(&pdd->per_device_list); @@ -688,12 +690,12 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, dev = pdd->dev; if (drm_file) - ret = dev->kfd2kgd->acquire_process_vm( + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( dev->kgd, drm_file, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef); else - ret = dev->kfd2kgd->create_process_vm( - dev->kgd, p->pasid, &pdd->vm, &p->kgd_process_info, &p->ef); + ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid, + &pdd->vm, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -714,7 +716,7 @@ err_init_cwsr: err_reserve_ib_mem: kfd_process_device_free_bos(pdd); if (!drm_file) - dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm); + amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm); pdd->vm = NULL; return ret; @@ -972,7 +974,7 @@ static void restore_process_worker(struct work_struct *work) */ p->last_restore_timestamp = get_jiffies_64(); - ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info, + ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, &p->ef); if (ret) { pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index e3843c5929ed..5f5b2acedbac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -36,6 +36,7 @@ #include "kfd_topology.h" #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" +#include "amdgpu_amdkfd.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -100,7 +101,25 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) down_read(&topology_lock); list_for_each_entry(top_dev, &topology_device_list, list) - if (top_dev->gpu->pdev == pdev) { + if (top_dev->gpu && top_dev->gpu->pdev == 
pdev) { + device = top_dev->gpu; + break; + } + + up_read(&topology_lock); + + return device; +} + +struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +{ + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu && top_dev->gpu->kgd == kgd) { device = top_dev->gpu; break; } @@ -1052,7 +1071,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) if (!gpu) return 0; - gpu->kfd2kgd->get_local_mem_info(gpu->kgd, &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); local_mem_size = local_mem_info.local_mem_size_private + local_mem_info.local_mem_size_public; @@ -1118,8 +1137,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev) * for APUs - If CRAT from ACPI reports more than one bank, then * all the banks will report the same mem_clk_max information */ - dev->gpu->kfd2kgd->get_local_mem_info(dev->gpu->kgd, - &local_mem_info); + amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); list_for_each_entry(mem, &dev->mem_props, list) mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1240,7 +1258,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) * needed for the topology */ - dev->gpu->kfd2kgd->get_cu_info(dev->gpu->kgd, &cu_info); + amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; @@ -1249,7 +1267,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, gpu->pdev->devfn); dev->node_props.max_engine_clk_fcompute = - dev->gpu->kfd2kgd->get_max_engine_clock_in_mhz(dev->gpu->kgd); + amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); dev->node_props.max_engine_clk_ccompute = cpufreq_quick_get_max(0) / 1000; dev->node_props.drm_render_minor = @@ -1272,12 +1290,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case 
CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: pr_debug("Adding doorbell packet type capability\n"); dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; case CHIP_VEGA10: + case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << diff --git a/drivers/gpu/drm/amd/display/Makefile b/drivers/gpu/drm/amd/display/Makefile index c97dc9613325..cfde1568c79a 100644 --- a/drivers/gpu/drm/amd/display/Makefile +++ b/drivers/gpu/drm/amd/display/Makefile @@ -32,11 +32,12 @@ subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/inc subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/freesync subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/color subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/info_packet +subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/modules/power #TODO: remove when Timing Sync feature is complete subdir-ccflags-y += -DBUILD_FEATURE_TIMING_SYNC=0 -DAL_LIBS = amdgpu_dm dc modules/freesync modules/color modules/info_packet +DAL_LIBS = amdgpu_dm dc modules/freesync modules/color modules/info_packet modules/power AMD_DAL = $(addsuffix /Makefile, $(addprefix $(FULL_AMD_DISPLAY_PATH)/,$(DAL_LIBS))) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5a6edf65c9ea..d01315965af0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -23,6 +23,9 @@ * */ +/* The caprices of the preprocessor require that this be declared right here */ +#define CREATE_TRACE_POINTS + #include "dm_services_types.h" #include "dc.h" #include "dc/inc/core_types.h" @@ -38,7 +41,6 @@ #include "amd_shared.h" #include "amdgpu_dm_irq.h" #include "dm_helpers.h" -#include "dm_services_types.h" #include "amdgpu_dm_mst_types.h" #if defined(CONFIG_DEBUG_FS) #include 
"amdgpu_dm_debugfs.h" @@ -55,6 +57,7 @@ #include <drm/drmP.h> #include <drm/drm_atomic.h> +#include <drm/drm_atomic_uapi.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_dp_mst_helper.h> #include <drm/drm_fb_helper.h> @@ -72,10 +75,22 @@ #endif #include "modules/inc/mod_freesync.h" +#include "modules/power/power_helpers.h" +#include "modules/inc/mod_info_packet.h" #define FIRMWARE_RAVEN_DMCU "amdgpu/raven_dmcu.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU); +/** + * DOC: overview + * + * The AMDgpu display manager, **amdgpu_dm** (or even simpler, + * **dm**) sits between DRM and DC. It acts as a liason, converting DRM + * requests into DC requests, and DC responses into DRM responses. + * + * The root control structure is &struct amdgpu_display_manager. + */ + /* basic init/fini API */ static int amdgpu_dm_init(struct amdgpu_device *adev); static void amdgpu_dm_fini(struct amdgpu_device *adev); @@ -95,7 +110,7 @@ static void amdgpu_dm_update_connector_after_detect(struct amdgpu_dm_connector *aconnector); static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct amdgpu_plane *aplane, + struct drm_plane *plane, unsigned long possible_crtcs); static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, @@ -119,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state); static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); +static void handle_cursor_update(struct drm_plane *plane, + struct drm_plane_state *old_plane_state); @@ -379,11 +396,6 @@ static void amdgpu_dm_fbc_init(struct drm_connector *connector) } -/* - * Init display KMS - * - * Returns 0 on success - */ static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -393,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) /* Zero all the fields */ memset(&init_data, 0, sizeof(init_data)); + mutex_init(&adev->dm.dc_lock); + if(amdgpu_dm_irq_init(adev)) { 
DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n"); goto error; @@ -507,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) /* DC Destroy TODO: Replace destroy DAL */ if (adev->dm.dc) dc_destroy(&adev->dm.dc); + + mutex_destroy(&adev->dm.dc_lock); + return; } @@ -638,6 +655,26 @@ static int dm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct dmcu_iram_parameters params; + unsigned int linear_lut[16]; + int i; + struct dmcu *dmcu = adev->dm.dc->res_pool->dmcu; + bool ret; + + for (i = 0; i < 16; i++) + linear_lut[i] = 0xFFFF * i / 15; + + params.set = 0; + params.backlight_ramping_start = 0xCCCC; + params.backlight_ramping_reduction = 0xCCCCCCCC; + params.backlight_lut_array_size = 16; + params.backlight_lut_array = linear_lut; + + ret = dmcu_load_iram(dmcu, params); + + if (!ret) + return -EINVAL; + return detect_mst_link_for_all_connectors(adev->ddev); } @@ -663,6 +700,26 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) drm_modeset_unlock(&dev->mode_config.connection_mutex); } +/** + * dm_hw_init() - Initialize DC device + * @handle: The base driver device containing the amdpgu_dm device. + * + * Initialize the &struct amdgpu_display_manager device. This involves calling + * the initializers of each DM component, then populating the struct with them. + * + * Although the function implies hardware initialization, both hardware and + * software are initialized here. Splitting them out to their relevant init + * hooks is a future TODO item. 
+ * + * Some notable things that are initialized here: + * + * - Display Core, both software and hardware + * - DC modules that we need (freesync and color management) + * - DRM software states + * - Interrupt sources and handlers + * - Vblank support + * - Debug FS entries, if enabled + */ static int dm_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -673,6 +730,14 @@ static int dm_hw_init(void *handle) return 0; } +/** + * dm_hw_fini() - Teardown DC device + * @handle: The base driver device containing the amdpgu_dm device. + * + * Teardown components within &struct amdgpu_display_manager that require + * cleanup. This involves cleaning up the DRM device, DC, and any modules that + * were loaded. Also flush IRQ workqueues and disable them. + */ static int dm_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -898,6 +963,16 @@ static int dm_resume(void *handle) return ret; } +/** + * DOC: DM Lifecycle + * + * DM (and consequently DC) is registered in the amdgpu base driver as a IP + * block. When CONFIG_DRM_AMD_DC is enabled, the DM device IP block is added to + * the base driver's device list to be initialized and torn down accordingly. + * + * The functions to do so are provided as hooks in &struct amd_ip_funcs. 
+ */ + static const struct amd_ip_funcs amdgpu_dm_funcs = { .name = "dm", .early_init = dm_early_init, @@ -926,53 +1001,17 @@ const struct amdgpu_ip_block_version dm_ip_block = }; -static struct drm_atomic_state * -dm_atomic_state_alloc(struct drm_device *dev) -{ - struct dm_atomic_state *state = kzalloc(sizeof(*state), GFP_KERNEL); - - if (!state) - return NULL; - - if (drm_atomic_state_init(dev, &state->base) < 0) - goto fail; - - return &state->base; - -fail: - kfree(state); - return NULL; -} - -static void -dm_atomic_state_clear(struct drm_atomic_state *state) -{ - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); - - if (dm_state->context) { - dc_release_state(dm_state->context); - dm_state->context = NULL; - } - - drm_atomic_state_default_clear(state); -} - -static void -dm_atomic_state_alloc_free(struct drm_atomic_state *state) -{ - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); - drm_atomic_state_default_release(state); - kfree(dm_state); -} +/** + * DOC: atomic + * + * *WIP* + */ static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = { .fb_create = amdgpu_display_user_framebuffer_create, .output_poll_changed = drm_fb_helper_output_poll_changed, .atomic_check = amdgpu_dm_atomic_check, .atomic_commit = amdgpu_dm_atomic_commit, - .atomic_state_alloc = dm_atomic_state_alloc, - .atomic_state_clear = dm_atomic_state_clear, - .atomic_state_free = dm_atomic_state_alloc_free }; static struct drm_mode_config_helper_funcs amdgpu_dm_mode_config_helperfuncs = { @@ -1494,8 +1533,117 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) } #endif +/* + * Acquires the lock for the atomic state object and returns + * the new atomic state. + * + * This should only be called during atomic check. 
+ */ +static int dm_atomic_get_state(struct drm_atomic_state *state, + struct dm_atomic_state **dm_state) +{ + struct drm_device *dev = state->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_private_state *priv_state; + int ret; + + if (*dm_state) + return 0; + + ret = drm_modeset_lock(&dm->atomic_obj_lock, state->acquire_ctx); + if (ret) + return ret; + + priv_state = drm_atomic_get_private_obj_state(state, &dm->atomic_obj); + if (IS_ERR(priv_state)) + return PTR_ERR(priv_state); + + *dm_state = to_dm_atomic_state(priv_state); + + return 0; +} + +struct dm_atomic_state * +dm_atomic_get_new_state(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_private_obj *obj; + struct drm_private_state *new_obj_state; + int i; + + for_each_new_private_obj_in_state(state, obj, new_obj_state, i) { + if (obj->funcs == dm->atomic_obj.funcs) + return to_dm_atomic_state(new_obj_state); + } + + return NULL; +} + +struct dm_atomic_state * +dm_atomic_get_old_state(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_private_obj *obj; + struct drm_private_state *old_obj_state; + int i; + + for_each_old_private_obj_in_state(state, obj, old_obj_state, i) { + if (obj->funcs == dm->atomic_obj.funcs) + return to_dm_atomic_state(old_obj_state); + } + + return NULL; +} + +static struct drm_private_state * +dm_atomic_duplicate_state(struct drm_private_obj *obj) +{ + struct dm_atomic_state *old_state, *new_state; + + new_state = kzalloc(sizeof(*new_state), GFP_KERNEL); + if (!new_state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &new_state->base); + + new_state->context = dc_create_state(); + if (!new_state->context) { + kfree(new_state); 
+ return NULL; + } + + old_state = to_dm_atomic_state(obj->state); + if (old_state && old_state->context) + dc_resource_state_copy_construct(old_state->context, + new_state->context); + + return &new_state->base; +} + +static void dm_atomic_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct dm_atomic_state *dm_state = to_dm_atomic_state(state); + + if (dm_state && dm_state->context) + dc_release_state(dm_state->context); + + kfree(dm_state); +} + +static struct drm_private_state_funcs dm_atomic_state_funcs = { + .atomic_duplicate_state = dm_atomic_duplicate_state, + .atomic_destroy_state = dm_atomic_destroy_state, +}; + static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) { + struct dm_atomic_state *state; int r; adev->mode_info.mode_config_initialized = true; @@ -1513,6 +1661,24 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) adev->ddev->mode_config.fb_base = adev->gmc.aper_base; + drm_modeset_lock_init(&adev->dm.atomic_obj_lock); + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->context = dc_create_state(); + if (!state->context) { + kfree(state); + return -ENOMEM; + } + + dc_resource_state_copy_construct_current(adev->dm.dc, state->context); + + drm_atomic_private_obj_init(&adev->dm.atomic_obj, + &state->base, + &dm_atomic_state_funcs); + r = amdgpu_display_modeset_create_props(adev); if (r) return r; @@ -1520,15 +1686,63 @@ static int amdgpu_dm_mode_config_init(struct amdgpu_device *adev) return 0; } +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 +#define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 + #if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) ||\ defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE) +static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm) +{ +#if defined(CONFIG_ACPI) + struct amdgpu_dm_backlight_caps caps; + + if (dm->backlight_caps.caps_valid) + return; + + amdgpu_acpi_get_backlight_caps(dm->adev, &caps); + if 
(caps.caps_valid) { + dm->backlight_caps.min_input_signal = caps.min_input_signal; + dm->backlight_caps.max_input_signal = caps.max_input_signal; + dm->backlight_caps.caps_valid = true; + } else { + dm->backlight_caps.min_input_signal = + AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; + dm->backlight_caps.max_input_signal = + AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; + } +#else + dm->backlight_caps.min_input_signal = AMDGPU_DM_DEFAULT_MIN_BACKLIGHT; + dm->backlight_caps.max_input_signal = AMDGPU_DM_DEFAULT_MAX_BACKLIGHT; +#endif +} + static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); + struct amdgpu_dm_backlight_caps caps; + uint32_t brightness = bd->props.brightness; + + amdgpu_dm_update_backlight_caps(dm); + caps = dm->backlight_caps; + /* + * The brightness input is in the range 0-255 + * It needs to be rescaled to be between the + * requested min and max input signal + * + * It also needs to be scaled up by 0x101 to + * match the DC interface which has a range of + * 0 to 0xffff + */ + brightness = + brightness + * 0x101 + * (caps.max_input_signal - caps.min_input_signal) + / AMDGPU_MAX_BL_LEVEL + + caps.min_input_signal * 0x101; if (dc_link_set_backlight_level(dm->backlight_link, - bd->props.brightness, 0, 0)) + brightness, 0, 0)) return 0; else return 1; @@ -1555,6 +1769,8 @@ amdgpu_dm_register_backlight_device(struct amdgpu_display_manager *dm) char bl_name[16]; struct backlight_properties props = { 0 }; + amdgpu_dm_update_backlight_caps(dm); + props.max_brightness = AMDGPU_MAX_BL_LEVEL; props.brightness = AMDGPU_MAX_BL_LEVEL; props.type = BACKLIGHT_RAW; @@ -1580,18 +1796,18 @@ static int initialize_plane(struct amdgpu_display_manager *dm, struct amdgpu_mode_info *mode_info, int plane_id) { - struct amdgpu_plane *plane; + struct drm_plane *plane; unsigned long possible_crtcs; int ret = 0; - plane = kzalloc(sizeof(struct amdgpu_plane), GFP_KERNEL); + plane = kzalloc(sizeof(struct drm_plane), GFP_KERNEL); 
mode_info->planes[plane_id] = plane; if (!plane) { DRM_ERROR("KMS: Failed to allocate plane\n"); return -ENOMEM; } - plane->base.type = mode_info->plane_type[plane_id]; + plane->type = mode_info->plane_type[plane_id]; /* * HACK: IGT tests expect that each plane can only have @@ -1682,7 +1898,7 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } for (i = 0; i < dm->dc->caps.max_streams; i++) - if (amdgpu_dm_crtc_init(dm, &mode_info->planes[i]->base, i)) { + if (amdgpu_dm_crtc_init(dm, mode_info->planes[i], i)) { DRM_ERROR("KMS: Failed to initialize crtc\n"); goto fail; } @@ -1786,6 +2002,7 @@ fail: static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { drm_mode_config_cleanup(dm->ddev); + drm_atomic_private_obj_fini(&dm->atomic_obj); return; } @@ -1805,73 +2022,6 @@ static void dm_bandwidth_update(struct amdgpu_device *adev) /* TODO: implement later */ } -static int amdgpu_notify_freesync(struct drm_device *dev, void *data, - struct drm_file *filp) -{ - struct drm_atomic_state *state; - struct drm_modeset_acquire_ctx ctx; - struct drm_crtc *crtc; - struct drm_connector *connector; - struct drm_connector_state *old_con_state, *new_con_state; - int ret = 0; - uint8_t i; - bool enable = false; - - drm_modeset_acquire_init(&ctx, 0); - - state = drm_atomic_state_alloc(dev); - if (!state) { - ret = -ENOMEM; - goto out; - } - state->acquire_ctx = &ctx; - -retry: - drm_for_each_crtc(crtc, dev) { - ret = drm_atomic_add_affected_connectors(state, crtc); - if (ret) - goto fail; - - /* TODO rework amdgpu_dm_commit_planes so we don't need this */ - ret = drm_atomic_add_affected_planes(state, crtc); - if (ret) - goto fail; - } - - for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { - struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); - struct drm_crtc_state *new_crtc_state; - struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct 
dm_crtc_state *dm_new_crtc_state; - - if (!acrtc) { - ASSERT(0); - continue; - } - - new_crtc_state = drm_atomic_get_new_crtc_state(state, &acrtc->base); - dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - - dm_new_crtc_state->freesync_enabled = enable; - } - - ret = drm_atomic_commit(state); - -fail: - if (ret == -EDEADLK) { - drm_atomic_state_clear(state); - drm_modeset_backoff(&ctx); - goto retry; - } - - drm_atomic_state_put(state); - -out: - drm_modeset_drop_locks(&ctx); - drm_modeset_acquire_fini(&ctx); - return ret; -} - static const struct amdgpu_display_funcs dm_display_funcs = { .bandwidth_update = dm_bandwidth_update, /* called unconditionally */ .vblank_get_counter = dm_vblank_get_counter,/* called unconditionally */ @@ -1884,8 +2034,6 @@ static const struct amdgpu_display_funcs dm_display_funcs = { dm_crtc_get_scanoutpos,/* called unconditionally */ .add_encoder = NULL, /* VBIOS parsing. DAL does it. */ .add_connector = NULL, /* VBIOS parsing. DAL does it. */ - .notify_freesync = amdgpu_notify_freesync, - }; #if defined(CONFIG_DEBUG_KERNEL_DC) @@ -2486,7 +2634,8 @@ static void adjust_colour_depth_from_display_info(struct dc_crtc_timing *timing_ static void fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, const struct drm_display_mode *mode_in, - const struct drm_connector *connector) + const struct drm_connector *connector, + const struct dc_stream_state *old_stream) { struct dc_crtc_timing *timing_out = &stream->timing; const struct drm_display_info *info = &connector->display_info; @@ -2512,7 +2661,18 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, connector); timing_out->scan_type = SCANNING_TYPE_NODATA; timing_out->hdmi_vic = 0; - timing_out->vic = drm_match_cea_mode(mode_in); + + if(old_stream) { + timing_out->vic = old_stream->timing.vic; + timing_out->flags.HSYNC_POSITIVE_POLARITY = old_stream->timing.flags.HSYNC_POSITIVE_POLARITY; + timing_out->flags.VSYNC_POSITIVE_POLARITY = 
old_stream->timing.flags.VSYNC_POSITIVE_POLARITY; + } else { + timing_out->vic = drm_match_cea_mode(mode_in); + if (mode_in->flags & DRM_MODE_FLAG_PHSYNC) + timing_out->flags.HSYNC_POSITIVE_POLARITY = 1; + if (mode_in->flags & DRM_MODE_FLAG_PVSYNC) + timing_out->flags.VSYNC_POSITIVE_POLARITY = 1; + } timing_out->h_addressable = mode_in->crtc_hdisplay; timing_out->h_total = mode_in->crtc_htotal; @@ -2528,10 +2688,6 @@ fill_stream_properties_from_drm_display_mode(struct dc_stream_state *stream, mode_in->crtc_vsync_end - mode_in->crtc_vsync_start; timing_out->pix_clk_khz = mode_in->crtc_clock; timing_out->aspect_ratio = get_aspect_ratio(mode_in); - if (mode_in->flags & DRM_MODE_FLAG_PHSYNC) - timing_out->flags.HSYNC_POSITIVE_POLARITY = 1; - if (mode_in->flags & DRM_MODE_FLAG_PVSYNC) - timing_out->flags.VSYNC_POSITIVE_POLARITY = 1; stream->output_color_space = get_output_color_space(timing_out); @@ -2694,13 +2850,18 @@ static void dm_enable_per_frame_crtc_master_sync(struct dc_state *context) static struct dc_stream_state * create_stream_for_sink(struct amdgpu_dm_connector *aconnector, const struct drm_display_mode *drm_mode, - const struct dm_connector_state *dm_state) + const struct dm_connector_state *dm_state, + const struct dc_stream_state *old_stream) { struct drm_display_mode *preferred_mode = NULL; struct drm_connector *drm_connector; struct dc_stream_state *stream = NULL; struct drm_display_mode mode = *drm_mode; bool native_mode_found = false; + bool scale = dm_state ? 
(dm_state->scaling != RMX_OFF) : false; + int mode_refresh; + int preferred_refresh = 0; + struct dc_sink *sink = NULL; if (aconnector == NULL) { DRM_ERROR("aconnector is NULL!\n"); @@ -2739,6 +2900,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, struct drm_display_mode, head); + mode_refresh = drm_mode_vrefresh(&mode); + if (preferred_mode == NULL) { /* * This may not be an error, the use case is when we have no @@ -2751,13 +2914,23 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, decide_crtc_timing_for_drm_display_mode( &mode, preferred_mode, dm_state ? (dm_state->scaling != RMX_OFF) : false); + preferred_refresh = drm_mode_vrefresh(preferred_mode); } if (!dm_state) drm_mode_set_crtcinfo(&mode, 0); - fill_stream_properties_from_drm_display_mode(stream, - &mode, &aconnector->base); + /* + * If scaling is enabled and refresh rate didn't change + * we copy the vic and polarities of the old timings + */ + if (!scale || mode_refresh != preferred_refresh) + fill_stream_properties_from_drm_display_mode(stream, + &mode, &aconnector->base, NULL); + else + fill_stream_properties_from_drm_display_mode(stream, + &mode, &aconnector->base, old_stream); + update_stream_scaling_settings(&mode, dm_state, stream); fill_audio_info( @@ -2769,6 +2942,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, if (dm_state && dm_state->freesync_capable) stream->ignore_msa_timing_param = true; + finish: if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON) dc_sink_release(sink); @@ -2837,7 +3011,10 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc) state->adjust = cur->adjust; state->vrr_infopacket = cur->vrr_infopacket; - state->freesync_enabled = cur->freesync_enabled; + state->abm_level = cur->abm_level; + state->vrr_supported = cur->vrr_supported; + state->freesync_config = cur->freesync_config; + state->crc_enabled = cur->crc_enabled; /* TODO Duplicate dc_stream after objects are stream object is 
flattened */ @@ -2953,6 +3130,9 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector, } else if (property == adev->mode_info.max_bpc_property) { dm_new_state->max_bpc = val; ret = 0; + } else if (property == adev->mode_info.abm_level_property) { + dm_new_state->abm_level = val; + ret = 0; } return ret; @@ -2998,7 +3178,11 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector, } else if (property == adev->mode_info.max_bpc_property) { *val = dm_state->max_bpc; ret = 0; + } else if (property == adev->mode_info.abm_level_property) { + *val = dm_state->abm_level; + ret = 0; } + return ret; } @@ -3063,7 +3247,11 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector) __drm_atomic_helper_connector_duplicate_state(connector, &new_state->base); new_state->freesync_capable = state->freesync_capable; - new_state->freesync_enable = state->freesync_enable; + new_state->abm_level = state->abm_level; + new_state->scaling = state->scaling; + new_state->underscan_enable = state->underscan_enable; + new_state->underscan_hborder = state->underscan_hborder; + new_state->underscan_vborder = state->underscan_vborder; new_state->max_bpc = state->max_bpc; return &new_state->base; @@ -3166,7 +3354,7 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec goto fail; } - stream = create_stream_for_sink(aconnector, mode, NULL); + stream = create_stream_for_sink(aconnector, mode, NULL, NULL); if (stream == NULL) { DRM_ERROR("Failed to create stream for sink!\n"); goto fail; @@ -3200,7 +3388,6 @@ amdgpu_dm_connector_helper_funcs = { */ .get_modes = get_modes, .mode_valid = amdgpu_dm_connector_mode_valid, - .best_encoder = drm_atomic_helper_best_encoder }; static void dm_crtc_helper_disable(struct drm_crtc *crtc) @@ -3438,10 +3625,43 @@ static int dm_plane_atomic_check(struct drm_plane *plane, return -EINVAL; } +static int dm_plane_atomic_async_check(struct drm_plane *plane, + struct 
drm_plane_state *new_plane_state) +{ + /* Only support async updates on cursor planes. */ + if (plane->type != DRM_PLANE_TYPE_CURSOR) + return -EINVAL; + + return 0; +} + +static void dm_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *new_state) +{ + struct drm_plane_state *old_state = + drm_atomic_get_old_plane_state(new_state->state, plane); + + if (plane->state->fb != new_state->fb) + drm_atomic_set_fb_for_plane(plane->state, new_state->fb); + + plane->state->src_x = new_state->src_x; + plane->state->src_y = new_state->src_y; + plane->state->src_w = new_state->src_w; + plane->state->src_h = new_state->src_h; + plane->state->crtc_x = new_state->crtc_x; + plane->state->crtc_y = new_state->crtc_y; + plane->state->crtc_w = new_state->crtc_w; + plane->state->crtc_h = new_state->crtc_h; + + handle_cursor_update(plane, old_state); +} + static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .prepare_fb = dm_plane_helper_prepare_fb, .cleanup_fb = dm_plane_helper_cleanup_fb, .atomic_check = dm_plane_atomic_check, + .atomic_async_check = dm_plane_atomic_async_check, + .atomic_async_update = dm_plane_atomic_async_update }; /* @@ -3473,49 +3693,49 @@ static const u32 cursor_formats[] = { }; static int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, - struct amdgpu_plane *aplane, + struct drm_plane *plane, unsigned long possible_crtcs) { int res = -EPERM; - switch (aplane->base.type) { + switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: res = drm_universal_plane_init( dm->adev->ddev, - &aplane->base, + plane, possible_crtcs, &dm_plane_funcs, rgb_formats, ARRAY_SIZE(rgb_formats), - NULL, aplane->base.type, NULL); + NULL, plane->type, NULL); break; case DRM_PLANE_TYPE_OVERLAY: res = drm_universal_plane_init( dm->adev->ddev, - &aplane->base, + plane, possible_crtcs, &dm_plane_funcs, yuv_formats, ARRAY_SIZE(yuv_formats), - NULL, aplane->base.type, NULL); + NULL, plane->type, NULL); break; case DRM_PLANE_TYPE_CURSOR: res = 
drm_universal_plane_init( dm->adev->ddev, - &aplane->base, + plane, possible_crtcs, &dm_plane_funcs, cursor_formats, ARRAY_SIZE(cursor_formats), - NULL, aplane->base.type, NULL); + NULL, plane->type, NULL); break; } - drm_plane_helper_add(&aplane->base, &dm_plane_helper_funcs); + drm_plane_helper_add(plane, &dm_plane_helper_funcs); /* Create (reset) the plane state */ - if (aplane->base.funcs->reset) - aplane->base.funcs->reset(&aplane->base); + if (plane->funcs->reset) + plane->funcs->reset(plane); return res; @@ -3526,7 +3746,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, uint32_t crtc_index) { struct amdgpu_crtc *acrtc = NULL; - struct amdgpu_plane *cursor_plane; + struct drm_plane *cursor_plane; int res = -ENOMEM; @@ -3534,7 +3754,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, if (!cursor_plane) goto fail; - cursor_plane->base.type = DRM_PLANE_TYPE_CURSOR; + cursor_plane->type = DRM_PLANE_TYPE_CURSOR; res = amdgpu_dm_plane_init(dm, cursor_plane, 0); acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL); @@ -3545,7 +3765,7 @@ static int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, dm->ddev, &acrtc->base, plane, - &cursor_plane->base, + cursor_plane, &amdgpu_dm_crtc_funcs, NULL); if (res) @@ -3603,14 +3823,17 @@ static int to_drm_connector_type(enum signal_type st) } } +static struct drm_encoder *amdgpu_dm_connector_to_encoder(struct drm_connector *connector) +{ + return drm_encoder_find(connector->dev, NULL, connector->encoder_ids[0]); +} + static void amdgpu_dm_get_native_mode(struct drm_connector *connector) { - const struct drm_connector_helper_funcs *helper = - connector->helper_private; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; - encoder = helper->best_encoder(connector); + encoder = amdgpu_dm_connector_to_encoder(connector); if (encoder == NULL) return; @@ -3737,14 +3960,12 @@ static void amdgpu_dm_connector_ddc_get_modes(struct drm_connector *connector, static int 
amdgpu_dm_connector_get_modes(struct drm_connector *connector) { - const struct drm_connector_helper_funcs *helper = - connector->helper_private; struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); struct drm_encoder *encoder; struct edid *edid = amdgpu_dm_connector->edid; - encoder = helper->best_encoder(connector); + encoder = amdgpu_dm_connector_to_encoder(connector); if (!edid || !drm_edid_is_valid(edid)) { amdgpu_dm_connector->num_modes = @@ -3783,12 +4004,12 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, case DRM_MODE_CONNECTOR_HDMIA: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; aconnector->base.ycbcr_420_allowed = - link->link_enc->features.ycbcr420_supported ? true : false; + link->link_enc->features.hdmi_ycbcr420_supported ? true : false; break; case DRM_MODE_CONNECTOR_DisplayPort: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; aconnector->base.ycbcr_420_allowed = - link->link_enc->features.ycbcr420_supported ? true : false; + link->link_enc->features.dp_ycbcr420_supported ? 
true : false; break; case DRM_MODE_CONNECTOR_DVID: aconnector->base.polled = DRM_CONNECTOR_POLL_HPD; @@ -3814,6 +4035,17 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, adev->mode_info.max_bpc_property, 0); + if (connector_type == DRM_MODE_CONNECTOR_eDP && + dc_is_dmcu_initialized(adev->dm.dc)) { + drm_object_attach_property(&aconnector->base.base, + adev->mode_info.abm_level_property, 0); + } + + if (connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + drm_connector_attach_vrr_capable_property( + &aconnector->base); + } } static int amdgpu_dm_i2c_xfer(struct i2c_adapter *i2c_adap, @@ -4118,6 +4350,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc, static void handle_cursor_update(struct drm_plane *plane, struct drm_plane_state *old_plane_state) { + struct amdgpu_device *adev = plane->dev->dev_private; struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb); struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc; struct dm_crtc_state *crtc_state = crtc ? 
to_dm_crtc_state(crtc->state) : NULL; @@ -4142,9 +4375,12 @@ static void handle_cursor_update(struct drm_plane *plane, if (!position.enable) { /* turn off cursor */ - if (crtc_state && crtc_state->stream) + if (crtc_state && crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); dc_stream_set_cursor_position(crtc_state->stream, &position); + mutex_unlock(&adev->dm.dc_lock); + } return; } @@ -4162,6 +4398,7 @@ static void handle_cursor_update(struct drm_plane *plane, attributes.pitch = attributes.width; if (crtc_state->stream) { + mutex_lock(&adev->dm.dc_lock); if (!dc_stream_set_cursor_attributes(crtc_state->stream, &attributes)) DRM_ERROR("DC failed to set cursor attributes\n"); @@ -4169,6 +4406,7 @@ static void handle_cursor_update(struct drm_plane *plane, if (!dc_stream_set_cursor_position(crtc_state->stream, &position)) DRM_ERROR("DC failed to set cursor position\n"); + mutex_unlock(&adev->dm.dc_lock); } } @@ -4190,6 +4428,91 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc) acrtc->crtc_id); } +struct dc_stream_status *dc_state_get_stream_status( + struct dc_state *state, + struct dc_stream_state *stream) +{ + uint8_t i; + + for (i = 0; i < state->stream_count; i++) { + if (stream == state->streams[i]) + return &state->stream_status[i]; + } + + return NULL; +} + +static void update_freesync_state_on_stream( + struct amdgpu_display_manager *dm, + struct dm_crtc_state *new_crtc_state, + struct dc_stream_state *new_stream) +{ + struct mod_vrr_params vrr = {0}; + struct dc_info_packet vrr_infopacket = {0}; + struct mod_freesync_config config = new_crtc_state->freesync_config; + + if (!new_stream) + return; + + /* + * TODO: Determine why min/max totals and vrefresh can be 0 here. + * For now it's sufficient to just guard against these conditions. 
+ */ + + if (!new_stream->timing.h_total || !new_stream->timing.v_total) + return; + + if (new_crtc_state->vrr_supported && + config.min_refresh_in_uhz && + config.max_refresh_in_uhz) { + config.state = new_crtc_state->base.vrr_enabled ? + VRR_STATE_ACTIVE_VARIABLE : + VRR_STATE_INACTIVE; + } else { + config.state = VRR_STATE_UNSUPPORTED; + } + + mod_freesync_build_vrr_params(dm->freesync_module, + new_stream, + &config, &vrr); + + mod_freesync_build_vrr_infopacket( + dm->freesync_module, + new_stream, + &vrr, + PACKET_TYPE_VRR, + TRANSFER_FUNC_UNKNOWN, + &vrr_infopacket); + + new_crtc_state->freesync_timing_changed = + (memcmp(&new_crtc_state->adjust, + &vrr.adjust, + sizeof(vrr.adjust)) != 0); + + new_crtc_state->freesync_vrr_info_changed = + (memcmp(&new_crtc_state->vrr_infopacket, + &vrr_infopacket, + sizeof(vrr_infopacket)) != 0); + + new_crtc_state->adjust = vrr.adjust; + new_crtc_state->vrr_infopacket = vrr_infopacket; + + new_stream->adjust = new_crtc_state->adjust; + new_stream->vrr_infopacket = vrr_infopacket; + + if (new_crtc_state->freesync_vrr_info_changed) + DRM_DEBUG_KMS("VRR packet update: crtc=%u enabled=%d state=%d", + new_crtc_state->base.crtc->base.id, + (int)new_crtc_state->base.vrr_enabled, + (int)vrr.state); + + if (new_crtc_state->freesync_timing_changed) + DRM_DEBUG_KMS("VRR timing update: crtc=%u min=%u max=%u\n", + new_crtc_state->base.crtc->base.id, + vrr.adjust.v_total_min, + vrr.adjust.v_total_max); +} + /* * Executes flip * @@ -4211,6 +4534,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, struct dc_flip_addrs addr = { {0} }; /* TODO eliminate or rename surface_update */ struct dc_surface_update surface_updates[1] = { {0} }; + struct dc_stream_update stream_update = {0}; struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); struct dc_stream_status *stream_status; @@ -4283,13 +4607,30 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, } surface_updates->flip_addr = &addr; + if (acrtc_state->stream) { + 
update_freesync_state_on_stream( + &adev->dm, + acrtc_state, + acrtc_state->stream); + + if (acrtc_state->freesync_timing_changed) + stream_update.adjust = + &acrtc_state->stream->adjust; + + if (acrtc_state->freesync_vrr_info_changed) + stream_update.vrr_infopacket = + &acrtc_state->stream->vrr_infopacket; + } + + mutex_lock(&adev->dm.dc_lock); dc_commit_updates_for_stream(adev->dm.dc, surface_updates, 1, acrtc_state->stream, - NULL, + &stream_update, &surface_updates->surface, state); + mutex_unlock(&adev->dm.dc_lock); DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n", __func__, @@ -4304,6 +4645,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, * with a dc_plane_state and follow the atomic model a bit more closely here. */ static bool commit_planes_to_stream( + struct amdgpu_display_manager *dm, struct dc *dc, struct dc_plane_state **plane_states, uint8_t new_plane_count, @@ -4320,6 +4662,7 @@ static bool commit_planes_to_stream( struct dc_stream_state *dc_stream = dm_new_crtc_state->stream; struct dc_stream_update *stream_update = kzalloc(sizeof(struct dc_stream_update), GFP_KERNEL); + unsigned int abm_level; if (!stream_update) { BREAK_TO_DEBUGGER(); @@ -4347,9 +4690,9 @@ static bool commit_planes_to_stream( stream_update->dst = dc_stream->dst; stream_update->out_transfer_func = dc_stream->out_transfer_func; - if (dm_new_crtc_state->freesync_enabled != dm_old_crtc_state->freesync_enabled) { - stream_update->vrr_infopacket = &dc_stream->vrr_infopacket; - stream_update->adjust = &dc_stream->adjust; + if (dm_new_crtc_state->abm_level != dm_old_crtc_state->abm_level) { + abm_level = dm_new_crtc_state->abm_level; + stream_update->abm_level = &abm_level; } for (i = 0; i < new_plane_count; i++) { @@ -4379,11 +4722,13 @@ static bool commit_planes_to_stream( updates[i].scaling_info = &scaling_info[i]; } + mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream( dc, updates, new_plane_count, dc_stream, stream_update, plane_states, state); + 
mutex_unlock(&dm->dc_lock); kfree(flip_addr); kfree(plane_info); @@ -4393,6 +4738,7 @@ static bool commit_planes_to_stream( } static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, + struct dc_state *dc_state, struct drm_device *dev, struct amdgpu_display_manager *dm, struct drm_crtc *pcrtc, @@ -4409,7 +4755,6 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_crtc_state *acrtc_state = to_dm_crtc_state(new_pcrtc_state); struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(drm_atomic_get_old_crtc_state(state, pcrtc)); - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); int planes_count = 0; unsigned long flags; @@ -4470,7 +4815,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, crtc, fb, (uint32_t)drm_crtc_vblank_count(crtc) + *wait_for_vblank, - dm_state->context); + dc_state); } } @@ -4487,15 +4832,15 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } - dc_stream_attach->adjust = acrtc_state->adjust; - dc_stream_attach->vrr_infopacket = acrtc_state->vrr_infopacket; + dc_stream_attach->abm_level = acrtc_state->abm_level; - if (false == commit_planes_to_stream(dm->dc, + if (false == commit_planes_to_stream(dm, + dm->dc, plane_states_constructed, planes_count, acrtc_state, dm_old_crtc_state, - dm_state->context)) + dc_state)) dm_error("%s: Failed to attach plane!\n", __func__); } else { /*TODO BUG Here should go disable planes on CRTC. */ @@ -4549,12 +4894,21 @@ static int amdgpu_dm_atomic_commit(struct drm_device *dev, /*TODO Handle EINTR, reenable IRQ*/ } +/** + * amdgpu_dm_atomic_commit_tail() - AMDgpu DM's commit tail implementation. + * @state: The atomic state to commit + * + * This will tell DC to commit the constructed DC state from atomic_check, + * programming the hardware. Any failures here implies a hardware failure, since + * atomic check should have filtered anything non-kosher. 
+ */ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct amdgpu_device *adev = dev->dev_private; struct amdgpu_display_manager *dm = &adev->dm; struct dm_atomic_state *dm_state; + struct dc_state *dc_state = NULL, *dc_state_temp = NULL; uint32_t i, j; struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; @@ -4567,7 +4921,16 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_update_legacy_modeset_state(dev, state); - dm_state = to_dm_atomic_state(state); + dm_state = dm_atomic_get_new_state(state); + if (dm_state && dm_state->context) { + dc_state = dm_state->context; + } else { + /* No state changes, retain current state. */ + dc_state_temp = dc_create_state(); + ASSERT(dc_state_temp); + dc_state = dc_state_temp; + dc_resource_state_copy_construct_current(dm->dc, dc_state); + } /* update changed items */ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { @@ -4640,9 +5003,11 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) } } /* for_each_crtc_in_state() */ - if (dm_state->context) { - dm_enable_per_frame_crtc_master_sync(dm_state->context); - WARN_ON(!dc_commit_state(dm->dc, dm_state->context)); + if (dc_state) { + dm_enable_per_frame_crtc_master_sync(dc_state); + mutex_lock(&dm->dc_lock); + WARN_ON(!dc_commit_state(dm->dc, dc_state)); + mutex_unlock(&dm->dc_lock); } for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { @@ -4655,13 +5020,17 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dc_stream_get_status(dm_new_crtc_state->stream); if (!status) + status = dc_state_get_stream_status(dc_state, + dm_new_crtc_state->stream); + + if (!status) DC_ERR("got no status for stream %p on acrtc%p\n", dm_new_crtc_state->stream, acrtc); else acrtc->otg_inst = status->primary_otg_inst; } } - /* Handle scaling and underscan changes*/ + /* Handle scaling, 
underscan, and abm changes*/ for_each_oldnew_connector_in_state(state, connector, old_con_state, new_con_state, i) { struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); @@ -4677,11 +5046,14 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) if (!acrtc || drm_atomic_crtc_needs_modeset(new_crtc_state)) continue; - /* Skip anything that is not scaling or underscan changes */ - if (!is_scaling_state_different(dm_new_con_state, dm_old_con_state)) - continue; dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); + dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); + + /* Skip anything that is not scaling or underscan changes */ + if (!is_scaling_state_different(dm_new_con_state, dm_old_con_state) && + (dm_new_crtc_state->abm_level == dm_old_crtc_state->abm_level)) + continue; update_stream_scaling_settings(&dm_new_con_state->base.crtc->mode, dm_new_con_state, (struct dc_stream_state *)dm_new_crtc_state->stream); @@ -4693,17 +5065,17 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) WARN_ON(!status); WARN_ON(!status->plane_count); - dm_new_crtc_state->stream->adjust = dm_new_crtc_state->adjust; - dm_new_crtc_state->stream->vrr_infopacket = dm_new_crtc_state->vrr_infopacket; + dm_new_crtc_state->stream->abm_level = dm_new_crtc_state->abm_level; /*TODO How it works with MPO ?*/ if (!commit_planes_to_stream( + dm, dm->dc, status->plane_states, status->plane_count, dm_new_crtc_state, to_dm_crtc_state(old_crtc_state), - dm_state->context)) + dc_state)) dm_error("%s: Failed to update stream scaling!\n", __func__); } @@ -4736,7 +5108,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); if (dm_new_crtc_state->stream) - amdgpu_dm_commit_planes(state, dev, dm, crtc, &wait_for_vblank); + amdgpu_dm_commit_planes(state, dc_state, dev, + dm, 
crtc, &wait_for_vblank); } @@ -4776,6 +5149,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) for (i = 0; i < crtc_disable_count; i++) pm_runtime_put_autosuspend(dev->dev); pm_runtime_mark_last_busy(dev->dev); + + if (dc_state_temp) + dc_release_state(dc_state_temp); } @@ -4919,20 +5295,18 @@ static int do_aquire_global_lock(struct drm_device *dev, return ret < 0 ? ret : 0; } -void set_freesync_on_stream(struct amdgpu_display_manager *dm, - struct dm_crtc_state *new_crtc_state, - struct dm_connector_state *new_con_state, - struct dc_stream_state *new_stream) +static void get_freesync_config_for_crtc( + struct dm_crtc_state *new_crtc_state, + struct dm_connector_state *new_con_state) { struct mod_freesync_config config = {0}; - struct mod_vrr_params vrr = {0}; - struct dc_info_packet vrr_infopacket = {0}; struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(new_con_state->base.connector); - if (new_con_state->freesync_capable && - new_con_state->freesync_enable) { - config.state = new_crtc_state->freesync_enabled ? + new_crtc_state->vrr_supported = new_con_state->freesync_capable; + + if (new_con_state->freesync_capable) { + config.state = new_crtc_state->base.vrr_enabled ? 
VRR_STATE_ACTIVE_VARIABLE : VRR_STATE_INACTIVE; config.min_refresh_in_uhz = @@ -4942,19 +5316,18 @@ void set_freesync_on_stream(struct amdgpu_display_manager *dm, config.vsif_supported = true; } - mod_freesync_build_vrr_params(dm->freesync_module, - new_stream, - &config, &vrr); + new_crtc_state->freesync_config = config; +} - mod_freesync_build_vrr_infopacket(dm->freesync_module, - new_stream, - &vrr, - packet_type_fs1, - NULL, - &vrr_infopacket); +static void reset_freesync_config_for_crtc( + struct dm_crtc_state *new_crtc_state) +{ + new_crtc_state->vrr_supported = false; - new_crtc_state->adjust = vrr.adjust; - new_crtc_state->vrr_infopacket = vrr_infopacket; + memset(&new_crtc_state->adjust, 0, + sizeof(new_crtc_state->adjust)); + memset(&new_crtc_state->vrr_infopacket, 0, + sizeof(new_crtc_state->vrr_infopacket)); } static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, @@ -4962,11 +5335,11 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, bool enable, bool *lock_and_validation_needed) { + struct dm_atomic_state *dm_state = NULL; struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); struct dc_stream_state *new_stream; int ret = 0; @@ -5014,7 +5387,8 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, new_stream = create_stream_for_sink(aconnector, &new_crtc_state->mode, - dm_new_conn_state); + dm_new_conn_state, + dm_old_crtc_state->stream); /* * we can have no stream on ACTION_SET if a display @@ -5029,8 +5403,7 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, break; } - set_freesync_on_stream(dm, dm_new_crtc_state, - dm_new_conn_state, new_stream); + dm_new_crtc_state->abm_level = dm_new_conn_state->abm_level; if (dc_is_stream_unchanged(new_stream, dm_old_crtc_state->stream) && dc_is_stream_scaling_unchanged(new_stream, 
dm_old_crtc_state->stream)) { @@ -5040,9 +5413,6 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, } } - if (dm_old_crtc_state->freesync_enabled != dm_new_crtc_state->freesync_enabled) - new_crtc_state->mode_changed = true; - if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto next_crtc; @@ -5064,6 +5434,10 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, if (!dm_old_crtc_state->stream) goto next_crtc; + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto fail; + DRM_DEBUG_DRIVER("Disabling DRM crtc: %d\n", crtc->base.id); @@ -5079,6 +5453,8 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, dc_stream_release(dm_old_crtc_state->stream); dm_new_crtc_state->stream = NULL; + reset_freesync_config_for_crtc(dm_new_crtc_state); + *lock_and_validation_needed = true; } else {/* Add stream for any updated/enabled CRTC */ @@ -5098,6 +5474,10 @@ static int dm_update_crtcs_state(struct amdgpu_display_manager *dm, WARN_ON(dm_new_crtc_state->stream); + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto fail; + dm_new_crtc_state->stream = new_stream; dc_stream_retain(new_stream); @@ -5156,7 +5536,9 @@ next_crtc: amdgpu_dm_set_ctm(dm_new_crtc_state); } - + /* Update Freesync settings. 
*/ + get_freesync_config_for_crtc(dm_new_crtc_state, + dm_new_conn_state); } return ret; @@ -5172,12 +5554,13 @@ static int dm_update_planes_state(struct dc *dc, bool enable, bool *lock_and_validation_needed) { + + struct dm_atomic_state *dm_state = NULL; struct drm_crtc *new_plane_crtc, *old_plane_crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; struct dm_crtc_state *dm_new_crtc_state, *dm_old_crtc_state; - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); struct dm_plane_state *dm_new_plane_state, *dm_old_plane_state; int i ; /* TODO return page_flip_needed() function */ @@ -5215,6 +5598,10 @@ static int dm_update_planes_state(struct dc *dc, DRM_DEBUG_ATOMIC("Disabling DRM plane: %d on DRM crtc %d\n", plane->base.id, old_plane_crtc->base.id); + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + return ret; + if (!dc_remove_plane_from_context( dc, dm_old_crtc_state->stream, @@ -5269,6 +5656,12 @@ static int dm_update_planes_state(struct dc *dc, return ret; } + ret = dm_atomic_get_state(state, &dm_state); + if (ret) { + dc_plane_state_release(dc_new_plane_state); + return ret; + } + /* * Any atomic check errors that occur after this will * not need a release. 
The plane state will be attached @@ -5300,11 +5693,14 @@ static int dm_update_planes_state(struct dc *dc, return ret; } -enum surface_update_type dm_determine_update_type_for_commit(struct dc *dc, struct drm_atomic_state *state) -{ - - int i, j, num_plane; +static int +dm_determine_update_type_for_commit(struct dc *dc, + struct drm_atomic_state *state, + enum surface_update_type *out_type) +{ + struct dm_atomic_state *dm_state = NULL, *old_dm_state = NULL; + int i, j, num_plane, ret = 0; struct drm_plane_state *old_plane_state, *new_plane_state; struct dm_plane_state *new_dm_plane_state, *old_dm_plane_state; struct drm_crtc *new_plane_crtc, *old_plane_crtc; @@ -5320,6 +5716,12 @@ enum surface_update_type dm_determine_update_type_for_commit(struct dc *dc, stru struct dc_stream_update stream_update; enum surface_update_type update_type = UPDATE_TYPE_FAST; + if (!updates || !surface) { + DRM_ERROR("Plane or surface update failed to allocate"); + /* Set type to FULL to avoid crashing in DC*/ + update_type = UPDATE_TYPE_FULL; + goto cleanup; + } for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { new_dm_crtc_state = to_dm_crtc_state(new_crtc_state); @@ -5372,35 +5774,73 @@ enum surface_update_type dm_determine_update_type_for_commit(struct dc *dc, stru } if (num_plane > 0) { - status = dc_stream_get_status(new_dm_crtc_state->stream); + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto cleanup; + + old_dm_state = dm_atomic_get_old_state(state); + if (!old_dm_state) { + ret = -EINVAL; + goto cleanup; + } + + status = dc_state_get_stream_status(old_dm_state->context, + new_dm_crtc_state->stream); + update_type = dc_check_update_surfaces_for_stream(dc, updates, num_plane, &stream_update, status); if (update_type > UPDATE_TYPE_MED) { update_type = UPDATE_TYPE_FULL; - goto ret; + goto cleanup; } } } else if (!new_dm_crtc_state->stream && old_dm_crtc_state->stream) { update_type = UPDATE_TYPE_FULL; - goto ret; + goto cleanup; } } -ret: 
+cleanup: kfree(updates); kfree(surface); - return update_type; + *out_type = update_type; + return ret; } +/** + * amdgpu_dm_atomic_check() - Atomic check implementation for AMDgpu DM. + * @dev: The DRM device + * @state: The atomic state to commit + * + * Validate that the given atomic state is programmable by DC into hardware. + * This involves constructing a &struct dc_state reflecting the new hardware + * state we wish to commit, then querying DC to see if it is programmable. It's + * important not to modify the existing DC state. Otherwise, atomic_check + * may unexpectedly commit hardware changes. + * + * When validating the DC state, it's important that the right locks are + * acquired. For full updates case which removes/adds/updates streams on one + * CRTC while flipping on another CRTC, acquiring global lock will guarantee + * that any such full update commit will wait for completion of any outstanding + * flip using DRMs synchronization events. See + * dm_determine_update_type_for_commit() + * + * Note that DM adds the affected connectors for all CRTCs in state, when that + * might not seem necessary. This is because DC stream creation requires the + * DC sink, which is tied to the DRM connector state. Cleaning this up should + * be possible but non-trivial - a possible TODO item. + * + * Return: -Error code if validation failed. 
+ */ static int amdgpu_dm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) { struct amdgpu_device *adev = dev->dev_private; + struct dm_atomic_state *dm_state = NULL; struct dc *dc = adev->dm.dc; - struct dm_atomic_state *dm_state = to_dm_atomic_state(state); struct drm_connector *connector; struct drm_connector_state *old_con_state, *new_con_state; struct drm_crtc *crtc; @@ -5421,12 +5861,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - struct dm_crtc_state *dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); - struct dm_crtc_state *dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - if (!drm_atomic_crtc_needs_modeset(new_crtc_state) && !new_crtc_state->color_mgmt_changed && - (dm_old_crtc_state->freesync_enabled == dm_new_crtc_state->freesync_enabled)) + !new_crtc_state->vrr_enabled) continue; if (!new_crtc_state->enable) @@ -5441,10 +5878,6 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } - dm_state->context = dc_create_state(); - ASSERT(dm_state->context); - dc_resource_state_copy_construct_current(dc, dm_state->context); - /* Remove exiting planes if they are modified */ ret = dm_update_planes_state(dc, state, false, &lock_and_validation_needed); if (ret) { @@ -5497,16 +5930,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, lock_and_validation_needed = true; } - /* - * For full updates case when - * removing/adding/updating streams on one CRTC while flipping - * on another CRTC, - * acquiring global lock will guarantee that any such full - * update commit - * will wait for completion of any outstanding flip using DRMs - * synchronization events. 
- */ - update_type = dm_determine_update_type_for_commit(dc, state); + ret = dm_determine_update_type_for_commit(dc, state, &update_type); + if (ret) + goto fail; if (overall_update_type < update_type) overall_update_type = update_type; @@ -5524,6 +5950,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, if (overall_update_type > UPDATE_TYPE_FAST) { + ret = dm_atomic_get_state(state, &dm_state); + if (ret) + goto fail; ret = do_aquire_global_lock(dev, state); if (ret) @@ -5533,6 +5962,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, ret = -EINVAL; goto fail; } + } else if (state->legacy_cursor_update) { + /* + * This is a fast cursor update coming from the plane update + * helper, check if it can be done asynchronously for better + * performance. + */ + state->async_update = !drm_atomic_helper_async_check(dev, state); } /* Must be success */ @@ -5578,14 +6014,15 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct detailed_data_monitor_range *range; struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); - struct dm_connector_state *dm_con_state; + struct dm_connector_state *dm_con_state = NULL; struct drm_device *dev = connector->dev; struct amdgpu_device *adev = dev->dev_private; + bool freesync_capable = false; if (!connector->state) { DRM_ERROR("%s - Connector has no state", __func__); - return; + goto update; } if (!edid) { @@ -5595,9 +6032,7 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, amdgpu_dm_connector->max_vfreq = 0; amdgpu_dm_connector->pixel_clock_mhz = 0; - dm_con_state->freesync_capable = false; - dm_con_state->freesync_enable = false; - return; + goto update; } dm_con_state = to_dm_connector_state(connector->state); @@ -5605,10 +6040,10 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, edid_check_required = false; if (!amdgpu_dm_connector->dc_sink) { DRM_ERROR("dc_sink NULL, could not add free_sync module.\n"); - return; + 
goto update; } if (!adev->dm.freesync_module) - return; + goto update; /* * if edid non zero restrict freesync only for dp and edp */ @@ -5620,7 +6055,6 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, amdgpu_dm_connector); } } - dm_con_state->freesync_capable = false; if (edid_check_required == true && (edid->version > 1 || (edid->version == 1 && edid->revision > 1))) { for (i = 0; i < 4; i++) { @@ -5652,8 +6086,16 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) { - dm_con_state->freesync_capable = true; + freesync_capable = true; } } + +update: + if (dm_con_state) + dm_con_state->freesync_capable = freesync_capable; + + if (connector->vrr_capable_property) + drm_connector_set_vrr_capable_property(connector, + freesync_capable); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 6e069d777ab2..25bb91ee80ba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -59,60 +59,140 @@ struct common_irq_params { enum dc_irq_source irq_src; }; +/** + * struct irq_list_head - Linked-list for low context IRQ handlers. 
+ * + * @head: The list_head within &struct handler_data + * @work: A work_struct containing the deferred handler work + */ struct irq_list_head { struct list_head head; /* In case this interrupt needs post-processing, 'work' will be queued*/ struct work_struct work; }; +/** + * struct dm_comressor_info - Buffer info used by frame buffer compression + * @cpu_addr: MMIO cpu addr + * @bo_ptr: Pointer to the buffer object + * @gpu_addr: MMIO gpu addr + */ struct dm_comressor_info { void *cpu_addr; struct amdgpu_bo *bo_ptr; uint64_t gpu_addr; }; +/** + * struct amdgpu_dm_backlight_caps - Usable range of backlight values from ACPI + * @min_input_signal: minimum possible input in range 0-255 + * @max_input_signal: maximum possible input in range 0-255 + * @caps_valid: true if these values are from the ACPI interface + */ +struct amdgpu_dm_backlight_caps { + int min_input_signal; + int max_input_signal; + bool caps_valid; +}; + +/** + * struct amdgpu_display_manager - Central amdgpu display manager device + * + * @dc: Display Core control structure + * @adev: AMDGPU base driver structure + * @ddev: DRM base driver structure + * @display_indexes_num: Max number of display streams supported + * @irq_handler_list_table_lock: Synchronizes access to IRQ tables + * @backlight_dev: Backlight control device + * @cached_state: Caches device atomic state for suspend/resume + * @compressor: Frame buffer compression buffer. See &struct dm_comressor_info + */ struct amdgpu_display_manager { + struct dc *dc; + + /** + * @cgs_device: + * + * The Common Graphics Services device. It provides an interface for + * accessing registers. + */ struct cgs_device *cgs_device; - struct amdgpu_device *adev; /*AMD base driver*/ - struct drm_device *ddev; /*DRM base driver*/ + struct amdgpu_device *adev; + struct drm_device *ddev; u16 display_indexes_num; - /* - * 'irq_source_handler_table' holds a list of handlers - * per (DAL) IRQ source. 
+ /** + * @atomic_obj: + * + * In combination with &dm_atomic_state it helps manage + * global atomic state that doesn't map cleanly into existing + * drm resources, like &dc_context. + */ + struct drm_private_obj atomic_obj; + + struct drm_modeset_lock atomic_obj_lock; + + /** + * @dc_lock: + * + * Guards access to DC functions that can issue register write + * sequences. + */ + struct mutex dc_lock; + + /** + * @irq_handler_list_low_tab: + * + * Low priority IRQ handler table. * - * Each IRQ source may need to be handled at different contexts. - * By 'context' we mean, for example: - * - The ISR context, which is the direct interrupt handler. - * - The 'deferred' context - this is the post-processing of the - * interrupt, but at a lower priority. + * It is an n*m table consisting of n IRQ sources, and m handlers per IRQ + * source. Low priority IRQ handlers are deferred to a workqueue to be + * processed. Hence, they can sleep. * * Note that handlers are called in the same order as they were * registered (FIFO). */ struct irq_list_head irq_handler_list_low_tab[DAL_IRQ_SOURCES_NUMBER]; + + /** + * @irq_handler_list_high_tab: + * + * High priority IRQ handler table. + * + * It is an n*m table, same as &irq_handler_list_low_tab. However, + * handlers in this table are not deferred and are called immediately. + */ struct list_head irq_handler_list_high_tab[DAL_IRQ_SOURCES_NUMBER]; + /** + * @pflip_params: + * + * Page flip IRQ parameters, passed to registered handlers when + * triggered. + */ struct common_irq_params pflip_params[DC_IRQ_SOURCE_PFLIP_LAST - DC_IRQ_SOURCE_PFLIP_FIRST + 1]; + /** + * @vblank_params: + * + * Vertical blanking IRQ parameters, passed to registered handlers when + * triggered. 
+ */ struct common_irq_params vblank_params[DC_IRQ_SOURCE_VBLANK6 - DC_IRQ_SOURCE_VBLANK1 + 1]; - /* this spin lock synchronizes access to 'irq_handler_list_table' */ spinlock_t irq_handler_list_table_lock; struct backlight_device *backlight_dev; const struct dc_link *backlight_link; + struct amdgpu_dm_backlight_caps backlight_caps; struct mod_freesync *freesync_module; - /** - * Caches device atomic state for suspend/resume - */ struct drm_atomic_state *cached_state; struct dm_comressor_info compressor; @@ -183,15 +263,21 @@ struct dm_crtc_state { int crc_skip_count; bool crc_enabled; - bool freesync_enabled; + bool freesync_timing_changed; + bool freesync_vrr_info_changed; + + bool vrr_supported; + struct mod_freesync_config freesync_config; struct dc_crtc_timing_adjust adjust; struct dc_info_packet vrr_infopacket; + + int abm_level; }; #define to_dm_crtc_state(x) container_of(x, struct dm_crtc_state, base) struct dm_atomic_state { - struct drm_atomic_state base; + struct drm_private_state base; struct dc_state *context; }; @@ -206,8 +292,8 @@ struct dm_connector_state { uint8_t underscan_hborder; uint8_t max_bpc; bool underscan_enable; - bool freesync_enable; bool freesync_capable; + uint8_t abm_level; }; #define to_dm_connector_state(x)\ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index be19e6861189..216e48cec716 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -164,7 +164,7 @@ int amdgpu_dm_set_regamma_lut(struct dm_crtc_state *crtc) */ stream->out_transfer_func->type = TF_TYPE_DISTRIBUTED_POINTS; ret = mod_color_calculate_regamma_params(stream->out_transfer_func, - gamma, true, adev->asic_type <= CHIP_RAVEN); + gamma, true, adev->asic_type <= CHIP_RAVEN, NULL); dc_gamma_release(&gamma); if (!ret) { stream->out_transfer_func->type = old_type; diff --git 
a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index 01fc5717b657..f088ac585978 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -75,6 +75,11 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) return -EINVAL; } + if (!stream_state) { + DRM_ERROR("No stream state for CRTC%d\n", crtc->index); + return -EINVAL; + } + /* When enabling CRC, we should also disable dithering. */ if (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO) { if (dc_stream_configure_crc(stream_state->ctx->dc, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index a212178f2edc..cd10f77cdeb0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -32,16 +32,55 @@ #include "amdgpu_dm.h" #include "amdgpu_dm_irq.h" +/** + * DOC: overview + * + * DM provides another layer of IRQ management on top of what the base driver + * already provides. This is something that could be cleaned up, and is a + * future TODO item. + * + * The base driver provides IRQ source registration with DRM, handler + * registration into the base driver's IRQ table, and a handler callback + * amdgpu_irq_handler(), with which DRM calls on interrupts. This generic + * handler looks up the IRQ table, and calls the respective + * &amdgpu_irq_src_funcs.process hookups. + * + * What DM provides on top are two IRQ tables specifically for top-half and + * bottom-half IRQ handling, with the bottom-half implementing workqueues: + * + * - &amdgpu_display_manager.irq_handler_list_high_tab + * - &amdgpu_display_manager.irq_handler_list_low_tab + * + * They override the base driver's IRQ table, and the effect can be seen + * in the hooks that DM provides for &amdgpu_irq_src_funcs.process. 
They + * are all set to the DM generic handler amdgpu_dm_irq_handler(), which looks up + * DM's IRQ tables. However, in order for base driver to recognize this hook, DM + * still needs to register the IRQ with the base driver. See + * dce110_register_irq_handlers() and dcn10_register_irq_handlers(). + * + * To expose DC's hardware interrupt toggle to the base driver, DM implements + * &amdgpu_irq_src_funcs.set hooks. Base driver calls it through + * amdgpu_irq_update() to enable or disable the interrupt. + */ + /****************************************************************************** * Private declarations. *****************************************************************************/ +/** + * struct amdgpu_dm_irq_handler_data - Data for DM interrupt handlers. + * + * @list: Linked list entry referencing the next/previous handler + * @handler: Handler function + * @handler_arg: Argument passed to the handler when triggered + * @dm: DM which this handler belongs to + * @irq_source: DC interrupt source that this handler is registered for + */ struct amdgpu_dm_irq_handler_data { struct list_head list; interrupt_handler handler; void *handler_arg; - /* DM which this handler belongs to */ struct amdgpu_display_manager *dm; /* DAL irq source which registered for this interrupt. */ enum dc_irq_source irq_source; @@ -68,7 +107,7 @@ static void init_handler_common_data(struct amdgpu_dm_irq_handler_data *hcd, } /** - * dm_irq_work_func - Handle an IRQ outside of the interrupt handler proper. + * dm_irq_work_func() - Handle an IRQ outside of the interrupt handler proper. * * @work: work struct */ @@ -99,8 +138,8 @@ static void dm_irq_work_func(struct work_struct *work) * (The most common use is HPD interrupt) */ } -/** - * Remove a handler and return a pointer to hander list from which the +/* + * Remove a handler and return a pointer to handler list from which the * handler was removed. 
*/ static struct list_head *remove_irq_handler(struct amdgpu_device *adev, @@ -203,6 +242,24 @@ static bool validate_irq_unregistration_params(enum dc_irq_source irq_source, * Note: caller is responsible for input validation. *****************************************************************************/ +/** + * amdgpu_dm_irq_register_interrupt() - Register a handler within DM. + * @adev: The base driver device containing the DM device. + * @int_params: Interrupt parameters containing the source, and handler context + * @ih: Function pointer to the interrupt handler to register + * @handler_args: Arguments passed to the handler when the interrupt occurs + * + * Register an interrupt handler for the given IRQ source, under the given + * context. The context can either be high or low. High context handlers are + * executed directly within ISR context, while low context is executed within a + * workqueue, thereby allowing operations that sleep. + * + * Registered handlers are called in a FIFO manner, i.e. the most recently + * registered handler will be called first. + * + * Return: Handler data &struct amdgpu_dm_irq_handler_data containing the IRQ + * source, handler function, and args + */ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev, struct dc_interrupt_params *int_params, void (*ih)(void *), @@ -261,6 +318,15 @@ void *amdgpu_dm_irq_register_interrupt(struct amdgpu_device *adev, return handler_data; } +/** + * amdgpu_dm_irq_unregister_interrupt() - Remove a handler from the DM IRQ table + * @adev: The base driver device containing the DM device + * @irq_source: IRQ source to remove the given handler from + * @ih: Function pointer to the interrupt handler to unregister + * + * Go through both low and high context IRQ tables, and find the given handler + * for the given irq source. If found, remove it. Otherwise, do nothing. 
+ */ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev, enum dc_irq_source irq_source, void *ih) @@ -295,6 +361,20 @@ void amdgpu_dm_irq_unregister_interrupt(struct amdgpu_device *adev, } } +/** + * amdgpu_dm_irq_init() - Initialize DM IRQ management + * @adev: The base driver device containing the DM device + * + * Initialize DM's high and low context IRQ tables. + * + * The N by M table contains N IRQ sources, with M + * &struct amdgpu_dm_irq_handler_data hooked together in a linked list. The + * list_heads are initialized here. When an interrupt n is triggered, all m + * handlers are called in sequence, FIFO according to registration order. + * + * The low context table requires special steps to initialize, since handlers + * will be deferred to a workqueue. See &struct irq_list_head. + */ int amdgpu_dm_irq_init(struct amdgpu_device *adev) { int src; @@ -317,7 +397,12 @@ int amdgpu_dm_irq_init(struct amdgpu_device *adev) return 0; } -/* DM IRQ and timer resource release */ +/** + * amdgpu_dm_irq_fini() - Tear down DM IRQ management + * @adev: The base driver device containing the DM device + * + * Flush all work within the low context IRQ table. + */ void amdgpu_dm_irq_fini(struct amdgpu_device *adev) { int src; @@ -414,7 +499,7 @@ int amdgpu_dm_irq_resume_late(struct amdgpu_device *adev) return 0; } -/** +/* * amdgpu_dm_irq_schedule_work - schedule all work items registered for the * "irq_source". 
*/ @@ -439,8 +524,9 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev, } -/** amdgpu_dm_irq_immediate_work - * Callback high irq work immediately, don't send to work queue +/* + * amdgpu_dm_irq_immediate_work + * Callback high irq work immediately, don't send to work queue */ static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev, enum dc_irq_source irq_source) @@ -467,11 +553,14 @@ static void amdgpu_dm_irq_immediate_work(struct amdgpu_device *adev, DM_IRQ_TABLE_UNLOCK(adev, irq_table_flags); } -/* - * amdgpu_dm_irq_handler +/** + * amdgpu_dm_irq_handler - Generic DM IRQ handler + * @adev: amdgpu base driver device containing the DM device + * @source: Unused + * @entry: Data about the triggered interrupt * - * Generic IRQ handler, calls all registered high irq work immediately, and - * schedules work for low irq + * Calls all registered high irq work immediately, and schedules work for low + * irq. The DM IRQ table is used to find the corresponding handlers. */ static int amdgpu_dm_irq_handler(struct amdgpu_device *adev, struct amdgpu_irq_src *source, @@ -613,7 +702,7 @@ void amdgpu_dm_set_irq_funcs(struct amdgpu_device *adev) adev->hpd_irq.funcs = &dm_hpd_irq_funcs; } -/* +/** * amdgpu_dm_hpd_init - hpd setup callback. 
* * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c index 12001a006b2d..9d2d6986b983 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c @@ -485,11 +485,11 @@ void pp_rv_set_display_requirement(struct pp_smu *pp, return; clock.clock_type = amd_pp_dcf_clock; - clock.clock_freq_in_khz = req->hard_min_dcefclk_khz; + clock.clock_freq_in_khz = req->hard_min_dcefclk_mhz * 1000; pp_funcs->display_clock_voltage_request(pp_handle, &clock); clock.clock_type = amd_pp_f_clock; - clock.clock_freq_in_khz = req->hard_min_fclk_khz; + clock.clock_freq_in_khz = req->hard_min_fclk_mhz * 1000; pp_funcs->display_clock_voltage_request(pp_handle, &clock); } @@ -518,13 +518,13 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, wm_dce_clocks[i].wm_set_id = ranges->reader_wm_sets[i].wm_inst; wm_dce_clocks[i].wm_max_dcfclk_clk_in_khz = - ranges->reader_wm_sets[i].max_drain_clk_khz; + ranges->reader_wm_sets[i].max_drain_clk_mhz * 1000; wm_dce_clocks[i].wm_min_dcfclk_clk_in_khz = - ranges->reader_wm_sets[i].min_drain_clk_khz; + ranges->reader_wm_sets[i].min_drain_clk_mhz * 1000; wm_dce_clocks[i].wm_max_mem_clk_in_khz = - ranges->reader_wm_sets[i].max_fill_clk_khz; + ranges->reader_wm_sets[i].max_fill_clk_mhz * 1000; wm_dce_clocks[i].wm_min_mem_clk_in_khz = - ranges->reader_wm_sets[i].min_fill_clk_khz; + ranges->reader_wm_sets[i].min_fill_clk_mhz * 1000; } for (i = 0; i < wm_with_clock_ranges.num_wm_mcif_sets; i++) { @@ -534,13 +534,13 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp, wm_soc_clocks[i].wm_set_id = ranges->writer_wm_sets[i].wm_inst; wm_soc_clocks[i].wm_max_socclk_clk_in_khz = - ranges->writer_wm_sets[i].max_fill_clk_khz; + ranges->writer_wm_sets[i].max_fill_clk_mhz * 1000; wm_soc_clocks[i].wm_min_socclk_clk_in_khz = - ranges->writer_wm_sets[i].min_fill_clk_khz; + 
ranges->writer_wm_sets[i].min_fill_clk_mhz * 1000; wm_soc_clocks[i].wm_max_mem_clk_in_khz = - ranges->writer_wm_sets[i].max_drain_clk_khz; + ranges->writer_wm_sets[i].max_drain_clk_mhz * 1000; wm_soc_clocks[i].wm_min_mem_clk_in_khz = - ranges->writer_wm_sets[i].min_drain_clk_khz; + ranges->writer_wm_sets[i].min_drain_clk_mhz * 1000; } pp_funcs->set_watermarks_for_clocks_ranges(pp_handle, &wm_with_clock_ranges); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h new file mode 100644 index 000000000000..d898981684d5 --- /dev/null +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h @@ -0,0 +1,104 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM amdgpu_dm + +#if !defined(_AMDGPU_DM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _AMDGPU_DM_TRACE_H_ + +#include <linux/tracepoint.h> + +TRACE_EVENT(amdgpu_dc_rreg, + TP_PROTO(unsigned long *read_count, uint32_t reg, uint32_t value), + TP_ARGS(read_count, reg, value), + TP_STRUCT__entry( + __field(uint32_t, reg) + __field(uint32_t, value) + ), + TP_fast_assign( + __entry->reg = reg; + __entry->value = value; + *read_count = *read_count + 1; + ), + TP_printk("reg=0x%08lx, value=0x%08lx", + (unsigned long)__entry->reg, + (unsigned long)__entry->value) +); + +TRACE_EVENT(amdgpu_dc_wreg, + TP_PROTO(unsigned long *write_count, uint32_t reg, uint32_t value), + TP_ARGS(write_count, reg, value), + TP_STRUCT__entry( + __field(uint32_t, reg) + __field(uint32_t, value) + ), + TP_fast_assign( + __entry->reg = reg; + __entry->value = value; + *write_count = *write_count + 1; + ), + TP_printk("reg=0x%08lx, value=0x%08lx", + (unsigned long)__entry->reg, + (unsigned long)__entry->value) +); + + +TRACE_EVENT(amdgpu_dc_performance, + TP_PROTO(unsigned long read_count, unsigned long write_count, + unsigned long *last_read, unsigned long *last_write, + const char *func, unsigned int line), + TP_ARGS(read_count, write_count, last_read, last_write, func, line), + TP_STRUCT__entry( + __field(uint32_t, reads) + __field(uint32_t, writes) + __field(uint32_t, read_delta) + __field(uint32_t, write_delta) + __string(func, func) + __field(uint32_t, line) + ), + TP_fast_assign( + __entry->reads = read_count; + __entry->writes = write_count; + __entry->read_delta = read_count - *last_read; + __entry->write_delta = write_count - *last_write; + __assign_str(func, func); + __entry->line = line; + *last_read = read_count; + *last_write = write_count; + ), + TP_printk("%s:%d reads=%08ld (%08ld total), writes=%08ld (%08ld total)", + __get_str(func), __entry->line, + (unsigned long)__entry->read_delta, + (unsigned 
long)__entry->reads, + (unsigned long)__entry->write_delta, + (unsigned long)__entry->writes) +); +#endif /* _AMDGPU_DM_TRACE_H_ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE amdgpu_dm_trace +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c index 0e1dc1b1a48d..c2ab026aee91 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c @@ -2030,7 +2030,7 @@ static uint32_t get_src_obj_list(struct bios_parser *bp, ATOM_OBJECT *object, static struct device_id device_type_from_device_id(uint16_t device_id) { - struct device_id result_device_id; + struct device_id result_device_id = {0}; switch (device_id) { case ATOM_DEVICE_LCD1_SUPPORT: diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index ff764da21b6f..751bb614fc0e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -1884,6 +1884,8 @@ static const struct dc_vbios_funcs vbios_funcs = { .is_accelerated_mode = bios_parser_is_accelerated_mode, + .is_active_display = bios_is_active_display, + .set_scratch_critical_state = bios_parser_set_scratch_critical_state, diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c index d4589470985c..fdda8aa8e303 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.c @@ -88,3 +88,96 @@ uint32_t bios_get_vga_enabled_displays( return active_disp; } +bool bios_is_active_display( + struct dc_bios *bios, + enum signal_type signal, + const struct connector_device_tag_info *device_tag) +{ + uint32_t active = 0; + uint32_t connected = 0; + uint32_t bios_scratch_0 = 0; + uint32_t bios_scratch_3 = 0; + + switch 
(signal) { + case SIGNAL_TYPE_DVI_SINGLE_LINK: + case SIGNAL_TYPE_DVI_DUAL_LINK: + case SIGNAL_TYPE_HDMI_TYPE_A: + case SIGNAL_TYPE_DISPLAY_PORT: + case SIGNAL_TYPE_DISPLAY_PORT_MST: + { + if (device_tag->dev_id.device_type == DEVICE_TYPE_DFP) { + switch (device_tag->dev_id.enum_id) { + case 1: + { + active = ATOM_S3_DFP1_ACTIVE; + connected = 0x0008; //ATOM_DISPLAY_DFP1_CONNECT + } + break; + + case 2: + { + active = ATOM_S3_DFP2_ACTIVE; + connected = 0x0080; //ATOM_DISPLAY_DFP2_CONNECT + } + break; + + case 3: + { + active = ATOM_S3_DFP3_ACTIVE; + connected = 0x0200; //ATOM_DISPLAY_DFP3_CONNECT + } + break; + + case 4: + { + active = ATOM_S3_DFP4_ACTIVE; + connected = 0x0400; //ATOM_DISPLAY_DFP4_CONNECT + } + break; + + case 5: + { + active = ATOM_S3_DFP5_ACTIVE; + connected = 0x0800; //ATOM_DISPLAY_DFP5_CONNECT + } + break; + + case 6: + { + active = ATOM_S3_DFP6_ACTIVE; + connected = 0x0040; //ATOM_DISPLAY_DFP6_CONNECT + } + break; + + default: + break; + } + } + } + break; + + case SIGNAL_TYPE_LVDS: + case SIGNAL_TYPE_EDP: + { + active = ATOM_S3_LCD1_ACTIVE; + connected = 0x0002; //ATOM_DISPLAY_LCD1_CONNECT + } + break; + + default: + break; + } + + + if (bios->regs->BIOS_SCRATCH_0) /*follow up with other asic, todo*/ + bios_scratch_0 = REG_READ(BIOS_SCRATCH_0); + if (bios->regs->BIOS_SCRATCH_3) /*follow up with other asic, todo*/ + bios_scratch_3 = REG_READ(BIOS_SCRATCH_3); + + bios_scratch_3 &= ATOM_S3_DEVICE_ACTIVE_MASK; + if ((active & bios_scratch_3) && (connected & bios_scratch_0)) + return true; + + return false; +} + diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h index 75a29e68fb27..f33cac2147e3 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_helper.h @@ -35,6 +35,10 @@ bool bios_is_accelerated_mode(struct dc_bios *bios); void bios_set_scratch_acc_mode_change(struct dc_bios *bios); void 
bios_set_scratch_critical_state(struct dc_bios *bios, bool state); uint32_t bios_get_vga_enabled_displays(struct dc_bios *bios); +bool bios_is_active_display( + struct dc_bios *bios, + enum signal_type signal, + const struct connector_device_tag_info *device_tag); #define GET_IMAGE(type, offset) ((type *) bios_get_image(&bp->base, offset, sizeof(type))) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index 3208188b7ed4..43e4a2be0fa6 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -1423,27 +1423,27 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) ranges.num_reader_wm_sets = WM_SET_COUNT; ranges.num_writer_wm_sets = WM_SET_COUNT; ranges.reader_wm_sets[0].wm_inst = WM_A; - ranges.reader_wm_sets[0].min_drain_clk_khz = min_dcfclk_khz; - ranges.reader_wm_sets[0].max_drain_clk_khz = overdrive; - ranges.reader_wm_sets[0].min_fill_clk_khz = min_fclk_khz; - ranges.reader_wm_sets[0].max_fill_clk_khz = overdrive; + ranges.reader_wm_sets[0].min_drain_clk_mhz = min_dcfclk_khz / 1000; + ranges.reader_wm_sets[0].max_drain_clk_mhz = overdrive / 1000; + ranges.reader_wm_sets[0].min_fill_clk_mhz = min_fclk_khz / 1000; + ranges.reader_wm_sets[0].max_fill_clk_mhz = overdrive / 1000; ranges.writer_wm_sets[0].wm_inst = WM_A; - ranges.writer_wm_sets[0].min_fill_clk_khz = socclk_khz; - ranges.writer_wm_sets[0].max_fill_clk_khz = overdrive; - ranges.writer_wm_sets[0].min_drain_clk_khz = min_fclk_khz; - ranges.writer_wm_sets[0].max_drain_clk_khz = overdrive; + ranges.writer_wm_sets[0].min_fill_clk_mhz = socclk_khz / 1000; + ranges.writer_wm_sets[0].max_fill_clk_mhz = overdrive / 1000; + ranges.writer_wm_sets[0].min_drain_clk_mhz = min_fclk_khz / 1000; + ranges.writer_wm_sets[0].max_drain_clk_mhz = overdrive / 1000; if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) { ranges.reader_wm_sets[0].wm_inst = WM_A; - 
ranges.reader_wm_sets[0].min_drain_clk_khz = 300000; - ranges.reader_wm_sets[0].max_drain_clk_khz = 5000000; - ranges.reader_wm_sets[0].min_fill_clk_khz = 800000; - ranges.reader_wm_sets[0].max_fill_clk_khz = 5000000; + ranges.reader_wm_sets[0].min_drain_clk_mhz = 300; + ranges.reader_wm_sets[0].max_drain_clk_mhz = 5000; + ranges.reader_wm_sets[0].min_fill_clk_mhz = 800; + ranges.reader_wm_sets[0].max_fill_clk_mhz = 5000; ranges.writer_wm_sets[0].wm_inst = WM_A; - ranges.writer_wm_sets[0].min_fill_clk_khz = 200000; - ranges.writer_wm_sets[0].max_fill_clk_khz = 5000000; - ranges.writer_wm_sets[0].min_drain_clk_khz = 800000; - ranges.writer_wm_sets[0].max_drain_clk_khz = 5000000; + ranges.writer_wm_sets[0].min_fill_clk_mhz = 200; + ranges.writer_wm_sets[0].max_fill_clk_mhz = 5000; + ranges.writer_wm_sets[0].min_drain_clk_mhz = 800; + ranges.writer_wm_sets[0].max_drain_clk_mhz = 5000; } ranges.reader_wm_sets[1] = ranges.writer_wm_sets[0]; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 7c491c91465f..d9c57984394b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -62,6 +62,55 @@ const static char DC_BUILD_ID[] = "production-build"; +/** + * DOC: Overview + * + * DC is the OS-agnostic component of the amdgpu DC driver. + * + * DC maintains and validates a set of structs representing the state of the + * driver and writes that state to AMD hardware + * + * Main DC HW structs: + * + * struct dc - The central struct. One per driver. Created on driver load, + * destroyed on driver unload. + * + * struct dc_context - One per driver. + * Used as a backpointer by most other structs in dc. + * + * struct dc_link - One per connector (the physical DP, HDMI, miniDP, or eDP + * plugpoints). Created on driver load, destroyed on driver unload. + * + * struct dc_sink - One per display. Created on boot or hotplug. + * Destroyed on shutdown or hotunplug. 
A dc_link can have a local sink + * (the display directly attached). It may also have one or more remote + * sinks (in the Multi-Stream Transport case) + * + * struct resource_pool - One per driver. Represents the hw blocks not in the + * main pipeline. Not directly accessible by dm. + * + * Main dc state structs: + * + * These structs can be created and destroyed as needed. There is a full set of + * these structs in dc->current_state representing the currently programmed state. + * + * struct dc_state - The global DC state to track global state information, + * such as bandwidth values. + * + * struct dc_stream_state - Represents the hw configuration for the pipeline from + * a framebuffer to a display. Maps one-to-one with dc_sink. + * + * struct dc_plane_state - Represents a framebuffer. Each stream has at least one, + * and may have more in the Multi-Plane Overlay case. + * + * struct resource_context - Represents the programmable state of everything in + * the resource_pool. Not directly accessible by dm. + * + * struct pipe_ctx - A member of struct resource_context. Represents the + * internal hardware pipeline components. Each dc_plane_state has either + * one or two (in the pipe-split case). + */ + /******************************************************************************* * Private functions ******************************************************************************/ @@ -175,6 +224,17 @@ failed_alloc: return false; } +static struct dc_perf_trace *dc_perf_trace_create(void) +{ + return kzalloc(sizeof(struct dc_perf_trace), GFP_KERNEL); +} + +static void dc_perf_trace_destroy(struct dc_perf_trace **perf_trace) +{ + kfree(*perf_trace); + *perf_trace = NULL; +} + /** ***************************************************************************** * Function: dc_stream_adjust_vmin_vmax @@ -240,7 +300,7 @@ bool dc_stream_get_crtc_position(struct dc *dc, } /** - * dc_stream_configure_crc: Configure CRC capture for the given stream. 
+ * dc_stream_configure_crc() - Configure CRC capture for the given stream. * @dc: DC Object * @stream: The stream to configure CRC on. * @enable: Enable CRC if true, disable otherwise. @@ -292,7 +352,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream, } /** - * dc_stream_get_crc: Get CRC values for the given stream. + * dc_stream_get_crc() - Get CRC values for the given stream. * @dc: DC object * @stream: The DC stream state of the stream to get CRCs from. * @r_cr, g_y, b_cb: CRC values for the three channels are stored here. @@ -328,7 +388,7 @@ void dc_stream_set_dither_option(struct dc_stream_state *stream, enum dc_dither_option option) { struct bit_depth_reduction_params params; - struct dc_link *link = stream->status.link; + struct dc_link *link = stream->sink->link; struct pipe_ctx *pipes = NULL; int i; @@ -391,9 +451,11 @@ bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream) == stream) { pipes = &dc->current_state->res_ctx.pipe_ctx[i]; - dc->hwss.program_csc_matrix(pipes, - stream->output_color_space, - stream->csc_color_matrix.matrix); + dc->hwss.program_output_csc(dc, + pipes, + stream->output_color_space, + stream->csc_color_matrix.matrix, + pipes->plane_res.hubp->opp_id); ret = true; } } @@ -534,6 +596,8 @@ static void destruct(struct dc *dc) if (dc->ctx->created_bios) dal_bios_parser_destroy(&dc->ctx->dc_bios); + dc_perf_trace_destroy(&dc->ctx->perf_trace); + kfree(dc->ctx); dc->ctx = NULL; @@ -657,6 +721,12 @@ static bool construct(struct dc *dc, goto fail; } + dc_ctx->perf_trace = dc_perf_trace_create(); + if (!dc_ctx->perf_trace) { + ASSERT_CRITICAL(false); + goto fail; + } + /* Create GPIO service */ dc_ctx->gpio_service = dal_gpio_service_create( dc_version, @@ -941,7 +1011,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c if (!dcb->funcs->is_accelerated_mode(dcb)) dc->hwss.enable_accelerated_mode(dc, context); - dc->hwss.set_bandwidth(dc, context, false); 
+ dc->hwss.prepare_bandwidth(dc, context); /* re-program planes for existing stream, in case we need to * free up plane resource for later use @@ -957,8 +1027,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c } /* Program hardware */ - dc->hwss.ready_shared_resources(dc, context); - for (i = 0; i < dc->res_pool->pipe_count; i++) { pipe = &context->res_ctx.pipe_ctx[i]; dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe); @@ -1012,7 +1080,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); /* pplib is notified if disp_num changed */ - dc->hwss.set_bandwidth(dc, context, true); + dc->hwss.optimize_bandwidth(dc, context); dc_release_state(dc->current_state); @@ -1020,8 +1088,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_retain_state(dc->current_state); - dc->hwss.optimize_shared_resources(dc); - return result; } @@ -1063,7 +1129,7 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc) dc->optimized_required = false; - dc->hwss.set_bandwidth(dc, context, true); + dc->hwss.optimize_bandwidth(dc, context); return true; } @@ -1331,6 +1397,11 @@ static enum surface_update_type check_update_surfaces_for_stream( return overall_type; } +/** + * dc_check_update_surfaces_for_stream() - Determine update type (fast, med, or full) + * + * See :c:type:`enum surface_update_type <surface_update_type>` for explanation of update types + */ enum surface_update_type dc_check_update_surfaces_for_stream( struct dc *dc, struct dc_surface_update *updates, @@ -1369,35 +1440,6 @@ static struct dc_stream_status *stream_get_status( static const enum surface_update_type update_surface_trace_level = UPDATE_TYPE_FULL; -static void notify_display_count_to_smu( - struct dc *dc, - struct dc_state *context) -{ - int i, display_count; - struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; - - /* - * if function pointer 
not set up, this message is - * sent as part of pplib_apply_display_requirements. - * So just return. - */ - if (!pp_smu || !pp_smu->set_display_count) - return; - - display_count = 0; - for (i = 0; i < context->stream_count; i++) { - const struct dc_stream_state *stream = context->streams[i]; - - /* only notify active stream */ - if (stream->dpms_off) - continue; - - display_count++; - } - - pp_smu->set_display_count(&pp_smu->pp_smu, display_count); -} - static void commit_planes_do_stream_update(struct dc *dc, struct dc_stream_state *stream, struct dc_stream_update *stream_update, @@ -1422,7 +1464,6 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->adjust->v_total_max); if (stream_update->periodic_fn_vsync_delta && - pipe_ctx->stream_res.tg && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, @@ -1441,6 +1482,14 @@ static void commit_planes_do_stream_update(struct dc *dc, if (stream_update->output_csc_transform) dc_stream_program_csc_matrix(dc, stream); + if (stream_update->dither_option) { + resource_build_bit_depth_reduction_params(pipe_ctx->stream, + &pipe_ctx->stream->bit_depth_params); + pipe_ctx->stream_res.opp->funcs->opp_program_fmt(pipe_ctx->stream_res.opp, + &stream->bit_depth_params, + &stream->clamping); + } + /* Full fe update*/ if (update_type == UPDATE_TYPE_FAST) continue; @@ -1448,19 +1497,13 @@ static void commit_planes_do_stream_update(struct dc *dc, if (stream_update->dpms_off) { if (*stream_update->dpms_off) { core_link_disable_stream(pipe_ctx, KEEP_ACQUIRED_RESOURCE); - dc->hwss.pplib_apply_display_requirements( - dc, dc->current_state); - notify_display_count_to_smu(dc, dc->current_state); + dc->hwss.optimize_bandwidth(dc, dc->current_state); } else { - dc->hwss.pplib_apply_display_requirements( - dc, dc->current_state); - notify_display_count_to_smu(dc, dc->current_state); + 
dc->hwss.prepare_bandwidth(dc, dc->current_state); core_link_enable_stream(dc->current_state, pipe_ctx); } } - - if (stream_update->abm_level && pipe_ctx->stream_res.abm) { if (pipe_ctx->stream_res.tg->funcs->is_blanked) { // if otg funcs defined check if blanked before programming @@ -1487,7 +1530,7 @@ static void commit_planes_for_stream(struct dc *dc, struct pipe_ctx *top_pipe_to_program = NULL; if (update_type == UPDATE_TYPE_FULL) { - dc->hwss.set_bandwidth(dc, context, false); + dc->hwss.prepare_bandwidth(dc, context); context_clock_trace(dc, context); } @@ -1669,6 +1712,9 @@ enum dc_irq_source dc_interrupt_to_irq_source( return dal_irq_service_to_irq_source(dc->res_pool->irqs, src_id, ext_id); } +/** + * dc_interrupt_set() - Enable/disable an AMD hw interrupt source + */ bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable) { @@ -1724,6 +1770,15 @@ void dc_resume(struct dc *dc) core_link_resume(dc->links[i]); } +bool dc_is_dmcu_initialized(struct dc *dc) +{ + struct dmcu *dmcu = dc->res_pool->dmcu; + + if (dmcu) + return dmcu->funcs->is_dmcu_initialized(dmcu); + return false; +} + bool dc_submit_i2c( struct dc *dc, uint32_t link_index, @@ -1753,6 +1808,11 @@ static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct dc_sink return true; } +/** + * dc_link_add_remote_sink() - Create a sink and attach it to an existing link + * + * EDID length is in bytes + */ struct dc_sink *dc_link_add_remote_sink( struct dc_link *link, const uint8_t *edid, @@ -1811,6 +1871,12 @@ fail_add_sink: return NULL; } +/** + * dc_link_remove_remote_sink() - Remove a remote sink from a dc_link + * + * Note that this just removes the struct dc_sink - it doesn't + * program hardware or alter other members of dc_link + */ void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink) { int i; @@ -1848,4 +1914,4 @@ void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx info->dcfClockDeepSleep = (unsigned 
int)state->bw.dcn.clk.dcfclk_deep_sleep_khz; info->fClock = (unsigned int)state->bw.dcn.clk.fclk_khz; info->phyClock = (unsigned int)state->bw.dcn.clk.phyclk_khz; -}
\ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c index e1ebdf7b5eaf..73d049506618 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_debug.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_debug.c @@ -311,7 +311,7 @@ void context_timing_trace( { int i; struct dc *core_dc = dc; - int h_pos[MAX_PIPES], v_pos[MAX_PIPES]; + int h_pos[MAX_PIPES] = {0}, v_pos[MAX_PIPES] = {0}; struct crtc_position position; unsigned int underlay_idx = core_dc->res_pool->underlay_pipe_index; DC_LOGGER_INIT(dc->ctx->logger); @@ -322,8 +322,7 @@ void context_timing_trace( /* get_position() returns CRTC vertical/horizontal counter * hence not applicable for underlay pipe */ - if (pipe_ctx->stream == NULL - || pipe_ctx->pipe_idx == underlay_idx) + if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) continue; pipe_ctx->stream_res.tg->funcs->get_position(pipe_ctx->stream_res.tg, &position); @@ -333,7 +332,7 @@ void context_timing_trace( for (i = 0; i < core_dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - if (pipe_ctx->stream == NULL) + if (pipe_ctx->stream == NULL || pipe_ctx->pipe_idx == underlay_idx) continue; TIMING_TRACE("OTG_%d H_tot:%d V_tot:%d H_pos:%d V_pos:%d\n", diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 5da2186b3615..4dc5846de5c4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -198,6 +198,13 @@ static bool program_hpd_filter( return result; } +/** + * dc_link_detect_sink() - Determine if there is a sink connected + * + * @type: Returned connection type + * Does not detect downstream devices, such as MST sinks + * or display connected through active dongles + */ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) { uint32_t is_hpd_high = 0; @@ -324,9 +331,9 @@ static enum signal_type 
get_basic_signal_type( return SIGNAL_TYPE_NONE; } -/* - * @brief - * Check whether there is a dongle on DP connector +/** + * dc_link_is_dp_sink_present() - Check if there is a native DP + * or passive DP-HDMI dongle connected */ bool dc_link_is_dp_sink_present(struct dc_link *link) { @@ -593,6 +600,14 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid) return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0); } +/** + * dc_link_detect() - Detect if a sink is attached to a given link + * + * link->local_sink is created or destroyed as needed. + * + * This does not create remote sinks but will trigger DM + * to start MST detection if a branch is detected. + */ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason) { struct dc_sink_init_data sink_init_data = { 0 }; @@ -1357,28 +1372,13 @@ static enum dc_status enable_link_dp( struct dc_link *link = stream->sink->link; struct dc_link_settings link_settings = {0}; enum dp_panel_mode panel_mode; - enum dc_link_rate max_link_rate = LINK_RATE_HIGH2; /* get link settings for video mode timing */ decide_link_settings(stream, &link_settings); - /* raise clock state for HBR3 if required. 
Confirmed with HW DCE/DPCS - * logic for HBR3 still needs Nominal (0.8V) on VDDC rail - */ - if (link->link_enc->features.flags.bits.IS_HBR3_CAPABLE) - max_link_rate = LINK_RATE_HIGH3; - - if (link_settings.link_rate == max_link_rate) { - struct dc_clocks clocks = state->bw.dcn.clk; - - /* dce/dcn compat, do not update dispclk */ - clocks.dispclk_khz = 0; - /* 27mhz = 27000000hz= 27000khz */ - clocks.phyclk_khz = link_settings.link_rate * 27000; - - state->dis_clk->funcs->update_clocks( - state->dis_clk, &clocks, false); - } + pipe_ctx->stream_res.pix_clk_params.requested_sym_clk = + link_settings.link_rate * LINK_RATE_REF_FREQ_IN_KHZ; + state->dccg->funcs->update_clocks(state->dccg, state, false); dp_enable_link_phy( link, @@ -1411,8 +1411,6 @@ static enum dc_status enable_link_dp( else status = DC_FAIL_DP_LINK_TRAINING; - enable_stream_features(pipe_ctx); - return status; } @@ -2156,14 +2154,16 @@ int dc_link_get_backlight_level(const struct dc_link *link) { struct abm *abm = link->ctx->dc->res_pool->abm; - if (abm == NULL || abm->funcs->get_current_backlight_8_bit == NULL) + if (abm == NULL || abm->funcs->get_current_backlight == NULL) return DC_ERROR_UNEXPECTED; - return (int) abm->funcs->get_current_backlight_8_bit(abm); + return (int) abm->funcs->get_current_backlight(abm); } -bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, - uint32_t frame_ramp, const struct dc_stream_state *stream) +bool dc_link_set_backlight_level(const struct dc_link *link, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp, + const struct dc_stream_state *stream) { struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; @@ -2175,26 +2175,24 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, if ((dmcu == NULL) || (abm == NULL) || - (abm->funcs->set_backlight_level == NULL)) + (abm->funcs->set_backlight_level_pwm == NULL)) return false; - if (stream) { - if (stream->bl_pwm_level == 
EDP_BACKLIGHT_RAMP_DISABLE_LEVEL) - frame_ramp = 0; - - ((struct dc_stream_state *)stream)->bl_pwm_level = level; - } + if (stream) + ((struct dc_stream_state *)stream)->bl_pwm_level = + backlight_pwm_u16_16; use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); - DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", level, level); + DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", + backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { - if (stream != NULL) { - for (i = 0; i < MAX_PIPES; i++) { + for (i = 0; i < MAX_PIPES; i++) { + if (core_dc->current_state->res_ctx.pipe_ctx[i].stream) { if (core_dc->current_state->res_ctx. - pipe_ctx[i].stream - == stream) + pipe_ctx[i].stream->sink->link + == link) /* DMCU -1 for all controller id values, * therefore +1 here */ @@ -2204,9 +2202,9 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t level, 1; } } - abm->funcs->set_backlight_level( + abm->funcs->set_backlight_level_pwm( abm, - level, + backlight_pwm_u16_16, frame_ramp, controller_id, use_smooth_brightness); @@ -2220,7 +2218,7 @@ bool dc_link_set_abm_disable(const struct dc_link *link) struct dc *core_dc = link->ctx->dc; struct abm *abm = core_dc->res_pool->abm; - if ((abm == NULL) || (abm->funcs->set_backlight_level == NULL)) + if ((abm == NULL) || (abm->funcs->set_backlight_level_pwm == NULL)) return false; abm->funcs->set_abm_immediate_disable(abm); @@ -2233,7 +2231,7 @@ bool dc_link_set_psr_enable(const struct dc_link *link, bool enable, bool wait) struct dc *core_dc = link->ctx->dc; struct dmcu *dmcu = core_dc->res_pool->dmcu; - if (dmcu != NULL && link->psr_enabled) + if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_enabled) dmcu->funcs->set_psr_enable(dmcu, enable, wait); return true; @@ -2609,6 +2607,13 @@ void core_link_enable_stream( core_dc->hwss.unblank_stream(pipe_ctx, &pipe_ctx->stream->sink->link->cur_link_settings); + if 
(dc_is_dp_signal(pipe_ctx->stream->signal)) + enable_stream_features(pipe_ctx); + + dc_link_set_backlight_level(pipe_ctx->stream->sink->link, + pipe_ctx->stream->bl_pwm_level, + 0, + pipe_ctx->stream); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index d91df5ef0cb3..849a3a3032f7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2196,7 +2196,7 @@ static void get_active_converter_info( } if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_11) { - uint8_t det_caps[4]; + uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/ union dwnstream_port_caps_byte0 *port_caps = (union dwnstream_port_caps_byte0 *)det_caps; core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0, @@ -2371,11 +2371,22 @@ static bool retrieve_link_cap(struct dc_link *link) dpcd_data[DP_TRAINING_AUX_RD_INTERVAL]; if (aux_rd_interval.bits.EXT_RECIEVER_CAP_FIELD_PRESENT == 1) { - core_link_read_dpcd( + uint8_t ext_cap_data[16]; + + memset(ext_cap_data, '\0', sizeof(ext_cap_data)); + for (i = 0; i < read_dpcd_retry_cnt; i++) { + status = core_link_read_dpcd( link, DP_DP13_DPCD_REV, - dpcd_data, - sizeof(dpcd_data)); + ext_cap_data, + sizeof(ext_cap_data)); + if (status == DC_OK) { + memcpy(dpcd_data, ext_cap_data, sizeof(dpcd_data)); + break; + } + } + if (status != DC_OK) + dm_error("%s: Read extend caps data failed, use cap from dpcd 0.\n", __func__); } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b6fe29b9fb65..c347afd1030f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -478,10 +478,29 @@ static enum pixel_format convert_pixel_format_to_dalsurface( return dal_pixel_format; } -static void rect_swap_helper(struct rect *rect) -{ - swap(rect->height, rect->width); - swap(rect->x, 
rect->y); +static inline void get_vp_scan_direction( + enum dc_rotation_angle rotation, + bool horizontal_mirror, + bool *orthogonal_rotation, + bool *flip_vert_scan_dir, + bool *flip_horz_scan_dir) +{ + *orthogonal_rotation = false; + *flip_vert_scan_dir = false; + *flip_horz_scan_dir = false; + if (rotation == ROTATION_ANGLE_180) { + *flip_vert_scan_dir = true; + *flip_horz_scan_dir = true; + } else if (rotation == ROTATION_ANGLE_90) { + *orthogonal_rotation = true; + *flip_horz_scan_dir = true; + } else if (rotation == ROTATION_ANGLE_270) { + *orthogonal_rotation = true; + *flip_vert_scan_dir = true; + } + + if (horizontal_mirror) + *flip_horz_scan_dir = !*flip_horz_scan_dir; } static void calculate_viewport(struct pipe_ctx *pipe_ctx) @@ -490,25 +509,14 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) const struct dc_stream_state *stream = pipe_ctx->stream; struct scaler_data *data = &pipe_ctx->plane_res.scl_data; struct rect surf_src = plane_state->src_rect; - struct rect clip = { 0 }; + struct rect clip, dest; int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 || data->format == PIXEL_FORMAT_420BPP10) ? 
2 : 1; bool pri_split = pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state; bool sec_split = pipe_ctx->top_pipe && pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state; - bool flip_vert_scan_dir = false, flip_horz_scan_dir = false; - - /* - * Need to calculate the scan direction for viewport to properly determine offset - */ - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_180) { - flip_vert_scan_dir = true; - flip_horz_scan_dir = true; - } else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90) - flip_vert_scan_dir = true; - else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) - flip_horz_scan_dir = true; + bool orthogonal_rotation, flip_y_start, flip_x_start; if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE || stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM) { @@ -516,13 +524,10 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) sec_split = false; } - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || - pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) - rect_swap_helper(&surf_src); - /* The actual clip is an intersection between stream * source and surface clip */ + dest = plane_state->dst_rect; clip.x = stream->src.x > plane_state->clip_rect.x ? 
stream->src.x : plane_state->clip_rect.x; @@ -539,84 +544,77 @@ static void calculate_viewport(struct pipe_ctx *pipe_ctx) stream->src.y + stream->src.height - clip.y : plane_state->clip_rect.y + plane_state->clip_rect.height - clip.y ; + /* + * Need to calculate how scan origin is shifted in vp space + * to correctly rotate clip and dst + */ + get_vp_scan_direction( + plane_state->rotation, + plane_state->horizontal_mirror, + &orthogonal_rotation, + &flip_y_start, + &flip_x_start); + + if (orthogonal_rotation) { + swap(clip.x, clip.y); + swap(clip.width, clip.height); + swap(dest.x, dest.y); + swap(dest.width, dest.height); + } + if (flip_x_start) { + clip.x = dest.x + dest.width - clip.x - clip.width; + dest.x = 0; + } + if (flip_y_start) { + clip.y = dest.y + dest.height - clip.y - clip.height; + dest.y = 0; + } + /* offset = surf_src.ofs + (clip.ofs - surface->dst_rect.ofs) * scl_ratio * num_pixels = clip.num_pix * scl_ratio */ - data->viewport.x = surf_src.x + (clip.x - plane_state->dst_rect.x) * - surf_src.width / plane_state->dst_rect.width; - data->viewport.width = clip.width * - surf_src.width / plane_state->dst_rect.width; - - data->viewport.y = surf_src.y + (clip.y - plane_state->dst_rect.y) * - surf_src.height / plane_state->dst_rect.height; - data->viewport.height = clip.height * - surf_src.height / plane_state->dst_rect.height; - - /* To transfer the x, y to correct coordinate on mirror image (camera). - * deg 0 : transfer x, - * deg 90 : don't need to transfer, - * deg180 : transfer y, - * deg270 : transfer x and y. - * To transfer the x, y to correct coordinate on non-mirror image (video). - * deg 0 : don't need to transfer, - * deg 90 : transfer y, - * deg180 : transfer x and y, - * deg270 : transfer x. 
- */ - if (pipe_ctx->plane_state->horizontal_mirror) { - if (flip_horz_scan_dir && !flip_vert_scan_dir) { - data->viewport.y = surf_src.height - data->viewport.y - data->viewport.height; - data->viewport.x = surf_src.width - data->viewport.x - data->viewport.width; - } else if (flip_horz_scan_dir && flip_vert_scan_dir) - data->viewport.y = surf_src.height - data->viewport.y - data->viewport.height; - else { - if (!flip_horz_scan_dir && !flip_vert_scan_dir) - data->viewport.x = surf_src.width - data->viewport.x - data->viewport.width; + data->viewport.x = surf_src.x + (clip.x - dest.x) * surf_src.width / dest.width; + data->viewport.width = clip.width * surf_src.width / dest.width; + + data->viewport.y = surf_src.y + (clip.y - dest.y) * surf_src.height / dest.height; + data->viewport.height = clip.height * surf_src.height / dest.height; + + /* Handle split */ + if (pri_split || sec_split) { + if (orthogonal_rotation) { + if (flip_y_start != pri_split) + data->viewport.height /= 2; + else { + data->viewport.y += data->viewport.height / 2; + /* Ceil offset pipe */ + data->viewport.height = (data->viewport.height + 1) / 2; + } + } else { + if (flip_x_start != pri_split) + data->viewport.width /= 2; + else { + data->viewport.x += data->viewport.width / 2; + /* Ceil offset pipe */ + data->viewport.width = (data->viewport.width + 1) / 2; + } } - } else { - if (flip_horz_scan_dir) - data->viewport.x = surf_src.width - data->viewport.x - data->viewport.width; - if (flip_vert_scan_dir) - data->viewport.y = surf_src.height - data->viewport.y - data->viewport.height; } /* Round down, compensate in init */ data->viewport_c.x = data->viewport.x / vpc_div; data->viewport_c.y = data->viewport.y / vpc_div; - data->inits.h_c = (data->viewport.x % vpc_div) != 0 ? - dc_fixpt_half : dc_fixpt_zero; - data->inits.v_c = (data->viewport.y % vpc_div) != 0 ? - dc_fixpt_half : dc_fixpt_zero; + data->inits.h_c = (data->viewport.x % vpc_div) != 0 ? 
dc_fixpt_half : dc_fixpt_zero; + data->inits.v_c = (data->viewport.y % vpc_div) != 0 ? dc_fixpt_half : dc_fixpt_zero; + /* Round up, assume original video size always even dimensions */ data->viewport_c.width = (data->viewport.width + vpc_div - 1) / vpc_div; data->viewport_c.height = (data->viewport.height + vpc_div - 1) / vpc_div; - - /* Handle hsplit */ - if (sec_split) { - data->viewport.x += data->viewport.width / 2; - data->viewport_c.x += data->viewport_c.width / 2; - /* Ceil offset pipe */ - data->viewport.width = (data->viewport.width + 1) / 2; - data->viewport_c.width = (data->viewport_c.width + 1) / 2; - } else if (pri_split) { - if (data->viewport.width > 1) - data->viewport.width /= 2; - if (data->viewport_c.width > 1) - data->viewport_c.width /= 2; - } - - if (plane_state->rotation == ROTATION_ANGLE_90 || - plane_state->rotation == ROTATION_ANGLE_270) { - rect_swap_helper(&data->viewport_c); - rect_swap_helper(&data->viewport); - } } -static void calculate_recout(struct pipe_ctx *pipe_ctx, struct rect *recout_full) +static void calculate_recout(struct pipe_ctx *pipe_ctx) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; const struct dc_stream_state *stream = pipe_ctx->stream; - struct rect surf_src = plane_state->src_rect; struct rect surf_clip = plane_state->clip_rect; bool pri_split = pipe_ctx->bottom_pipe && pipe_ctx->bottom_pipe->plane_state == pipe_ctx->plane_state; @@ -624,10 +622,6 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct rect *recout_full pipe_ctx->top_pipe->plane_state == pipe_ctx->plane_state; bool top_bottom_split = stream->view_format == VIEW_3D_FORMAT_TOP_AND_BOTTOM; - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || - pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) - rect_swap_helper(&surf_src); - pipe_ctx->plane_res.scl_data.recout.x = stream->dst.x; if (stream->src.x < surf_clip.x) pipe_ctx->plane_res.scl_data.recout.x += (surf_clip.x @@ -656,7 +650,7 @@ static void 
calculate_recout(struct pipe_ctx *pipe_ctx, struct rect *recout_full stream->dst.y + stream->dst.height - pipe_ctx->plane_res.scl_data.recout.y; - /* Handle h & vsplit */ + /* Handle h & v split, handle rotation using viewport */ if (sec_split && top_bottom_split) { pipe_ctx->plane_res.scl_data.recout.y += pipe_ctx->plane_res.scl_data.recout.height / 2; @@ -665,44 +659,14 @@ static void calculate_recout(struct pipe_ctx *pipe_ctx, struct rect *recout_full (pipe_ctx->plane_res.scl_data.recout.height + 1) / 2; } else if (pri_split && top_bottom_split) pipe_ctx->plane_res.scl_data.recout.height /= 2; - else if (pri_split || sec_split) { - /* HMirror XOR Secondary_pipe XOR Rotation_180 */ - bool right_view = (sec_split != plane_state->horizontal_mirror) != - (plane_state->rotation == ROTATION_ANGLE_180); - - if (plane_state->rotation == ROTATION_ANGLE_90 - || plane_state->rotation == ROTATION_ANGLE_270) - /* Secondary_pipe XOR Rotation_270 */ - right_view = (plane_state->rotation == ROTATION_ANGLE_270) != sec_split; - - if (right_view) { - pipe_ctx->plane_res.scl_data.recout.x += - pipe_ctx->plane_res.scl_data.recout.width / 2; - /* Ceil offset pipe */ - pipe_ctx->plane_res.scl_data.recout.width = - (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; - } else { - if (pipe_ctx->plane_res.scl_data.recout.width > 1) - pipe_ctx->plane_res.scl_data.recout.width /= 2; - } - } - /* Unclipped recout offset = stream dst offset + ((surf dst offset - stream surf_src offset) - * * 1/ stream scaling ratio) - (surf surf_src offset * 1/ full scl - * ratio) - */ - recout_full->x = stream->dst.x + (plane_state->dst_rect.x - stream->src.x) - * stream->dst.width / stream->src.width - - surf_src.x * plane_state->dst_rect.width / surf_src.width - * stream->dst.width / stream->src.width; - recout_full->y = stream->dst.y + (plane_state->dst_rect.y - stream->src.y) - * stream->dst.height / stream->src.height - - surf_src.y * plane_state->dst_rect.height / surf_src.height - * 
stream->dst.height / stream->src.height; - - recout_full->width = plane_state->dst_rect.width - * stream->dst.width / stream->src.width; - recout_full->height = plane_state->dst_rect.height - * stream->dst.height / stream->src.height; + else if (sec_split) { + pipe_ctx->plane_res.scl_data.recout.x += + pipe_ctx->plane_res.scl_data.recout.width / 2; + /* Ceil offset pipe */ + pipe_ctx->plane_res.scl_data.recout.width = + (pipe_ctx->plane_res.scl_data.recout.width + 1) / 2; + } else if (pri_split) + pipe_ctx->plane_res.scl_data.recout.width /= 2; } static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) @@ -715,9 +679,10 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) const int out_w = stream->dst.width; const int out_h = stream->dst.height; + /*Swap surf_src height and width since scaling ratios are in recout rotation*/ if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) - rect_swap_helper(&surf_src); + swap(surf_src.height, surf_src.width); pipe_ctx->plane_res.scl_data.ratios.horz = dc_fixpt_from_fraction( surf_src.width, @@ -754,358 +719,202 @@ static void calculate_scaling_ratios(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.ratios.vert_c, 19); } -static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx, struct rect *recout_full) +static inline void adjust_vp_and_init_for_seamless_clip( + bool flip_scan_dir, + int recout_skip, + int src_size, + int taps, + struct fixed31_32 ratio, + struct fixed31_32 *init, + int *vp_offset, + int *vp_size) { - struct scaler_data *data = &pipe_ctx->plane_res.scl_data; - struct rect src = pipe_ctx->plane_state->src_rect; - int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 - || data->format == PIXEL_FORMAT_420BPP10) ? 
2 : 1; - bool flip_vert_scan_dir = false, flip_horz_scan_dir = false; - - /* - * Need to calculate the scan direction for viewport to make adjustments - */ - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_180) { - flip_vert_scan_dir = true; - flip_horz_scan_dir = true; - } else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90) - flip_vert_scan_dir = true; - else if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) - flip_horz_scan_dir = true; - - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || - pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { - rect_swap_helper(&src); - rect_swap_helper(&data->viewport_c); - rect_swap_helper(&data->viewport); - - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270 && - pipe_ctx->plane_state->horizontal_mirror) { - flip_vert_scan_dir = true; - } - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 && - pipe_ctx->plane_state->horizontal_mirror) { - flip_vert_scan_dir = false; - } - } else if (pipe_ctx->plane_state->horizontal_mirror) - flip_horz_scan_dir = !flip_horz_scan_dir; - - /* - * Init calculated according to formula: - * init = (scaling_ratio + number_of_taps + 1) / 2 - * init_bot = init + scaling_ratio - * init_c = init + truncated_vp_c_offset(from calculate viewport) - */ - data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19); - - data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.horz_c, data->taps.h_taps_c + 1), 2)), 19); - - data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19); - - data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int( - dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19); - - if (!flip_horz_scan_dir) { + if (!flip_scan_dir) { /* Adjust for viewport end clip-off */ - if ((data->viewport.x + 
data->viewport.width) < (src.x + src.width)) { - int vp_clip = src.x + src.width - data->viewport.width - data->viewport.x; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h, data->ratios.horz)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport.width += int_part < vp_clip ? int_part : vp_clip; - } - if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div) { - int vp_clip = (src.x + src.width) / vpc_div - - data->viewport_c.width - data->viewport_c.x; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); + if ((*vp_offset + *vp_size) < src_size) { + int vp_clip = src_size - *vp_size - *vp_offset; + int int_part = dc_fixpt_floor(dc_fixpt_sub(*init, ratio)); int_part = int_part > 0 ? int_part : 0; - data->viewport_c.width += int_part < vp_clip ? int_part : vp_clip; + *vp_size += int_part < vp_clip ? int_part : vp_clip; } /* Adjust for non-0 viewport offset */ - if (data->viewport.x) { + if (*vp_offset) { int int_part; - data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( - data->ratios.horz, data->recout.x - recout_full->x)); - int_part = dc_fixpt_floor(data->inits.h) - data->viewport.x; - if (int_part < data->taps.h_taps) { - int int_adj = data->viewport.x >= (data->taps.h_taps - int_part) ? - (data->taps.h_taps - int_part) : data->viewport.x; - data->viewport.x -= int_adj; - data->viewport.width += int_adj; + *init = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_skip)); + int_part = dc_fixpt_floor(*init) - *vp_offset; + if (int_part < taps) { + int int_adj = *vp_offset >= (taps - int_part) ? 
+ (taps - int_part) : *vp_offset; + *vp_offset -= int_adj; + *vp_size += int_adj; int_part += int_adj; - } else if (int_part > data->taps.h_taps) { - data->viewport.x += int_part - data->taps.h_taps; - data->viewport.width -= int_part - data->taps.h_taps; - int_part = data->taps.h_taps; + } else if (int_part > taps) { + *vp_offset += int_part - taps; + *vp_size -= int_part - taps; + int_part = taps; } - data->inits.h.value &= 0xffffffff; - data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); - } - - if (data->viewport_c.x) { - int int_part; - - data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( - data->ratios.horz_c, data->recout.x - recout_full->x)); - int_part = dc_fixpt_floor(data->inits.h_c) - data->viewport_c.x; - if (int_part < data->taps.h_taps_c) { - int int_adj = data->viewport_c.x >= (data->taps.h_taps_c - int_part) ? - (data->taps.h_taps_c - int_part) : data->viewport_c.x; - data->viewport_c.x -= int_adj; - data->viewport_c.width += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.h_taps_c) { - data->viewport_c.x += int_part - data->taps.h_taps_c; - data->viewport_c.width -= int_part - data->taps.h_taps_c; - int_part = data->taps.h_taps_c; - } - data->inits.h_c.value &= 0xffffffff; - data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); + init->value &= 0xffffffff; + *init = dc_fixpt_add_int(*init, int_part); } } else { /* Adjust for non-0 viewport offset */ - if (data->viewport.x) { - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h, data->ratios.horz)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport.width += int_part < data->viewport.x ? int_part : data->viewport.x; - data->viewport.x -= int_part < data->viewport.x ? int_part : data->viewport.x; - } - if (data->viewport_c.x) { - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.h_c, data->ratios.horz_c)); + if (*vp_offset) { + int int_part = dc_fixpt_floor(dc_fixpt_sub(*init, ratio)); int_part = int_part > 0 ? 
int_part : 0; - data->viewport_c.width += int_part < data->viewport_c.x ? int_part : data->viewport_c.x; - data->viewport_c.x -= int_part < data->viewport_c.x ? int_part : data->viewport_c.x; + *vp_size += int_part < *vp_offset ? int_part : *vp_offset; + *vp_offset -= int_part < *vp_offset ? int_part : *vp_offset; } /* Adjust for viewport end clip-off */ - if ((data->viewport.x + data->viewport.width) < (src.x + src.width)) { + if ((*vp_offset + *vp_size) < src_size) { int int_part; - int end_offset = src.x + src.width - - data->viewport.x - data->viewport.width; + int end_offset = src_size - *vp_offset - *vp_size; /* * this is init if vp had no offset, keep in mind this is from the * right side of vp due to scan direction */ - data->inits.h = dc_fixpt_add(data->inits.h, dc_fixpt_mul_int( - data->ratios.horz, data->recout.x - recout_full->x)); + *init = dc_fixpt_add(*init, dc_fixpt_mul_int(ratio, recout_skip)); /* * this is the difference between first pixel of viewport available to read * and init position, takning into account scan direction */ - int_part = dc_fixpt_floor(data->inits.h) - end_offset; - if (int_part < data->taps.h_taps) { - int int_adj = end_offset >= (data->taps.h_taps - int_part) ? - (data->taps.h_taps - int_part) : end_offset; - data->viewport.width += int_adj; + int_part = dc_fixpt_floor(*init) - end_offset; + if (int_part < taps) { + int int_adj = end_offset >= (taps - int_part) ? 
+ (taps - int_part) : end_offset; + *vp_size += int_adj; int_part += int_adj; - } else if (int_part > data->taps.h_taps) { - data->viewport.width += int_part - data->taps.h_taps; - int_part = data->taps.h_taps; + } else if (int_part > taps) { + *vp_size += int_part - taps; + int_part = taps; } - data->inits.h.value &= 0xffffffff; - data->inits.h = dc_fixpt_add_int(data->inits.h, int_part); + init->value &= 0xffffffff; + *init = dc_fixpt_add_int(*init, int_part); } - - if ((data->viewport_c.x + data->viewport_c.width) < (src.x + src.width) / vpc_div) { - int int_part; - int end_offset = (src.x + src.width) / vpc_div - - data->viewport_c.x - data->viewport_c.width; - - /* - * this is init if vp had no offset, keep in mind this is from the - * right side of vp due to scan direction - */ - data->inits.h_c = dc_fixpt_add(data->inits.h_c, dc_fixpt_mul_int( - data->ratios.horz_c, data->recout.x - recout_full->x)); - /* - * this is the difference between first pixel of viewport available to read - * and init position, takning into account scan direction - */ - int_part = dc_fixpt_floor(data->inits.h_c) - end_offset; - if (int_part < data->taps.h_taps_c) { - int int_adj = end_offset >= (data->taps.h_taps_c - int_part) ? - (data->taps.h_taps_c - int_part) : end_offset; - data->viewport_c.width += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.h_taps_c) { - data->viewport_c.width += int_part - data->taps.h_taps_c; - int_part = data->taps.h_taps_c; - } - data->inits.h_c.value &= 0xffffffff; - data->inits.h_c = dc_fixpt_add_int(data->inits.h_c, int_part); - } - } - if (!flip_vert_scan_dir) { - /* Adjust for viewport end clip-off */ - if ((data->viewport.y + data->viewport.height) < (src.y + src.height)) { - int vp_clip = src.y + src.height - data->viewport.height - data->viewport.y; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v, data->ratios.vert)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport.height += int_part < vp_clip ? 
int_part : vp_clip; - } - if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div) { - int vp_clip = (src.y + src.height) / vpc_div - - data->viewport_c.height - data->viewport_c.y; - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); - - int_part = int_part > 0 ? int_part : 0; - data->viewport_c.height += int_part < vp_clip ? int_part : vp_clip; - } - - /* Adjust for non-0 viewport offset */ - if (data->viewport.y) { - int int_part; - - data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( - data->ratios.vert, data->recout.y - recout_full->y)); - int_part = dc_fixpt_floor(data->inits.v) - data->viewport.y; - if (int_part < data->taps.v_taps) { - int int_adj = data->viewport.y >= (data->taps.v_taps - int_part) ? - (data->taps.v_taps - int_part) : data->viewport.y; - data->viewport.y -= int_adj; - data->viewport.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps) { - data->viewport.y += int_part - data->taps.v_taps; - data->viewport.height -= int_part - data->taps.v_taps; - int_part = data->taps.v_taps; - } - data->inits.v.value &= 0xffffffff; - data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); - } - - if (data->viewport_c.y) { - int int_part; - - data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( - data->ratios.vert_c, data->recout.y - recout_full->y)); - int_part = dc_fixpt_floor(data->inits.v_c) - data->viewport_c.y; - if (int_part < data->taps.v_taps_c) { - int int_adj = data->viewport_c.y >= (data->taps.v_taps_c - int_part) ? 
- (data->taps.v_taps_c - int_part) : data->viewport_c.y; - data->viewport_c.y -= int_adj; - data->viewport_c.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps_c) { - data->viewport_c.y += int_part - data->taps.v_taps_c; - data->viewport_c.height -= int_part - data->taps.v_taps_c; - int_part = data->taps.v_taps_c; - } - data->inits.v_c.value &= 0xffffffff; - data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); - } - } else { - /* Adjust for non-0 viewport offset */ - if (data->viewport.y) { - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v, data->ratios.vert)); +} - int_part = int_part > 0 ? int_part : 0; - data->viewport.height += int_part < data->viewport.y ? int_part : data->viewport.y; - data->viewport.y -= int_part < data->viewport.y ? int_part : data->viewport.y; - } - if (data->viewport_c.y) { - int int_part = dc_fixpt_floor( - dc_fixpt_sub(data->inits.v_c, data->ratios.vert_c)); +static void calculate_inits_and_adj_vp(struct pipe_ctx *pipe_ctx) +{ + const struct dc_plane_state *plane_state = pipe_ctx->plane_state; + const struct dc_stream_state *stream = pipe_ctx->stream; + struct scaler_data *data = &pipe_ctx->plane_res.scl_data; + struct rect src = pipe_ctx->plane_state->src_rect; + int recout_skip_h, recout_skip_v, surf_size_h, surf_size_v; + int vpc_div = (data->format == PIXEL_FORMAT_420BPP8 + || data->format == PIXEL_FORMAT_420BPP10) ? 2 : 1; + bool orthogonal_rotation, flip_vert_scan_dir, flip_horz_scan_dir; - int_part = int_part > 0 ? int_part : 0; - data->viewport_c.height += int_part < data->viewport_c.y ? int_part : data->viewport_c.y; - data->viewport_c.y -= int_part < data->viewport_c.y ? 
int_part : data->viewport_c.y; - } + /* + * Need to calculate the scan direction for viewport to make adjustments + */ + get_vp_scan_direction( + plane_state->rotation, + plane_state->horizontal_mirror, + &orthogonal_rotation, + &flip_vert_scan_dir, + &flip_horz_scan_dir); + + /* Calculate src rect rotation adjusted to recout space */ + surf_size_h = src.x + src.width; + surf_size_v = src.y + src.height; + if (flip_horz_scan_dir) + src.x = 0; + if (flip_vert_scan_dir) + src.y = 0; + if (orthogonal_rotation) { + swap(src.x, src.y); + swap(src.width, src.height); + } + + /* Recout matching initial vp offset = recout_offset - (stream dst offset + + * ((surf dst offset - stream src offset) * 1/ stream scaling ratio) + * - (surf surf_src offset * 1/ full scl ratio)) + */ + recout_skip_h = data->recout.x - (stream->dst.x + (plane_state->dst_rect.x - stream->src.x) + * stream->dst.width / stream->src.width - + src.x * plane_state->dst_rect.width / src.width + * stream->dst.width / stream->src.width); + recout_skip_v = data->recout.y - (stream->dst.y + (plane_state->dst_rect.y - stream->src.y) + * stream->dst.height / stream->src.height - + src.y * plane_state->dst_rect.height / src.height + * stream->dst.height / stream->src.height); + if (orthogonal_rotation) + swap(recout_skip_h, recout_skip_v); + /* + * Init calculated according to formula: + * init = (scaling_ratio + number_of_taps + 1) / 2 + * init_bot = init + scaling_ratio + * init_c = init + truncated_vp_c_offset(from calculate viewport) + */ + data->inits.h = dc_fixpt_truncate(dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.horz, data->taps.h_taps + 1), 2), 19); - /* Adjust for viewport end clip-off */ - if ((data->viewport.y + data->viewport.height) < (src.y + src.height)) { - int int_part; - int end_offset = src.y + src.height - - data->viewport.y - data->viewport.height; + data->inits.h_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.h_c, dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.horz_c, 
data->taps.h_taps_c + 1), 2)), 19); - /* - * this is init if vp had no offset, keep in mind this is from the - * right side of vp due to scan direction - */ - data->inits.v = dc_fixpt_add(data->inits.v, dc_fixpt_mul_int( - data->ratios.vert, data->recout.y - recout_full->y)); - /* - * this is the difference between first pixel of viewport available to read - * and init position, taking into account scan direction - */ - int_part = dc_fixpt_floor(data->inits.v) - end_offset; - if (int_part < data->taps.v_taps) { - int int_adj = end_offset >= (data->taps.v_taps - int_part) ? - (data->taps.v_taps - int_part) : end_offset; - data->viewport.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps) { - data->viewport.height += int_part - data->taps.v_taps; - int_part = data->taps.v_taps; - } - data->inits.v.value &= 0xffffffff; - data->inits.v = dc_fixpt_add_int(data->inits.v, int_part); - } + data->inits.v = dc_fixpt_truncate(dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.vert, data->taps.v_taps + 1), 2), 19); - if ((data->viewport_c.y + data->viewport_c.height) < (src.y + src.height) / vpc_div) { - int int_part; - int end_offset = (src.y + src.height) / vpc_div - - data->viewport_c.y - data->viewport_c.height; + data->inits.v_c = dc_fixpt_truncate(dc_fixpt_add(data->inits.v_c, dc_fixpt_div_int( + dc_fixpt_add_int(data->ratios.vert_c, data->taps.v_taps_c + 1), 2)), 19); - /* - * this is init if vp had no offset, keep in mind this is from the - * right side of vp due to scan direction - */ - data->inits.v_c = dc_fixpt_add(data->inits.v_c, dc_fixpt_mul_int( - data->ratios.vert_c, data->recout.y - recout_full->y)); - /* - * this is the difference between first pixel of viewport available to read - * and init position, taking into account scan direction - */ - int_part = dc_fixpt_floor(data->inits.v_c) - end_offset; - if (int_part < data->taps.v_taps_c) { - int int_adj = end_offset >= (data->taps.v_taps_c - int_part) ? 
- (data->taps.v_taps_c - int_part) : end_offset; - data->viewport_c.height += int_adj; - int_part += int_adj; - } else if (int_part > data->taps.v_taps_c) { - data->viewport_c.height += int_part - data->taps.v_taps_c; - int_part = data->taps.v_taps_c; - } - data->inits.v_c.value &= 0xffffffff; - data->inits.v_c = dc_fixpt_add_int(data->inits.v_c, int_part); - } - } + /* + * Taps, inits and scaling ratios are in recout space need to rotate + * to viewport rotation before adjustment + */ + adjust_vp_and_init_for_seamless_clip( + flip_horz_scan_dir, + recout_skip_h, + surf_size_h, + orthogonal_rotation ? data->taps.v_taps : data->taps.h_taps, + orthogonal_rotation ? data->ratios.vert : data->ratios.horz, + orthogonal_rotation ? &data->inits.v : &data->inits.h, + &data->viewport.x, + &data->viewport.width); + adjust_vp_and_init_for_seamless_clip( + flip_horz_scan_dir, + recout_skip_h, + surf_size_h / vpc_div, + orthogonal_rotation ? data->taps.v_taps_c : data->taps.h_taps_c, + orthogonal_rotation ? data->ratios.vert_c : data->ratios.horz_c, + orthogonal_rotation ? &data->inits.v_c : &data->inits.h_c, + &data->viewport_c.x, + &data->viewport_c.width); + adjust_vp_and_init_for_seamless_clip( + flip_vert_scan_dir, + recout_skip_v, + surf_size_v, + orthogonal_rotation ? data->taps.h_taps : data->taps.v_taps, + orthogonal_rotation ? data->ratios.horz : data->ratios.vert, + orthogonal_rotation ? &data->inits.h : &data->inits.v, + &data->viewport.y, + &data->viewport.height); + adjust_vp_and_init_for_seamless_clip( + flip_vert_scan_dir, + recout_skip_v, + surf_size_v / vpc_div, + orthogonal_rotation ? data->taps.h_taps_c : data->taps.v_taps_c, + orthogonal_rotation ? data->ratios.horz_c : data->ratios.vert_c, + orthogonal_rotation ? 
&data->inits.h_c : &data->inits.v_c, + &data->viewport_c.y, + &data->viewport_c.height); /* Interlaced inits based on final vert inits */ data->inits.v_bot = dc_fixpt_add(data->inits.v, data->ratios.vert); data->inits.v_c_bot = dc_fixpt_add(data->inits.v_c, data->ratios.vert_c); - if (pipe_ctx->plane_state->rotation == ROTATION_ANGLE_90 || - pipe_ctx->plane_state->rotation == ROTATION_ANGLE_270) { - rect_swap_helper(&data->viewport_c); - rect_swap_helper(&data->viewport); - } } bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) { const struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct dc_crtc_timing *timing = &pipe_ctx->stream->timing; - struct rect recout_full = { 0 }; bool res = false; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); /* Important: scaling ratio calculation requires pixel format, @@ -1115,9 +924,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); - if (pipe_ctx->stream->timing.flags.INTERLACE) - pipe_ctx->stream->dst.height *= 2; - calculate_scaling_ratios(pipe_ctx); calculate_viewport(pipe_ctx); @@ -1125,7 +931,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (pipe_ctx->plane_res.scl_data.viewport.height < 16 || pipe_ctx->plane_res.scl_data.viewport.width < 16) return false; - calculate_recout(pipe_ctx, &recout_full); + calculate_recout(pipe_ctx); /** * Setting line buffer pixel depth to 24bpp yields banding @@ -1138,9 +944,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.scl_data.h_active = timing->h_addressable + timing->h_border_left + timing->h_border_right; pipe_ctx->plane_res.scl_data.v_active = timing->v_addressable + timing->v_border_top + timing->v_border_bottom; - if (pipe_ctx->stream->timing.flags.INTERLACE) - pipe_ctx->plane_res.scl_data.v_active *= 2; - /* Taps calculations */ if (pipe_ctx->plane_res.xfm != NULL) @@ -1169,7 +972,7 
@@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) if (res) /* May need to re-check lb size after this in some obscure scenario */ - calculate_inits_and_adj_vp(pipe_ctx, &recout_full); + calculate_inits_and_adj_vp(pipe_ctx); DC_LOG_SCALER( "%s: Viewport:\nheight:%d width:%d x:%d " @@ -1185,9 +988,6 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) plane_state->dst_rect.x, plane_state->dst_rect.y); - if (pipe_ctx->stream->timing.flags.INTERLACE) - pipe_ctx->stream->dst.height /= 2; - return res; } @@ -1382,6 +1182,9 @@ bool dc_add_plane_to_context( return false; } + tail_pipe = resource_get_tail_pipe_for_stream(&context->res_ctx, stream); + ASSERT(tail_pipe); + free_pipe = acquire_free_pipe_for_stream(context, pool, stream); #if defined(CONFIG_DRM_AMD_DC_DCN1_0) @@ -1399,10 +1202,6 @@ bool dc_add_plane_to_context( free_pipe->plane_state = plane_state; if (head_pipe != free_pipe) { - - tail_pipe = resource_get_tail_pipe_for_stream(&context->res_ctx, stream); - ASSERT(tail_pipe); - free_pipe->stream_res.tg = tail_pipe->stream_res.tg; free_pipe->stream_res.abm = tail_pipe->stream_res.abm; free_pipe->stream_res.opp = tail_pipe->stream_res.opp; @@ -1648,6 +1447,14 @@ static bool are_stream_backends_same( return true; } +/** + * dc_is_stream_unchanged() - Compare two stream states for equivalence. + * + * Checks if there a difference between the two states + * that would require a mode change. + * + * Does not compare cursor position or attributes. + */ bool dc_is_stream_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { @@ -1658,6 +1465,9 @@ bool dc_is_stream_unchanged( return true; } +/** + * dc_is_stream_scaling_unchanged() - Compare scaling rectangles of two streams. 
+ */ bool dc_is_stream_scaling_unchanged( struct dc_stream_state *old_stream, struct dc_stream_state *stream) { @@ -1817,16 +1627,19 @@ bool resource_is_stream_unchanged( return false; } +/** + * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state. + */ enum dc_status dc_add_stream_to_ctx( struct dc *dc, struct dc_state *new_ctx, struct dc_stream_state *stream) { - struct dc_context *dc_ctx = dc->ctx; enum dc_status res; + DC_LOGGER_INIT(dc->ctx->logger); if (new_ctx->stream_count >= dc->res_pool->timing_generator_count) { - DC_ERROR("Max streams reached, can't add stream %p !\n", stream); + DC_LOG_WARNING("Max streams reached, can't add stream %p !\n", stream); return DC_ERROR_UNEXPECTED; } @@ -1836,11 +1649,14 @@ enum dc_status dc_add_stream_to_ctx( res = dc->res_pool->funcs->add_stream_to_ctx(dc, new_ctx, stream); if (res != DC_OK) - DC_ERROR("Adding stream %p to context failed with err %d!\n", stream, res); + DC_LOG_WARNING("Adding stream %p to context failed with err %d!\n", stream, res); return res; } +/** + * dc_remove_stream_from_ctx() - Remove a stream from a dc_state. + */ enum dc_status dc_remove_stream_from_ctx( struct dc *dc, struct dc_state *new_ctx, @@ -2002,6 +1818,8 @@ enum dc_status resource_map_pool_resources( } */ + calculate_phy_pix_clks(stream); + /* acquire new resources */ pipe_idx = acquire_first_free_pipe(&context->res_ctx, pool, stream); @@ -2059,6 +1877,12 @@ enum dc_status resource_map_pool_resources( return DC_ERROR_UNEXPECTED; } +/** + * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state + * Is a shallow copy. Increments refcounts on existing streams and planes. 
+ * @dc: copy out of dc->current_state + * @dst_ctx: copy into this + */ void dc_resource_state_copy_construct_current( const struct dc *dc, struct dc_state *dst_ctx) @@ -2071,9 +1895,17 @@ void dc_resource_state_construct( const struct dc *dc, struct dc_state *dst_ctx) { - dst_ctx->dis_clk = dc->res_pool->dccg; + dst_ctx->dccg = dc->res_pool->clk_mgr; } +/** + * dc_validate_global_state() - Determine if HW can support a given state + * Checks HW resource availability and bandwidth requirement. + * @dc: dc struct for this driver + * @new_ctx: state to be validated + * + * Return: DC_OK if the result can be programmed. Otherwise, an error code. + */ enum dc_status dc_validate_global_state( struct dc *dc, struct dc_state *new_ctx) @@ -2401,113 +2233,15 @@ static void set_vendor_info_packet( struct dc_info_packet *info_packet, struct dc_stream_state *stream) { - uint32_t length = 0; - bool hdmi_vic_mode = false; - uint8_t checksum = 0; - uint32_t i = 0; - enum dc_timing_3d_format format; - // Can be different depending on packet content /*todo*/ - // unsigned int length = pPathMode->dolbyVision ? 24 : 5; - - info_packet->valid = false; - - format = stream->timing.timing_3d_format; - if (stream->view_format == VIEW_3D_FORMAT_NONE) - format = TIMING_3D_FORMAT_NONE; - - /* Can be different depending on packet content */ - length = 5; - - if (stream->timing.hdmi_vic != 0 - && stream->timing.h_total >= 3840 - && stream->timing.v_total >= 2160) - hdmi_vic_mode = true; - - /* According to HDMI 1.4a CTS, VSIF should be sent - * for both 3D stereo and HDMI VIC modes. - * For all other modes, there is no VSIF sent. */ + /* SPD info packet for FreeSync */ - if (format == TIMING_3D_FORMAT_NONE && !hdmi_vic_mode) + /* Check if Freesync is supported. Return if false. If true, + * set the corresponding bit in the info packet + */ + if (!stream->vsp_infopacket.valid) return; - /* 24bit IEEE Registration identifier (0x000c03). LSB first. 
*/ - info_packet->sb[1] = 0x03; - info_packet->sb[2] = 0x0C; - info_packet->sb[3] = 0x00; - - /*PB4: 5 lower bytes = 0 (reserved). 3 higher bits = HDMI_Video_Format. - * The value for HDMI_Video_Format are: - * 0x0 (0b000) - No additional HDMI video format is presented in this - * packet - * 0x1 (0b001) - Extended resolution format present. 1 byte of HDMI_VIC - * parameter follows - * 0x2 (0b010) - 3D format indication present. 3D_Structure and - * potentially 3D_Ext_Data follows - * 0x3..0x7 (0b011..0b111) - reserved for future use */ - if (format != TIMING_3D_FORMAT_NONE) - info_packet->sb[4] = (2 << 5); - else if (hdmi_vic_mode) - info_packet->sb[4] = (1 << 5); - - /* PB5: If PB4 claims 3D timing (HDMI_Video_Format = 0x2): - * 4 lower bites = 0 (reserved). 4 higher bits = 3D_Structure. - * The value for 3D_Structure are: - * 0x0 - Frame Packing - * 0x1 - Field Alternative - * 0x2 - Line Alternative - * 0x3 - Side-by-Side (full) - * 0x4 - L + depth - * 0x5 - L + depth + graphics + graphics-depth - * 0x6 - Top-and-Bottom - * 0x7 - Reserved for future use - * 0x8 - Side-by-Side (Half) - * 0x9..0xE - Reserved for future use - * 0xF - Not used */ - switch (format) { - case TIMING_3D_FORMAT_HW_FRAME_PACKING: - case TIMING_3D_FORMAT_SW_FRAME_PACKING: - info_packet->sb[5] = (0x0 << 4); - break; - - case TIMING_3D_FORMAT_SIDE_BY_SIDE: - case TIMING_3D_FORMAT_SBS_SW_PACKED: - info_packet->sb[5] = (0x8 << 4); - length = 6; - break; - - case TIMING_3D_FORMAT_TOP_AND_BOTTOM: - case TIMING_3D_FORMAT_TB_SW_PACKED: - info_packet->sb[5] = (0x6 << 4); - break; - - default: - break; - } - - /*PB5: If PB4 is set to 0x1 (extended resolution format) - * fill PB5 with the correct HDMI VIC code */ - if (hdmi_vic_mode) - info_packet->sb[5] = stream->timing.hdmi_vic; - - /* Header */ - info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR; /* VSIF packet type. 
*/ - info_packet->hb1 = 0x01; /* Version */ - - /* 4 lower bits = Length, 4 higher bits = 0 (reserved) */ - info_packet->hb2 = (uint8_t) (length); - - /* Calculate checksum */ - checksum = 0; - checksum += info_packet->hb0; - checksum += info_packet->hb1; - checksum += info_packet->hb2; - - for (i = 1; i <= length; i++) - checksum += info_packet->sb[i]; - - info_packet->sb[0] = (uint8_t) (0x100 - checksum); - - info_packet->valid = true; + *info_packet = stream->vsp_infopacket; } static void set_spd_info_packet( @@ -2563,10 +2297,6 @@ void dc_resource_state_destruct(struct dc_state *context) } } -/* - * Copy src_ctx into dst_ctx and retain all surfaces and streams referenced - * by the src_ctx - */ void dc_resource_state_copy_construct( const struct dc_state *src_ctx, struct dc_state *dst_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 2ac848a106ba..66e5c4623a49 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -100,12 +100,11 @@ static void construct(struct dc_stream_state *stream, /* EDID CAP translation for HDMI 2.0 */ stream->timing.flags.LTE_340MCSC_SCRAMBLE = dc_sink_data->edid_caps.lte_340mcsc_scramble; - stream->status.link = stream->sink->link; - update_stream_signal(stream); stream->out_transfer_func = dc_create_transfer_func(); stream->out_transfer_func->type = TF_TYPE_BYPASS; + stream->out_transfer_func->ctx = stream->ctx; } static void destruct(struct dc_stream_state *stream) @@ -171,7 +170,7 @@ struct dc_stream_status *dc_stream_get_status( } /** - * Update the cursor attributes and set cursor surface address + * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address */ bool dc_stream_set_cursor_attributes( struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index 
8fb3aefd195c..c60c9b4c3075 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -44,6 +44,7 @@ static void construct(struct dc_context *ctx, struct dc_plane_state *plane_state plane_state->in_transfer_func = dc_create_transfer_func(); plane_state->in_transfer_func->type = TF_TYPE_BYPASS; + plane_state->in_transfer_func->ctx = ctx; } static void destruct(struct dc_plane_state *plane_state) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b57fa61b3034..4b5bbb13ce7f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -36,9 +36,10 @@ #include "inc/hw_sequencer.h" #include "inc/compressor.h" +#include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.1.68" +#define DC_VER "3.2.08" #define MAX_SURFACES 3 #define MAX_STREAMS 6 @@ -47,13 +48,6 @@ /******************************************************************************* * Display Core Interfaces ******************************************************************************/ -struct dmcu_version { - unsigned int date; - unsigned int month; - unsigned int year; - unsigned int interface_version; -}; - struct dc_versions { const char *dc_ver; struct dmcu_version dmcu_version; @@ -250,8 +244,6 @@ struct dc_debug_options { bool disable_dmcu; bool disable_psr; bool force_abm_enable; - bool disable_hbup_pg; - bool disable_dpp_pg; bool disable_stereo_support; bool vsr_support; bool performance_trace; @@ -305,11 +297,6 @@ struct dc { struct hw_sequencer_funcs hwss; struct dce_hwseq *hwseq; - /* temp store of dm_pp_display_configuration - * to compare to see if display config changed - */ - struct dm_pp_display_configuration prev_display_config; - bool optimized_required; /* FBC compressor */ @@ -755,5 +742,6 @@ void dc_set_power_state( struct dc *dc, enum dc_acpi_cm_power_state power_state); void dc_resume(struct dc *dc); +bool dc_is_dmcu_initialized(struct 
dc *dc); #endif /* DC_INTERFACE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h index 8130b95ccc53..a8b3cedf9431 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_bios_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_bios_types.h @@ -86,6 +86,10 @@ struct dc_vbios_funcs { bool (*is_accelerated_mode)( struct dc_bios *bios); + bool (*is_active_display)( + struct dc_bios *bios, + enum signal_type signal, + const struct connector_device_tag_info *device_tag); void (*set_scratch_critical_state)( struct dc_bios *bios, bool state); @@ -141,6 +145,7 @@ struct dc_vbios_funcs { }; struct bios_registers { + uint32_t BIOS_SCRATCH_0; uint32_t BIOS_SCRATCH_3; uint32_t BIOS_SCRATCH_6; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 7825e4b5e97c..9ddfe4c6938b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -358,15 +358,16 @@ union dc_tiling_info { } gfx8; struct { + enum swizzle_mode_values swizzle; unsigned int num_pipes; - unsigned int num_banks; + unsigned int max_compressed_frags; unsigned int pipe_interleave; + + unsigned int num_banks; unsigned int num_shader_engines; unsigned int num_rb_per_se; - unsigned int max_compressed_frags; bool shaderEnable; - enum swizzle_mode_values swizzle; bool meta_linear; bool rb_aligned; bool pipe_aligned; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index 3bfdccceb524..29f19d57ff7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -128,8 +128,10 @@ struct dc_link { const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link); -/* - * Return an enumerated dc_link. dc_link order is constant and determined at +/** + * dc_get_link_at_index() - Return an enumerated dc_link. 
+ * + * dc_link order is constant and determined at * boot time. They cannot be created or destroyed. * Use dc_get_caps() to get number of links. */ @@ -138,9 +140,14 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_ return dc->links[link_index]; } -/* Set backlight level of an embedded panel (eDP, LVDS). */ -bool dc_link_set_backlight_level(const struct dc_link *dc_link, uint32_t level, - uint32_t frame_ramp, const struct dc_stream_state *stream); +/* Set backlight level of an embedded panel (eDP, LVDS). + * backlight_pwm_u16_16 is unsigned 32 bit with 16 bit integer + * and 16 bit fractional, where 1.0 is max backlight value. + */ +bool dc_link_set_backlight_level(const struct dc_link *dc_link, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp, + const struct dc_stream_state *stream); int dc_link_get_backlight_level(const struct dc_link *dc_link); diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index c5bd1fbb6982..be34d638e15d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -56,6 +56,7 @@ struct dc_stream_state { struct dc_crtc_timing_adjust adjust; struct dc_info_packet vrr_infopacket; struct dc_info_packet vsc_infopacket; + struct dc_info_packet vsp_infopacket; struct rect src; /* composition area */ struct rect dst; /* stream addressable area */ @@ -104,8 +105,6 @@ struct dc_stream_state { bool dpms_off; bool apply_edp_fast_boot_optimization; - struct dc_stream_status status; - struct dc_cursor_attributes cursor_attributes; struct dc_cursor_position cursor_position; uint32_t sdr_white_level; // for boosting (SDR) cursor in HDR mode @@ -131,11 +130,13 @@ struct dc_stream_update { struct dc_crtc_timing_adjust *adjust; struct dc_info_packet *vrr_infopacket; struct dc_info_packet *vsc_infopacket; + struct dc_info_packet *vsp_infopacket; bool *dpms_off; struct colorspace_transform *gamut_remap; enum dc_color_space 
*output_color_space; + enum dc_dither_option *dither_option; struct dc_csc_transform *output_csc_transform; diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 6e12d640d020..0b20ae23f169 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -73,10 +73,18 @@ struct hw_asic_id { void *atombios_base_address; }; +struct dc_perf_trace { + unsigned long read_count; + unsigned long write_count; + unsigned long last_entry_read; + unsigned long last_entry_write; +}; + struct dc_context { struct dc *dc; void *driver_context; /* e.g. amdgpu_device */ + struct dc_perf_trace *perf_trace; void *cgs_device; enum dce_environment dce_environment; @@ -191,7 +199,6 @@ union display_content_support { }; struct dc_panel_patch { - unsigned int disconnect_delay; unsigned int dppowerup_delay; unsigned int extra_t12_ms; }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/Makefile b/drivers/gpu/drm/amd/display/dc/dce/Makefile index 8f7f0e8b341f..6d7b64a743ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dce/Makefile @@ -28,7 +28,7 @@ DCE = dce_audio.o dce_stream_encoder.o dce_link_encoder.o dce_hwseq.o \ dce_mem_input.o dce_clock_source.o dce_scl_filters.o dce_transform.o \ -dce_clocks.o dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \ +dce_clk_mgr.o dce_opp.o dce_dmcu.o dce_abm.o dce_ipp.o dce_aux.o \ dce_i2c.o dce_i2c_hw.o dce_i2c_sw.o AMD_DAL_DCE = $(addprefix $(AMDDALPATH)/dc/dce/,$(DCE)) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 29294db1a96b..2a342eae80fd 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -54,7 +54,7 @@ #define MCP_DISABLE_ABM_IMMEDIATELY 255 -static unsigned int get_current_backlight_16_bit(struct dce_abm *abm_dce) +static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm 
*abm_dce) { uint64_t current_backlight; uint32_t round_result; @@ -103,45 +103,21 @@ static unsigned int get_current_backlight_16_bit(struct dce_abm *abm_dce) return (uint32_t)(current_backlight); } -static void driver_set_backlight_level(struct dce_abm *abm_dce, uint32_t level) +static void driver_set_backlight_level(struct dce_abm *abm_dce, + uint32_t backlight_pwm_u16_16) { - uint32_t backlight_24bit; - uint32_t backlight_17bit; uint32_t backlight_16bit; uint32_t masked_pwm_period; - uint8_t rounding_bit; uint8_t bit_count; uint64_t active_duty_cycle; uint32_t pwm_period_bitcnt; /* - * 1. Convert 8-bit value to 17 bit U1.16 format - * (1 integer, 16 fractional bits) - */ - - /* 1.1 multiply 8 bit value by 0x10101 to get a 24 bit value, - * effectively multiplying value by 256/255 - * eg. for a level of 0xEF, backlight_24bit = 0xEF * 0x10101 = 0xEFEFEF - */ - backlight_24bit = level * 0x10101; - - /* 1.2 The upper 16 bits of the 24 bit value is the fraction, lower 8 - * used for rounding, take most significant bit of fraction for - * rounding, e.g. for 0xEFEFEF, rounding bit is 1 - */ - rounding_bit = (backlight_24bit >> 7) & 1; - - /* 1.3 Add the upper 16 bits of the 24 bit value with the rounding bit - * resulting in a 17 bit value e.g. 0xEFF0 = (0xEFEFEF >> 8) + 1 - */ - backlight_17bit = (backlight_24bit >> 8) + rounding_bit; - - /* - * 2. Find 16 bit backlight active duty cycle, where 0 <= backlight + * 1. Find 16 bit backlight active duty cycle, where 0 <= backlight * active duty cycle <= backlight period */ - /* 2.1 Apply bitmask for backlight period value based on value of BITCNT + /* 1.1 Apply bitmask for backlight period value based on value of BITCNT */ REG_GET_2(BL_PWM_PERIOD_CNTL, BL_PWM_PERIOD_BITCNT, &pwm_period_bitcnt, @@ -155,13 +131,13 @@ static void driver_set_backlight_level(struct dce_abm *abm_dce, uint32_t level) /* e.g. 
maskedPwmPeriod = 0x24 when bitCount is 6 */ masked_pwm_period = masked_pwm_period & ((1 << bit_count) - 1); - /* 2.2 Calculate integer active duty cycle required upper 16 bits + /* 1.2 Calculate integer active duty cycle required upper 16 bits * contain integer component, lower 16 bits contain fractional component * of active duty cycle e.g. 0x21BDC0 = 0xEFF0 * 0x24 */ - active_duty_cycle = backlight_17bit * masked_pwm_period; + active_duty_cycle = backlight_pwm_u16_16 * masked_pwm_period; - /* 2.3 Calculate 16 bit active duty cycle from integer and fractional + /* 1.3 Calculate 16 bit active duty cycle from integer and fractional * components shift by bitCount then mask 16 bits and add rounding bit * from MSB of fraction e.g. 0x86F7 = ((0x21BDC0 >> 6) & 0xFFF) + 0 */ @@ -170,23 +146,23 @@ static void driver_set_backlight_level(struct dce_abm *abm_dce, uint32_t level) backlight_16bit += (active_duty_cycle >> (bit_count - 1)) & 0x1; /* - * 3. Program register with updated value + * 2. Program register with updated value */ - /* 3.1 Lock group 2 backlight registers */ + /* 2.1 Lock group 2 backlight registers */ REG_UPDATE_2(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_IGNORE_MASTER_LOCK_EN, 1, BL_PWM_GRP1_REG_LOCK, 1); - // 3.2 Write new active duty cycle + // 2.2 Write new active duty cycle REG_UPDATE(BL_PWM_CNTL, BL_ACTIVE_INT_FRAC_CNT, backlight_16bit); - /* 3.3 Unlock group 2 backlight registers */ + /* 2.3 Unlock group 2 backlight registers */ REG_UPDATE(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_LOCK, 0); - /* 5.4.4 Wait for pending bit to be cleared */ + /* 3 Wait for pending bit to be cleared */ REG_WAIT(BL_PWM_GRP1_REG_LOCK, BL_PWM_GRP1_REG_UPDATE_PENDING, 0, 1, 10000); @@ -194,16 +170,21 @@ static void driver_set_backlight_level(struct dce_abm *abm_dce, uint32_t level) static void dmcu_set_backlight_level( struct dce_abm *abm_dce, - uint32_t level, + uint32_t backlight_pwm_u16_16, uint32_t frame_ramp, uint32_t controller_id) { - unsigned int backlight_16_bit = (level * 
0x10101) >> 8; - unsigned int backlight_17_bit = backlight_16_bit + - (((backlight_16_bit & 0x80) >> 7) & 1); + unsigned int backlight_8_bit = 0; uint32_t rampingBoundary = 0xFFFF; uint32_t s2; + if (backlight_pwm_u16_16 & 0x10000) + // Check for max backlight condition + backlight_8_bit = 0xFF; + else + // Take MSB of fractional part since backlight is not max + backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; + /* set ramping boundary */ REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary); @@ -220,7 +201,7 @@ static void dmcu_set_backlight_level( 0, 1, 80000); /* setDMCUParam_BL */ - REG_UPDATE(BL1_PWM_USER_LEVEL, BL1_PWM_USER_LEVEL, backlight_17_bit); + REG_UPDATE(BL1_PWM_USER_LEVEL, BL1_PWM_USER_LEVEL, backlight_pwm_u16_16); /* write ramp */ if (controller_id == 0) @@ -237,9 +218,9 @@ static void dmcu_set_backlight_level( s2 = REG_READ(BIOS_SCRATCH_2); s2 &= ~ATOM_S2_CURRENT_BL_LEVEL_MASK; - level &= (ATOM_S2_CURRENT_BL_LEVEL_MASK >> + backlight_8_bit &= (ATOM_S2_CURRENT_BL_LEVEL_MASK >> ATOM_S2_CURRENT_BL_LEVEL_SHIFT); - s2 |= (level << ATOM_S2_CURRENT_BL_LEVEL_SHIFT); + s2 |= (backlight_8_bit << ATOM_S2_CURRENT_BL_LEVEL_SHIFT); REG_WRITE(BIOS_SCRATCH_2, s2); } @@ -247,7 +228,7 @@ static void dmcu_set_backlight_level( static void dce_abm_init(struct abm *abm) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); - unsigned int backlight = get_current_backlight_16_bit(abm_dce); + unsigned int backlight = calculate_16_bit_backlight_from_pwm(abm_dce); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x103); REG_WRITE(DC_ABM1_HG_SAMPLE_RATE, 0x101); @@ -284,12 +265,26 @@ static void dce_abm_init(struct abm *abm) ABM1_BL_REG_READ_MISSED_FRAME_CLEAR, 1); } -static unsigned int dce_abm_get_current_backlight_8_bit(struct abm *abm) +static unsigned int dce_abm_get_current_backlight(struct abm *abm) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); unsigned int backlight = REG_READ(BL1_PWM_CURRENT_ABM_LEVEL); - return (backlight >> 8); + /* return backlight in hardware format which is unsigned 
17 bits, with + * 1 bit integer and 16 bit fractional + */ + return backlight; +} + +static unsigned int dce_abm_get_target_backlight(struct abm *abm) +{ + struct dce_abm *abm_dce = TO_DCE_ABM(abm); + unsigned int backlight = REG_READ(BL1_PWM_TARGET_ABM_LEVEL); + + /* return backlight in hardware format which is unsigned 17 bits, with + * 1 bit integer and 16 bit fractional + */ + return backlight; } static bool dce_abm_set_level(struct abm *abm, uint32_t level) @@ -396,9 +391,9 @@ static bool dce_abm_init_backlight(struct abm *abm) return true; } -static bool dce_abm_set_backlight_level( +static bool dce_abm_set_backlight_level_pwm( struct abm *abm, - unsigned int backlight_level, + unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, bool use_smooth_brightness) @@ -406,16 +401,16 @@ static bool dce_abm_set_backlight_level( struct dce_abm *abm_dce = TO_DCE_ABM(abm); DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", - backlight_level, backlight_level); + backlight_pwm_u16_16, backlight_pwm_u16_16); /* If DMCU is in reset state, DMCU is uninitialized */ if (use_smooth_brightness) dmcu_set_backlight_level(abm_dce, - backlight_level, + backlight_pwm_u16_16, frame_ramp, controller_id); else - driver_set_backlight_level(abm_dce, backlight_level); + driver_set_backlight_level(abm_dce, backlight_pwm_u16_16); return true; } @@ -424,8 +419,9 @@ static const struct abm_funcs dce_funcs = { .abm_init = dce_abm_init, .set_abm_level = dce_abm_set_level, .init_backlight = dce_abm_init_backlight, - .set_backlight_level = dce_abm_set_backlight_level, - .get_current_backlight_8_bit = dce_abm_get_current_backlight_8_bit, + .set_backlight_level_pwm = dce_abm_set_backlight_level_pwm, + .get_current_backlight = dce_abm_get_current_backlight, + .get_target_backlight = dce_abm_get_target_backlight, .set_abm_immediate_disable = dce_abm_immediate_disable }; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c 
b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c new file mode 100644 index 000000000000..bd22f51813bf --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -0,0 +1,884 @@ +/* + * Copyright 2012-16 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dce_clk_mgr.h" + +#include "reg_helper.h" +#include "dmcu.h" +#include "core_types.h" +#include "dal_asic_id.h" + +#define TO_DCE_CLK_MGR(clocks)\ + container_of(clocks, struct dce_clk_mgr, base) + +#define REG(reg) \ + (clk_mgr_dce->regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr_dce->clk_mgr_shift->field_name, clk_mgr_dce->clk_mgr_mask->field_name + +#define CTX \ + clk_mgr_dce->base.ctx +#define DC_LOGGER \ + clk_mgr->ctx->logger + +/* Max clock values for each state indexed by "enum clocks_state": */ +static const struct state_dependent_clocks dce80_max_clks_by_state[] = { +/* ClocksStateInvalid - should not be used */ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/* ClocksStateUltraLow - not expected to be used for DCE 8.0 */ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/* ClocksStateLow */ +{ .display_clk_khz = 352000, .pixel_clk_khz = 330000}, +/* ClocksStateNominal */ +{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, +/* ClocksStatePerformance */ +{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; + +static const struct state_dependent_clocks dce110_max_clks_by_state[] = { +/*ClocksStateInvalid - should not be used*/ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/ +{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 }, +/*ClocksStateLow*/ +{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 }, +/*ClocksStateNominal*/ +{ .display_clk_khz = 467000, .pixel_clk_khz = 400000 }, +/*ClocksStatePerformance*/ +{ .display_clk_khz = 643000, .pixel_clk_khz = 400000 } }; + +static const struct state_dependent_clocks dce112_max_clks_by_state[] = { +/*ClocksStateInvalid - should not be used*/ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/ +{ .display_clk_khz = 389189, .pixel_clk_khz = 346672 }, +/*ClocksStateLow*/ +{ .display_clk_khz 
= 459000, .pixel_clk_khz = 400000 }, +/*ClocksStateNominal*/ +{ .display_clk_khz = 667000, .pixel_clk_khz = 600000 }, +/*ClocksStatePerformance*/ +{ .display_clk_khz = 1132000, .pixel_clk_khz = 600000 } }; + +static const struct state_dependent_clocks dce120_max_clks_by_state[] = { +/*ClocksStateInvalid - should not be used*/ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/ +{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, +/*ClocksStateLow*/ +{ .display_clk_khz = 460000, .pixel_clk_khz = 400000 }, +/*ClocksStateNominal*/ +{ .display_clk_khz = 670000, .pixel_clk_khz = 600000 }, +/*ClocksStatePerformance*/ +{ .display_clk_khz = 1133000, .pixel_clk_khz = 600000 } }; + +int dentist_get_divider_from_did(int did) +{ + if (did < DENTIST_BASE_DID_1) + did = DENTIST_BASE_DID_1; + if (did > DENTIST_MAX_DID) + did = DENTIST_MAX_DID; + + if (did < DENTIST_BASE_DID_2) { + return DENTIST_DIVIDER_RANGE_1_START + DENTIST_DIVIDER_RANGE_1_STEP + * (did - DENTIST_BASE_DID_1); + } else if (did < DENTIST_BASE_DID_3) { + return DENTIST_DIVIDER_RANGE_2_START + DENTIST_DIVIDER_RANGE_2_STEP + * (did - DENTIST_BASE_DID_2); + } else if (did < DENTIST_BASE_DID_4) { + return DENTIST_DIVIDER_RANGE_3_START + DENTIST_DIVIDER_RANGE_3_STEP + * (did - DENTIST_BASE_DID_3); + } else { + return DENTIST_DIVIDER_RANGE_4_START + DENTIST_DIVIDER_RANGE_4_STEP + * (did - DENTIST_BASE_DID_4); + } +} + +/* SW will adjust DP REF Clock average value for all purposes + * (DP DTO / DP Audio DTO and DP GTC) + if clock is spread for all cases: + -if SS enabled on DP Ref clock and HW de-spreading enabled with SW + calculations for DS_INCR/DS_MODULO (this is planned to be default case) + -if SS enabled on DP Ref clock and HW de-spreading enabled with HW + calculations (not planned to be used, but average clock should still + be valid) + -if SS enabled on DP Ref clock and HW de-spreading disabled + (should not be case with CIK) then SW should 
program all rates + generated according to average value (case as with previous ASICs) + */ +static int clk_mgr_adjust_dp_ref_freq_for_ss(struct dce_clk_mgr *clk_mgr_dce, int dp_ref_clk_khz) +{ + if (clk_mgr_dce->ss_on_dprefclk && clk_mgr_dce->dprefclk_ss_divider != 0) { + struct fixed31_32 ss_percentage = dc_fixpt_div_int( + dc_fixpt_from_fraction(clk_mgr_dce->dprefclk_ss_percentage, + clk_mgr_dce->dprefclk_ss_divider), 200); + struct fixed31_32 adj_dp_ref_clk_khz; + + ss_percentage = dc_fixpt_sub(dc_fixpt_one, ss_percentage); + adj_dp_ref_clk_khz = dc_fixpt_mul_int(ss_percentage, dp_ref_clk_khz); + dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz); + } + return dp_ref_clk_khz; +} + +static int dce_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + int dprefclk_wdivider; + int dprefclk_src_sel; + int dp_ref_clk_khz = 600000; + int target_div; + + /* ASSERT DP Reference Clock source is from DFS*/ + REG_GET(DPREFCLK_CNTL, DPREFCLK_SRC_SEL, &dprefclk_src_sel); + ASSERT(dprefclk_src_sel == 0); + + /* Read the mmDENTIST_DISPCLK_CNTL to get the currently + * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ + REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); + + /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ + target_div = dentist_get_divider_from_did(dprefclk_wdivider); + + /* Calculate the current DFS clock, in kHz.*/ + dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr_dce->dentist_vco_freq_khz) / target_div; + + return clk_mgr_adjust_dp_ref_freq_for_ss(clk_mgr_dce, dp_ref_clk_khz); +} + +int dce12_get_dp_ref_freq_khz(struct clk_mgr *clk_mgr) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + + return clk_mgr_adjust_dp_ref_freq_for_ss(clk_mgr_dce, clk_mgr_dce->dprefclk_khz); +} + +/* unit: in_khz before mode set, get pixel clock from context. 
ASIC register + * may not be programmed yet + */ +static uint32_t get_max_pixel_clock_for_all_paths(struct dc_state *context) +{ + uint32_t max_pix_clk = 0; + int i; + + for (i = 0; i < MAX_PIPES; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream == NULL) + continue; + + /* do not check under lay */ + if (pipe_ctx->top_pipe) + continue; + + if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk > max_pix_clk) + max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; + + /* raise clock state for HBR3/2 if required. Confirmed with HW DCE/DPCS + * logic for HBR3 still needs Nominal (0.8V) on VDDC rail + */ + if (dc_is_dp_signal(pipe_ctx->stream->signal) && + pipe_ctx->stream_res.pix_clk_params.requested_sym_clk > max_pix_clk) + max_pix_clk = pipe_ctx->stream_res.pix_clk_params.requested_sym_clk; + } + + return max_pix_clk; +} + +static enum dm_pp_clocks_state dce_get_required_clocks_state( + struct clk_mgr *clk_mgr, + struct dc_state *context) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + int i; + enum dm_pp_clocks_state low_req_clk; + int max_pix_clk = get_max_pixel_clock_for_all_paths(context); + + /* Iterate from highest supported to lowest valid state, and update + * lowest RequiredState with the lowest state that satisfies + * all required clocks + */ + for (i = clk_mgr_dce->max_clks_state; i >= DM_PP_CLOCKS_STATE_ULTRA_LOW; i--) + if (context->bw.dce.dispclk_khz > + clk_mgr_dce->max_clks_by_state[i].display_clk_khz + || max_pix_clk > + clk_mgr_dce->max_clks_by_state[i].pixel_clk_khz) + break; + + low_req_clk = i + 1; + if (low_req_clk > clk_mgr_dce->max_clks_state) { + /* set max clock state for high phyclock, invalid on exceeding display clock */ + if (clk_mgr_dce->max_clks_by_state[clk_mgr_dce->max_clks_state].display_clk_khz + < context->bw.dce.dispclk_khz) + low_req_clk = DM_PP_CLOCKS_STATE_INVALID; + else + low_req_clk = clk_mgr_dce->max_clks_state; + } + + return low_req_clk; +} 
+ +static int dce_set_clock( + struct clk_mgr *clk_mgr, + int requested_clk_khz) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct bp_pixel_clock_parameters pxl_clk_params = { 0 }; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + int actual_clock = requested_clk_khz; + struct dmcu *dmcu = clk_mgr_dce->base.ctx->dc->res_pool->dmcu; + + /* Make sure requested clock isn't lower than minimum threshold*/ + if (requested_clk_khz > 0) + requested_clk_khz = max(requested_clk_khz, + clk_mgr_dce->dentist_vco_freq_khz / 64); + + /* Prepare to program display clock*/ + pxl_clk_params.target_pixel_clock = requested_clk_khz; + pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + + if (clk_mgr_dce->dfs_bypass_active) + pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true; + + bp->funcs->program_display_engine_pll(bp, &pxl_clk_params); + + if (clk_mgr_dce->dfs_bypass_active) { + /* Cache the fixed display clock*/ + clk_mgr_dce->dfs_bypass_disp_clk = + pxl_clk_params.dfs_bypass_display_clock; + actual_clock = pxl_clk_params.dfs_bypass_display_clock; + } + + /* from power down, we need mark the clock state as ClocksStateNominal + * from HWReset, so when resume we will call pplib voltage regulator.*/ + if (requested_clk_khz == 0) + clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + + if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) + dmcu->funcs->set_psr_wait_loop(dmcu, actual_clock / 1000 / 7); + + return actual_clock; +} + +int dce112_set_clock(struct clk_mgr *clk_mgr, int requested_clk_khz) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct bp_set_dce_clock_parameters dce_clk_params; + struct dc_bios *bp = clk_mgr->ctx->dc_bios; + struct dc *core_dc = clk_mgr->ctx->dc; + struct dmcu *dmcu = core_dc->res_pool->dmcu; + int actual_clock = requested_clk_khz; + /* Prepare to program display clock*/ + memset(&dce_clk_params, 0, sizeof(dce_clk_params)); + + /* Make sure requested clock isn't lower than minimum threshold*/ + if 
(requested_clk_khz > 0) + requested_clk_khz = max(requested_clk_khz, + clk_mgr_dce->dentist_vco_freq_khz / 62); + + dce_clk_params.target_clock_frequency = requested_clk_khz; + dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; + dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + actual_clock = dce_clk_params.target_clock_frequency; + + /* from power down, we need mark the clock state as ClocksStateNominal + * from HWReset, so when resume we will call pplib voltage regulator.*/ + if (requested_clk_khz == 0) + clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + + /*Program DP ref Clock*/ + /*VBIOS will determine DPREFCLK frequency, so we don't set it*/ + dce_clk_params.target_clock_frequency = 0; + dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; + if (!ASICREV_IS_VEGA20_P(clk_mgr->ctx->asic_id.hw_internal_rev)) + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = + (dce_clk_params.pll_id == + CLOCK_SOURCE_COMBO_DISPLAY_PLL0); + else + dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; + + bp->funcs->set_dce_clock(bp, &dce_clk_params); + + if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { + if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { + if (clk_mgr_dce->dfs_bypass_disp_clk != actual_clock) + dmcu->funcs->set_psr_wait_loop(dmcu, + actual_clock / 1000 / 7); + } + } + + clk_mgr_dce->dfs_bypass_disp_clk = actual_clock; + return actual_clock; +} + +static void dce_clock_read_integrated_info(struct dce_clk_mgr *clk_mgr_dce) +{ + struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; + struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios; + struct integrated_info info = { { { 0 } } }; + struct dc_firmware_info fw_info = { { 0 } }; + int i; + + if (bp->integrated_info) + info = *bp->integrated_info; + + clk_mgr_dce->dentist_vco_freq_khz = info.dentist_vco_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) { + bp->funcs->get_firmware_info(bp, &fw_info); + 
clk_mgr_dce->dentist_vco_freq_khz = + fw_info.smu_gpu_pll_output_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) + clk_mgr_dce->dentist_vco_freq_khz = 3600000; + } + + /*update the maximum display clock for each power state*/ + for (i = 0; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) { + enum dm_pp_clocks_state clk_state = DM_PP_CLOCKS_STATE_INVALID; + + switch (i) { + case 0: + clk_state = DM_PP_CLOCKS_STATE_ULTRA_LOW; + break; + + case 1: + clk_state = DM_PP_CLOCKS_STATE_LOW; + break; + + case 2: + clk_state = DM_PP_CLOCKS_STATE_NOMINAL; + break; + + case 3: + clk_state = DM_PP_CLOCKS_STATE_PERFORMANCE; + break; + + default: + clk_state = DM_PP_CLOCKS_STATE_INVALID; + break; + } + + /*Do not allow bad VBIOS/SBIOS to override with invalid values, + * check for > 100MHz*/ + if (info.disp_clk_voltage[i].max_supported_clk >= 100000) + clk_mgr_dce->max_clks_by_state[clk_state].display_clk_khz = + info.disp_clk_voltage[i].max_supported_clk; + } + + if (!debug->disable_dfs_bypass && bp->integrated_info) + if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE) + clk_mgr_dce->dfs_bypass_enabled = true; +} + +void dce_clock_read_ss_info(struct dce_clk_mgr *clk_mgr_dce) +{ + struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios; + int ss_info_num = bp->funcs->get_ss_entry_number( + bp, AS_SIGNAL_TYPE_GPU_PLL); + + if (ss_info_num) { + struct spread_spectrum_info info = { { 0 } }; + enum bp_result result = bp->funcs->get_spread_spectrum_info( + bp, AS_SIGNAL_TYPE_GPU_PLL, 0, &info); + + /* Based on VBIOS, VBIOS will keep entry for GPU PLL SS + * even if SS not enabled and in that case + * SSInfo.spreadSpectrumPercentage !=0 would be sign + * that SS is enabled + */ + if (result == BP_RESULT_OK && + info.spread_spectrum_percentage != 0) { + clk_mgr_dce->ss_on_dprefclk = true; + clk_mgr_dce->dprefclk_ss_divider = info.spread_percentage_divider; + + if (info.type.CENTER_MODE == 0) { + /* TODO: Currently for DP Reference clock we + * need only SS percentage for + * downspread */ + 
clk_mgr_dce->dprefclk_ss_percentage = + info.spread_spectrum_percentage; + } + + return; + } + + result = bp->funcs->get_spread_spectrum_info( + bp, AS_SIGNAL_TYPE_DISPLAY_PORT, 0, &info); + + /* Based on VBIOS, VBIOS will keep entry for DPREFCLK SS + * even if SS not enabled and in that case + * SSInfo.spreadSpectrumPercentage !=0 would be sign + * that SS is enabled + */ + if (result == BP_RESULT_OK && + info.spread_spectrum_percentage != 0) { + clk_mgr_dce->ss_on_dprefclk = true; + clk_mgr_dce->dprefclk_ss_divider = info.spread_percentage_divider; + + if (info.type.CENTER_MODE == 0) { + /* Currently for DP Reference clock we + * need only SS percentage for + * downspread */ + clk_mgr_dce->dprefclk_ss_percentage = + info.spread_spectrum_percentage; + } + } + } +} + +void dce110_fill_display_configs( + const struct dc_state *context, + struct dm_pp_display_configuration *pp_display_cfg) +{ + int j; + int num_cfgs = 0; + + for (j = 0; j < context->stream_count; j++) { + int k; + + const struct dc_stream_state *stream = context->streams[j]; + struct dm_pp_single_disp_config *cfg = + &pp_display_cfg->disp_configs[num_cfgs]; + const struct pipe_ctx *pipe_ctx = NULL; + + for (k = 0; k < MAX_PIPES; k++) + if (stream == context->res_ctx.pipe_ctx[k].stream) { + pipe_ctx = &context->res_ctx.pipe_ctx[k]; + break; + } + + ASSERT(pipe_ctx != NULL); + + /* only notify active stream */ + if (stream->dpms_off) + continue; + + num_cfgs++; + cfg->signal = pipe_ctx->stream->signal; + cfg->pipe_idx = pipe_ctx->stream_res.tg->inst; + cfg->src_height = stream->src.height; + cfg->src_width = stream->src.width; + cfg->ddi_channel_mapping = + stream->sink->link->ddi_channel_mapping.raw; + cfg->transmitter = + stream->sink->link->link_enc->transmitter; + cfg->link_settings.lane_count = + stream->sink->link->cur_link_settings.lane_count; + cfg->link_settings.link_rate = + stream->sink->link->cur_link_settings.link_rate; + cfg->link_settings.link_spread = + 
stream->sink->link->cur_link_settings.link_spread; + cfg->sym_clock = stream->phy_pix_clk; + /* Round v_refresh*/ + cfg->v_refresh = stream->timing.pix_clk_khz * 1000; + cfg->v_refresh /= stream->timing.h_total; + cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) + / stream->timing.v_total; + } + + pp_display_cfg->display_count = num_cfgs; +} + +static uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context) +{ + uint8_t j; + uint32_t min_vertical_blank_time = -1; + + for (j = 0; j < context->stream_count; j++) { + struct dc_stream_state *stream = context->streams[j]; + uint32_t vertical_blank_in_pixels = 0; + uint32_t vertical_blank_time = 0; + + vertical_blank_in_pixels = stream->timing.h_total * + (stream->timing.v_total + - stream->timing.v_addressable); + + vertical_blank_time = vertical_blank_in_pixels + * 1000 / stream->timing.pix_clk_khz; + + if (min_vertical_blank_time > vertical_blank_time) + min_vertical_blank_time = vertical_blank_time; + } + + return min_vertical_blank_time; +} + +static int determine_sclk_from_bounding_box( + const struct dc *dc, + int required_sclk) +{ + int i; + + /* + * Some asics do not give us sclk levels, so we just report the actual + * required sclk + */ + if (dc->sclk_lvls.num_levels == 0) + return required_sclk; + + for (i = 0; i < dc->sclk_lvls.num_levels; i++) { + if (dc->sclk_lvls.clocks_in_khz[i] >= required_sclk) + return dc->sclk_lvls.clocks_in_khz[i]; + } + /* + * even maximum level could not satisfy requirement, this + * is unexpected at this stage, should have been caught at + * validation time + */ + ASSERT(0); + return dc->sclk_lvls.clocks_in_khz[dc->sclk_lvls.num_levels - 1]; +} + +static void dce_pplib_apply_display_requirements( + struct dc *dc, + struct dc_state *context) +{ + struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; + + pp_display_cfg->avail_mclk_switch_time_us = dce110_get_min_vblank_time_us(context); + + dce110_fill_display_configs(context, 
pp_display_cfg); + + if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) + dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); +} + +static void dce11_pplib_apply_display_requirements( + struct dc *dc, + struct dc_state *context) +{ + struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; + + pp_display_cfg->all_displays_in_sync = + context->bw.dce.all_displays_in_sync; + pp_display_cfg->nb_pstate_switch_disable = + context->bw.dce.nbp_state_change_enable == false; + pp_display_cfg->cpu_cc6_disable = + context->bw.dce.cpuc_state_change_enable == false; + pp_display_cfg->cpu_pstate_disable = + context->bw.dce.cpup_state_change_enable == false; + pp_display_cfg->cpu_pstate_separation_time = + context->bw.dce.blackout_recovery_time_us; + + pp_display_cfg->min_memory_clock_khz = context->bw.dce.yclk_khz + / MEMORY_TYPE_MULTIPLIER_CZ; + + pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box( + dc, + context->bw.dce.sclk_khz); + + pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz; + + pp_display_cfg->min_engine_clock_deep_sleep_khz + = context->bw.dce.sclk_deep_sleep_khz; + + pp_display_cfg->avail_mclk_switch_time_us = + dce110_get_min_vblank_time_us(context); + /* TODO: dce11.2*/ + pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; + + pp_display_cfg->disp_clk_khz = dc->res_pool->clk_mgr->clks.dispclk_khz; + + dce110_fill_display_configs(context, pp_display_cfg); + + /* TODO: is this still applicable?*/ + if (pp_display_cfg->display_count == 1) { + const struct dc_crtc_timing *timing = + &context->streams[0]->timing; + + pp_display_cfg->crtc_index = + pp_display_cfg->disp_configs[0].pipe_idx; + pp_display_cfg->line_time_in_us = timing->h_total * 1000 / timing->pix_clk_khz; + } + + if (memcmp(&dc->current_state->pp_display_cfg, pp_display_cfg, sizeof(*pp_display_cfg)) != 0) + dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); +} + +static 
void dce_update_clocks(struct clk_mgr *clk_mgr, + struct dc_state *context, + bool safe_to_lower) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct dm_pp_power_level_change_request level_change_req; + int unpatched_disp_clk = context->bw.dce.dispclk_khz; + + /*TODO: W/A for dal3 linux, investigate why this works */ + if (!clk_mgr_dce->dfs_bypass_active) + context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; + + level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); + /* get max clock state from PPLIB */ + if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower) + || level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) { + if (dm_pp_apply_power_level_change_request(clk_mgr->ctx, &level_change_req)) + clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; + } + + if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { + context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, context->bw.dce.dispclk_khz); + clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + } + dce_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); + + context->bw.dce.dispclk_khz = unpatched_disp_clk; +} + +static void dce11_update_clocks(struct clk_mgr *clk_mgr, + struct dc_state *context, + bool safe_to_lower) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct dm_pp_power_level_change_request level_change_req; + + level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); + /* get max clock state from PPLIB */ + if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower) + || level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) { + if (dm_pp_apply_power_level_change_request(clk_mgr->ctx, &level_change_req)) + clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; + } + + if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, 
clk_mgr->clks.dispclk_khz)) { + context->bw.dce.dispclk_khz = dce_set_clock(clk_mgr, context->bw.dce.dispclk_khz); + clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + } + dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); +} + +static void dce112_update_clocks(struct clk_mgr *clk_mgr, + struct dc_state *context, + bool safe_to_lower) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct dm_pp_power_level_change_request level_change_req; + + level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context); + /* get max clock state from PPLIB */ + if ((level_change_req.power_level < clk_mgr_dce->cur_min_clks_state && safe_to_lower) + || level_change_req.power_level > clk_mgr_dce->cur_min_clks_state) { + if (dm_pp_apply_power_level_change_request(clk_mgr->ctx, &level_change_req)) + clk_mgr_dce->cur_min_clks_state = level_change_req.power_level; + } + + if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { + context->bw.dce.dispclk_khz = dce112_set_clock(clk_mgr, context->bw.dce.dispclk_khz); + clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + } + dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); +} + +static void dce12_update_clocks(struct clk_mgr *clk_mgr, + struct dc_state *context, + bool safe_to_lower) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr); + struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; + int max_pix_clk = get_max_pixel_clock_for_all_paths(context); + int unpatched_disp_clk = context->bw.dce.dispclk_khz; + + /*TODO: W/A for dal3 linux, investigate why this works */ + if (!clk_mgr_dce->dfs_bypass_active) + context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; + + if (should_set_clock(safe_to_lower, context->bw.dce.dispclk_khz, clk_mgr->clks.dispclk_khz)) { + clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK; + clock_voltage_req.clocks_in_khz = context->bw.dce.dispclk_khz; + 
context->bw.dce.dispclk_khz = dce112_set_clock(clk_mgr, context->bw.dce.dispclk_khz); + clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz; + + dm_pp_apply_clock_for_voltage_request(clk_mgr->ctx, &clock_voltage_req); + } + + if (should_set_clock(safe_to_lower, max_pix_clk, clk_mgr->clks.phyclk_khz)) { + clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAYPHYCLK; + clock_voltage_req.clocks_in_khz = max_pix_clk; + clk_mgr->clks.phyclk_khz = max_pix_clk; + + dm_pp_apply_clock_for_voltage_request(clk_mgr->ctx, &clock_voltage_req); + } + dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context); + + context->bw.dce.dispclk_khz = unpatched_disp_clk; +} + +static const struct clk_mgr_funcs dce120_funcs = { + .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .update_clocks = dce12_update_clocks +}; + +static const struct clk_mgr_funcs dce112_funcs = { + .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, + .update_clocks = dce112_update_clocks +}; + +static const struct clk_mgr_funcs dce110_funcs = { + .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, + .update_clocks = dce11_update_clocks, +}; + +static const struct clk_mgr_funcs dce_funcs = { + .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, + .update_clocks = dce_update_clocks +}; + +static void dce_clk_mgr_construct( + struct dce_clk_mgr *clk_mgr_dce, + struct dc_context *ctx, + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask) +{ + struct clk_mgr *base = &clk_mgr_dce->base; + struct dm_pp_static_clock_info static_clk_info = {0}; + + base->ctx = ctx; + base->funcs = &dce_funcs; + + clk_mgr_dce->regs = regs; + clk_mgr_dce->clk_mgr_shift = clk_shift; + clk_mgr_dce->clk_mgr_mask = clk_mask; + + clk_mgr_dce->dfs_bypass_disp_clk = 0; + + clk_mgr_dce->dprefclk_ss_percentage = 0; + clk_mgr_dce->dprefclk_ss_divider = 1000; + clk_mgr_dce->ss_on_dprefclk = false; + + + if (dm_pp_get_static_clocks(ctx, &static_clk_info)) + 
clk_mgr_dce->max_clks_state = static_clk_info.max_clocks_state; + else + clk_mgr_dce->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; + clk_mgr_dce->cur_min_clks_state = DM_PP_CLOCKS_STATE_INVALID; + + dce_clock_read_integrated_info(clk_mgr_dce); + dce_clock_read_ss_info(clk_mgr_dce); +} + +struct clk_mgr *dce_clk_mgr_create( + struct dc_context *ctx, + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask) +{ + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + memcpy(clk_mgr_dce->max_clks_by_state, + dce80_max_clks_by_state, + sizeof(dce80_max_clks_by_state)); + + dce_clk_mgr_construct( + clk_mgr_dce, ctx, regs, clk_shift, clk_mask); + + return &clk_mgr_dce->base; +} + +struct clk_mgr *dce110_clk_mgr_create( + struct dc_context *ctx, + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask) +{ + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + memcpy(clk_mgr_dce->max_clks_by_state, + dce110_max_clks_by_state, + sizeof(dce110_max_clks_by_state)); + + dce_clk_mgr_construct( + clk_mgr_dce, ctx, regs, clk_shift, clk_mask); + + clk_mgr_dce->base.funcs = &dce110_funcs; + + return &clk_mgr_dce->base; +} + +struct clk_mgr *dce112_clk_mgr_create( + struct dc_context *ctx, + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask) +{ + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + memcpy(clk_mgr_dce->max_clks_by_state, + dce112_max_clks_by_state, + sizeof(dce112_max_clks_by_state)); + + dce_clk_mgr_construct( + clk_mgr_dce, ctx, regs, clk_shift, clk_mask); + + clk_mgr_dce->base.funcs 
= &dce112_funcs; + + return &clk_mgr_dce->base; +} + +struct clk_mgr *dce120_clk_mgr_create(struct dc_context *ctx) +{ + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + memcpy(clk_mgr_dce->max_clks_by_state, + dce120_max_clks_by_state, + sizeof(dce120_max_clks_by_state)); + + dce_clk_mgr_construct( + clk_mgr_dce, ctx, NULL, NULL, NULL); + + clk_mgr_dce->dprefclk_khz = 600000; + clk_mgr_dce->base.funcs = &dce120_funcs; + + return &clk_mgr_dce->base; +} + +void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr) +{ + struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(*clk_mgr); + + kfree(clk_mgr_dce); + *clk_mgr = NULL; +} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.h b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h index 34fdb386c884..3bceb31d910d 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.h @@ -24,10 +24,13 @@ */ -#ifndef _DCE_CLOCKS_H_ -#define _DCE_CLOCKS_H_ +#ifndef _DCE_CLK_MGR_H_ +#define _DCE_CLK_MGR_H_ -#include "display_clock.h" +#include "clk_mgr.h" +#include "dccg.h" + +#define MEMORY_TYPE_MULTIPLIER_CZ 4 #define CLK_COMMON_REG_LIST_DCE_BASE() \ .DPREFCLK_CNTL = mmDPREFCLK_CNTL, \ @@ -53,24 +56,31 @@ type DENTIST_DISPCLK_WDIVIDER; \ type DENTIST_DISPCLK_CHG_DONE; -struct dccg_shift { +struct clk_mgr_shift { CLK_REG_FIELD_LIST(uint8_t) }; -struct dccg_mask { +struct clk_mgr_mask { CLK_REG_FIELD_LIST(uint32_t) }; -struct dccg_registers { +struct clk_mgr_registers { uint32_t DPREFCLK_CNTL; uint32_t DENTIST_DISPCLK_CNTL; }; -struct dce_dccg { - struct dccg base; - const struct dccg_registers *regs; - const struct dccg_shift *clk_shift; - const struct dccg_mask *clk_mask; +struct state_dependent_clocks { + int display_clk_khz; + int pixel_clk_khz; +}; + +struct dce_clk_mgr { + struct clk_mgr base; + const struct clk_mgr_registers *regs; + const struct clk_mgr_shift 
*clk_mgr_shift; + const struct clk_mgr_mask *clk_mgr_mask; + + struct dccg *dccg; struct state_dependent_clocks max_clks_by_state[DM_PP_CLOCKS_MAX_STATES]; @@ -91,33 +101,70 @@ struct dce_dccg { /* DPREFCLK SS percentage Divider (100 or 1000) */ int dprefclk_ss_divider; int dprefclk_khz; + + enum dm_pp_clocks_state max_clks_state; + enum dm_pp_clocks_state cur_min_clks_state; +}; + +/* Starting DID for each range */ +enum dentist_base_divider_id { + DENTIST_BASE_DID_1 = 0x08, + DENTIST_BASE_DID_2 = 0x40, + DENTIST_BASE_DID_3 = 0x60, + DENTIST_BASE_DID_4 = 0x7e, + DENTIST_MAX_DID = 0x7f }; +/* Starting point and step size for each divider range.*/ +enum dentist_divider_range { + DENTIST_DIVIDER_RANGE_1_START = 8, /* 2.00 */ + DENTIST_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ + DENTIST_DIVIDER_RANGE_2_START = 64, /* 16.00 */ + DENTIST_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ + DENTIST_DIVIDER_RANGE_3_START = 128, /* 32.00 */ + DENTIST_DIVIDER_RANGE_3_STEP = 4, /* 1.00 */ + DENTIST_DIVIDER_RANGE_4_START = 248, /* 62.00 */ + DENTIST_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ + DENTIST_DIVIDER_RANGE_SCALE_FACTOR = 4 +}; + +static inline bool should_set_clock(bool safe_to_lower, int calc_clk, int cur_clk) +{ + return ((safe_to_lower && calc_clk < cur_clk) || calc_clk > cur_clk); +} + +void dce_clock_read_ss_info(struct dce_clk_mgr *dccg_dce); + +int dce12_get_dp_ref_freq_khz(struct clk_mgr *dccg); + +void dce110_fill_display_configs( + const struct dc_state *context, + struct dm_pp_display_configuration *pp_display_cfg); + +int dce112_set_clock(struct clk_mgr *dccg, int requested_clk_khz); -struct dccg *dce_dccg_create( +struct clk_mgr *dce_clk_mgr_create( struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask); + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask); -struct dccg *dce110_dccg_create( +struct clk_mgr *dce110_clk_mgr_create( 
struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask); + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask); -struct dccg *dce112_dccg_create( +struct clk_mgr *dce112_clk_mgr_create( struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask); + const struct clk_mgr_registers *regs, + const struct clk_mgr_shift *clk_shift, + const struct clk_mgr_mask *clk_mask); -struct dccg *dce120_dccg_create(struct dc_context *ctx); +struct clk_mgr *dce120_clk_mgr_create(struct dc_context *ctx); -#ifdef CONFIG_DRM_AMD_DC_DCN1_0 -struct dccg *dcn1_dccg_create(struct dc_context *ctx); -#endif +void dce_clk_mgr_destroy(struct clk_mgr **clk_mgr); -void dce_dccg_destroy(struct dccg **dccg); +int dentist_get_divider_from_did(int did); -#endif /* _DCE_CLOCKS_H_ */ +#endif /* _DCE_CLK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c deleted file mode 100644 index d89a097ba936..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clocks.c +++ /dev/null @@ -1,947 +0,0 @@ -/* - * Copyright 2012-16 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: AMD - * - */ - -#include "dce_clocks.h" -#include "dm_services.h" -#include "reg_helper.h" -#include "fixed31_32.h" -#include "bios_parser_interface.h" -#include "dc.h" -#include "dmcu.h" -#if defined(CONFIG_DRM_AMD_DC_DCN1_0) -#include "dcn_calcs.h" -#endif -#include "core_types.h" -#include "dc_types.h" -#include "dal_asic_id.h" - -#define TO_DCE_CLOCKS(clocks)\ - container_of(clocks, struct dce_dccg, base) - -#define REG(reg) \ - (clk_dce->regs->reg) - -#undef FN -#define FN(reg_name, field_name) \ - clk_dce->clk_shift->field_name, clk_dce->clk_mask->field_name - -#define CTX \ - clk_dce->base.ctx -#define DC_LOGGER \ - clk->ctx->logger - -/* Max clock values for each state indexed by "enum clocks_state": */ -static const struct state_dependent_clocks dce80_max_clks_by_state[] = { -/* ClocksStateInvalid - should not be used */ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/* ClocksStateUltraLow - not expected to be used for DCE 8.0 */ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/* ClocksStateLow */ -{ .display_clk_khz = 352000, .pixel_clk_khz = 330000}, -/* ClocksStateNominal */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 }, -/* ClocksStatePerformance */ -{ .display_clk_khz = 600000, .pixel_clk_khz = 400000 } }; - -static const struct state_dependent_clocks dce110_max_clks_by_state[] = { -/*ClocksStateInvalid - should not be used*/ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/*ClocksStateUltraLow - currently by HW design team not supposed 
to be used*/ -{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 }, -/*ClocksStateLow*/ -{ .display_clk_khz = 352000, .pixel_clk_khz = 330000 }, -/*ClocksStateNominal*/ -{ .display_clk_khz = 467000, .pixel_clk_khz = 400000 }, -/*ClocksStatePerformance*/ -{ .display_clk_khz = 643000, .pixel_clk_khz = 400000 } }; - -static const struct state_dependent_clocks dce112_max_clks_by_state[] = { -/*ClocksStateInvalid - should not be used*/ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/ -{ .display_clk_khz = 389189, .pixel_clk_khz = 346672 }, -/*ClocksStateLow*/ -{ .display_clk_khz = 459000, .pixel_clk_khz = 400000 }, -/*ClocksStateNominal*/ -{ .display_clk_khz = 667000, .pixel_clk_khz = 600000 }, -/*ClocksStatePerformance*/ -{ .display_clk_khz = 1132000, .pixel_clk_khz = 600000 } }; - -static const struct state_dependent_clocks dce120_max_clks_by_state[] = { -/*ClocksStateInvalid - should not be used*/ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/*ClocksStateUltraLow - currently by HW design team not supposed to be used*/ -{ .display_clk_khz = 0, .pixel_clk_khz = 0 }, -/*ClocksStateLow*/ -{ .display_clk_khz = 460000, .pixel_clk_khz = 400000 }, -/*ClocksStateNominal*/ -{ .display_clk_khz = 670000, .pixel_clk_khz = 600000 }, -/*ClocksStatePerformance*/ -{ .display_clk_khz = 1133000, .pixel_clk_khz = 600000 } }; - -/* Starting DID for each range */ -enum dentist_base_divider_id { - DENTIST_BASE_DID_1 = 0x08, - DENTIST_BASE_DID_2 = 0x40, - DENTIST_BASE_DID_3 = 0x60, - DENTIST_BASE_DID_4 = 0x7e, - DENTIST_MAX_DID = 0x7f -}; - -/* Starting point and step size for each divider range.*/ -enum dentist_divider_range { - DENTIST_DIVIDER_RANGE_1_START = 8, /* 2.00 */ - DENTIST_DIVIDER_RANGE_1_STEP = 1, /* 0.25 */ - DENTIST_DIVIDER_RANGE_2_START = 64, /* 16.00 */ - DENTIST_DIVIDER_RANGE_2_STEP = 2, /* 0.50 */ - DENTIST_DIVIDER_RANGE_3_START = 128, /* 32.00 */ - DENTIST_DIVIDER_RANGE_3_STEP = 4, /* 1.00 
*/ - DENTIST_DIVIDER_RANGE_4_START = 248, /* 62.00 */ - DENTIST_DIVIDER_RANGE_4_STEP = 264, /* 66.00 */ - DENTIST_DIVIDER_RANGE_SCALE_FACTOR = 4 -}; - -static int dentist_get_divider_from_did(int did) -{ - if (did < DENTIST_BASE_DID_1) - did = DENTIST_BASE_DID_1; - if (did > DENTIST_MAX_DID) - did = DENTIST_MAX_DID; - - if (did < DENTIST_BASE_DID_2) { - return DENTIST_DIVIDER_RANGE_1_START + DENTIST_DIVIDER_RANGE_1_STEP - * (did - DENTIST_BASE_DID_1); - } else if (did < DENTIST_BASE_DID_3) { - return DENTIST_DIVIDER_RANGE_2_START + DENTIST_DIVIDER_RANGE_2_STEP - * (did - DENTIST_BASE_DID_2); - } else if (did < DENTIST_BASE_DID_4) { - return DENTIST_DIVIDER_RANGE_3_START + DENTIST_DIVIDER_RANGE_3_STEP - * (did - DENTIST_BASE_DID_3); - } else { - return DENTIST_DIVIDER_RANGE_4_START + DENTIST_DIVIDER_RANGE_4_STEP - * (did - DENTIST_BASE_DID_4); - } -} - -/* SW will adjust DP REF Clock average value for all purposes - * (DP DTO / DP Audio DTO and DP GTC) - if clock is spread for all cases: - -if SS enabled on DP Ref clock and HW de-spreading enabled with SW - calculations for DS_INCR/DS_MODULO (this is planned to be default case) - -if SS enabled on DP Ref clock and HW de-spreading enabled with HW - calculations (not planned to be used, but average clock should still - be valid) - -if SS enabled on DP Ref clock and HW de-spreading disabled - (should not be case with CIK) then SW should program all rates - generated according to average value (case as with previous ASICs) - */ -static int dccg_adjust_dp_ref_freq_for_ss(struct dce_dccg *clk_dce, int dp_ref_clk_khz) -{ - if (clk_dce->ss_on_dprefclk && clk_dce->dprefclk_ss_divider != 0) { - struct fixed31_32 ss_percentage = dc_fixpt_div_int( - dc_fixpt_from_fraction(clk_dce->dprefclk_ss_percentage, - clk_dce->dprefclk_ss_divider), 200); - struct fixed31_32 adj_dp_ref_clk_khz; - - ss_percentage = dc_fixpt_sub(dc_fixpt_one, ss_percentage); - adj_dp_ref_clk_khz = dc_fixpt_mul_int(ss_percentage, dp_ref_clk_khz); - 
dp_ref_clk_khz = dc_fixpt_floor(adj_dp_ref_clk_khz); - } - return dp_ref_clk_khz; -} - -static int dce_get_dp_ref_freq_khz(struct dccg *clk) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(clk); - int dprefclk_wdivider; - int dprefclk_src_sel; - int dp_ref_clk_khz = 600000; - int target_div; - - /* ASSERT DP Reference Clock source is from DFS*/ - REG_GET(DPREFCLK_CNTL, DPREFCLK_SRC_SEL, &dprefclk_src_sel); - ASSERT(dprefclk_src_sel == 0); - - /* Read the mmDENTIST_DISPCLK_CNTL to get the currently - * programmed DID DENTIST_DPREFCLK_WDIVIDER*/ - REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DPREFCLK_WDIVIDER, &dprefclk_wdivider); - - /* Convert DENTIST_DPREFCLK_WDIVIDERto actual divider*/ - target_div = dentist_get_divider_from_did(dprefclk_wdivider); - - /* Calculate the current DFS clock, in kHz.*/ - dp_ref_clk_khz = (DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_dce->dentist_vco_freq_khz) / target_div; - - return dccg_adjust_dp_ref_freq_for_ss(clk_dce, dp_ref_clk_khz); -} - -static int dce12_get_dp_ref_freq_khz(struct dccg *clk) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(clk); - - return dccg_adjust_dp_ref_freq_for_ss(clk_dce, clk_dce->dprefclk_khz); -} - -static enum dm_pp_clocks_state dce_get_required_clocks_state( - struct dccg *clk, - struct dc_clocks *req_clocks) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(clk); - int i; - enum dm_pp_clocks_state low_req_clk; - - /* Iterate from highest supported to lowest valid state, and update - * lowest RequiredState with the lowest state that satisfies - * all required clocks - */ - for (i = clk->max_clks_state; i >= DM_PP_CLOCKS_STATE_ULTRA_LOW; i--) - if (req_clocks->dispclk_khz > - clk_dce->max_clks_by_state[i].display_clk_khz - || req_clocks->phyclk_khz > - clk_dce->max_clks_by_state[i].pixel_clk_khz) - break; - - low_req_clk = i + 1; - if (low_req_clk > clk->max_clks_state) { - /* set max clock state for high phyclock, invalid on exceeding display clock */ - if 
(clk_dce->max_clks_by_state[clk->max_clks_state].display_clk_khz - < req_clocks->dispclk_khz) - low_req_clk = DM_PP_CLOCKS_STATE_INVALID; - else - low_req_clk = clk->max_clks_state; - } - - return low_req_clk; -} - -static int dce_set_clock( - struct dccg *clk, - int requested_clk_khz) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(clk); - struct bp_pixel_clock_parameters pxl_clk_params = { 0 }; - struct dc_bios *bp = clk->ctx->dc_bios; - int actual_clock = requested_clk_khz; - - /* Make sure requested clock isn't lower than minimum threshold*/ - if (requested_clk_khz > 0) - requested_clk_khz = max(requested_clk_khz, - clk_dce->dentist_vco_freq_khz / 64); - - /* Prepare to program display clock*/ - pxl_clk_params.target_pixel_clock = requested_clk_khz; - pxl_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; - - if (clk_dce->dfs_bypass_active) - pxl_clk_params.flags.SET_DISPCLK_DFS_BYPASS = true; - - bp->funcs->program_display_engine_pll(bp, &pxl_clk_params); - - if (clk_dce->dfs_bypass_active) { - /* Cache the fixed display clock*/ - clk_dce->dfs_bypass_disp_clk = - pxl_clk_params.dfs_bypass_display_clock; - actual_clock = pxl_clk_params.dfs_bypass_display_clock; - } - - /* from power down, we need mark the clock state as ClocksStateNominal - * from HWReset, so when resume we will call pplib voltage regulator.*/ - if (requested_clk_khz == 0) - clk->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; - return actual_clock; -} - -static int dce_psr_set_clock( - struct dccg *clk, - int requested_clk_khz) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(clk); - struct dc_context *ctx = clk_dce->base.ctx; - struct dc *core_dc = ctx->dc; - struct dmcu *dmcu = core_dc->res_pool->dmcu; - int actual_clk_khz = requested_clk_khz; - - actual_clk_khz = dce_set_clock(clk, requested_clk_khz); - - dmcu->funcs->set_psr_wait_loop(dmcu, actual_clk_khz / 1000 / 7); - return actual_clk_khz; -} - -static int dce112_set_clock( - struct dccg *clk, - int requested_clk_khz) -{ - struct dce_dccg *clk_dce = 
TO_DCE_CLOCKS(clk); - struct bp_set_dce_clock_parameters dce_clk_params; - struct dc_bios *bp = clk->ctx->dc_bios; - struct dc *core_dc = clk->ctx->dc; - struct dmcu *dmcu = core_dc->res_pool->dmcu; - int actual_clock = requested_clk_khz; - /* Prepare to program display clock*/ - memset(&dce_clk_params, 0, sizeof(dce_clk_params)); - - /* Make sure requested clock isn't lower than minimum threshold*/ - if (requested_clk_khz > 0) - requested_clk_khz = max(requested_clk_khz, - clk_dce->dentist_vco_freq_khz / 62); - - dce_clk_params.target_clock_frequency = requested_clk_khz; - dce_clk_params.pll_id = CLOCK_SOURCE_ID_DFS; - dce_clk_params.clock_type = DCECLOCK_TYPE_DISPLAY_CLOCK; - - bp->funcs->set_dce_clock(bp, &dce_clk_params); - actual_clock = dce_clk_params.target_clock_frequency; - - /* from power down, we need mark the clock state as ClocksStateNominal - * from HWReset, so when resume we will call pplib voltage regulator.*/ - if (requested_clk_khz == 0) - clk->cur_min_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; - - /*Program DP ref Clock*/ - /*VBIOS will determine DPREFCLK frequency, so we don't set it*/ - dce_clk_params.target_clock_frequency = 0; - dce_clk_params.clock_type = DCECLOCK_TYPE_DPREFCLK; - if (!ASICREV_IS_VEGA20_P(clk->ctx->asic_id.hw_internal_rev)) - dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = - (dce_clk_params.pll_id == - CLOCK_SOURCE_COMBO_DISPLAY_PLL0); - else - dce_clk_params.flags.USE_GENLOCK_AS_SOURCE_FOR_DPREFCLK = false; - - bp->funcs->set_dce_clock(bp, &dce_clk_params); - - if (!IS_FPGA_MAXIMUS_DC(core_dc->ctx->dce_environment)) { - if (clk_dce->dfs_bypass_disp_clk != actual_clock) - dmcu->funcs->set_psr_wait_loop(dmcu, - actual_clock / 1000 / 7); - } - - clk_dce->dfs_bypass_disp_clk = actual_clock; - return actual_clock; -} - -static void dce_clock_read_integrated_info(struct dce_dccg *clk_dce) -{ - struct dc_debug_options *debug = &clk_dce->base.ctx->dc->debug; - struct dc_bios *bp = clk_dce->base.ctx->dc_bios; - struct 
integrated_info info = { { { 0 } } }; - struct dc_firmware_info fw_info = { { 0 } }; - int i; - - if (bp->integrated_info) - info = *bp->integrated_info; - - clk_dce->dentist_vco_freq_khz = info.dentist_vco_freq; - if (clk_dce->dentist_vco_freq_khz == 0) { - bp->funcs->get_firmware_info(bp, &fw_info); - clk_dce->dentist_vco_freq_khz = - fw_info.smu_gpu_pll_output_freq; - if (clk_dce->dentist_vco_freq_khz == 0) - clk_dce->dentist_vco_freq_khz = 3600000; - } - - /*update the maximum display clock for each power state*/ - for (i = 0; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) { - enum dm_pp_clocks_state clk_state = DM_PP_CLOCKS_STATE_INVALID; - - switch (i) { - case 0: - clk_state = DM_PP_CLOCKS_STATE_ULTRA_LOW; - break; - - case 1: - clk_state = DM_PP_CLOCKS_STATE_LOW; - break; - - case 2: - clk_state = DM_PP_CLOCKS_STATE_NOMINAL; - break; - - case 3: - clk_state = DM_PP_CLOCKS_STATE_PERFORMANCE; - break; - - default: - clk_state = DM_PP_CLOCKS_STATE_INVALID; - break; - } - - /*Do not allow bad VBIOS/SBIOS to override with invalid values, - * check for > 100MHz*/ - if (info.disp_clk_voltage[i].max_supported_clk >= 100000) - clk_dce->max_clks_by_state[clk_state].display_clk_khz = - info.disp_clk_voltage[i].max_supported_clk; - } - - if (!debug->disable_dfs_bypass && bp->integrated_info) - if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE) - clk_dce->dfs_bypass_enabled = true; -} - -static void dce_clock_read_ss_info(struct dce_dccg *clk_dce) -{ - struct dc_bios *bp = clk_dce->base.ctx->dc_bios; - int ss_info_num = bp->funcs->get_ss_entry_number( - bp, AS_SIGNAL_TYPE_GPU_PLL); - - if (ss_info_num) { - struct spread_spectrum_info info = { { 0 } }; - enum bp_result result = bp->funcs->get_spread_spectrum_info( - bp, AS_SIGNAL_TYPE_GPU_PLL, 0, &info); - - /* Based on VBIOS, VBIOS will keep entry for GPU PLL SS - * even if SS not enabled and in that case - * SSInfo.spreadSpectrumPercentage !=0 would be sign - * that SS is enabled - */ - if (result == BP_RESULT_OK && - 
info.spread_spectrum_percentage != 0) { - clk_dce->ss_on_dprefclk = true; - clk_dce->dprefclk_ss_divider = info.spread_percentage_divider; - - if (info.type.CENTER_MODE == 0) { - /* TODO: Currently for DP Reference clock we - * need only SS percentage for - * downspread */ - clk_dce->dprefclk_ss_percentage = - info.spread_spectrum_percentage; - } - - return; - } - - result = bp->funcs->get_spread_spectrum_info( - bp, AS_SIGNAL_TYPE_DISPLAY_PORT, 0, &info); - - /* Based on VBIOS, VBIOS will keep entry for DPREFCLK SS - * even if SS not enabled and in that case - * SSInfo.spreadSpectrumPercentage !=0 would be sign - * that SS is enabled - */ - if (result == BP_RESULT_OK && - info.spread_spectrum_percentage != 0) { - clk_dce->ss_on_dprefclk = true; - clk_dce->dprefclk_ss_divider = info.spread_percentage_divider; - - if (info.type.CENTER_MODE == 0) { - /* Currently for DP Reference clock we - * need only SS percentage for - * downspread */ - clk_dce->dprefclk_ss_percentage = - info.spread_spectrum_percentage; - } - } - } -} - -static inline bool should_set_clock(bool safe_to_lower, int calc_clk, int cur_clk) -{ - return ((safe_to_lower && calc_clk < cur_clk) || calc_clk > cur_clk); -} - -static void dce12_update_clocks(struct dccg *dccg, - struct dc_clocks *new_clocks, - bool safe_to_lower) -{ - struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; - - /* TODO: Investigate why this is needed to fix display corruption. 
*/ - new_clocks->dispclk_khz = new_clocks->dispclk_khz * 115 / 100; - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, dccg->clks.dispclk_khz)) { - clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAY_CLK; - clock_voltage_req.clocks_in_khz = new_clocks->dispclk_khz; - new_clocks->dispclk_khz = dccg->funcs->set_dispclk(dccg, new_clocks->dispclk_khz); - dccg->clks.dispclk_khz = new_clocks->dispclk_khz; - - dm_pp_apply_clock_for_voltage_request(dccg->ctx, &clock_voltage_req); - } - - if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, dccg->clks.phyclk_khz)) { - clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DISPLAYPHYCLK; - clock_voltage_req.clocks_in_khz = new_clocks->phyclk_khz; - dccg->clks.phyclk_khz = new_clocks->phyclk_khz; - - dm_pp_apply_clock_for_voltage_request(dccg->ctx, &clock_voltage_req); - } -} - -#ifdef CONFIG_DRM_AMD_DC_DCN1_0 -static int dcn1_determine_dppclk_threshold(struct dccg *dccg, struct dc_clocks *new_clocks) -{ - bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; - bool dispclk_increase = new_clocks->dispclk_khz > dccg->clks.dispclk_khz; - int disp_clk_threshold = new_clocks->max_supported_dppclk_khz; - bool cur_dpp_div = dccg->clks.dispclk_khz > dccg->clks.dppclk_khz; - - /* increase clock, looking for div is 0 for current, request div is 1*/ - if (dispclk_increase) { - /* already divided by 2, no need to reach target clk with 2 steps*/ - if (cur_dpp_div) - return new_clocks->dispclk_khz; - - /* request disp clk is lower than maximum supported dpp clk, - * no need to reach target clk with two steps. - */ - if (new_clocks->dispclk_khz <= disp_clk_threshold) - return new_clocks->dispclk_khz; - - /* target dpp clk not request divided by 2, still within threshold */ - if (!request_dpp_div) - return new_clocks->dispclk_khz; - - } else { - /* decrease clock, looking for current dppclk divided by 2, - * request dppclk not divided by 2. 
- */ - - /* current dpp clk not divided by 2, no need to ramp*/ - if (!cur_dpp_div) - return new_clocks->dispclk_khz; - - /* current disp clk is lower than current maximum dpp clk, - * no need to ramp - */ - if (dccg->clks.dispclk_khz <= disp_clk_threshold) - return new_clocks->dispclk_khz; - - /* request dpp clk need to be divided by 2 */ - if (request_dpp_div) - return new_clocks->dispclk_khz; - } - - return disp_clk_threshold; -} - -static void dcn1_ramp_up_dispclk_with_dpp(struct dccg *dccg, struct dc_clocks *new_clocks) -{ - struct dc *dc = dccg->ctx->dc; - int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(dccg, new_clocks); - bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; - int i; - - /* set disp clk to dpp clk threshold */ - dccg->funcs->set_dispclk(dccg, dispclk_to_dpp_threshold); - - /* update request dpp clk division option */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; - - if (!pipe_ctx->plane_state) - continue; - - pipe_ctx->plane_res.dpp->funcs->dpp_dppclk_control( - pipe_ctx->plane_res.dpp, - request_dpp_div, - true); - } - - /* If target clk not same as dppclk threshold, set to target clock */ - if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) - dccg->funcs->set_dispclk(dccg, new_clocks->dispclk_khz); - - dccg->clks.dispclk_khz = new_clocks->dispclk_khz; - dccg->clks.dppclk_khz = new_clocks->dppclk_khz; - dccg->clks.max_supported_dppclk_khz = new_clocks->max_supported_dppclk_khz; -} - -static void dcn1_update_clocks(struct dccg *dccg, - struct dc_clocks *new_clocks, - bool safe_to_lower) -{ - struct dc *dc = dccg->ctx->dc; - struct pp_smu_display_requirement_rv *smu_req_cur = - &dc->res_pool->pp_smu_req; - struct pp_smu_display_requirement_rv smu_req = *smu_req_cur; - struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; - struct dm_pp_clock_for_voltage_req clock_voltage_req = {0}; - bool send_request_to_increase = false; - 
bool send_request_to_lower = false; - - if (new_clocks->phyclk_khz) - smu_req.display_count = 1; - else - smu_req.display_count = 0; - - if (new_clocks->dispclk_khz > dccg->clks.dispclk_khz - || new_clocks->phyclk_khz > dccg->clks.phyclk_khz - || new_clocks->fclk_khz > dccg->clks.fclk_khz - || new_clocks->dcfclk_khz > dccg->clks.dcfclk_khz) - send_request_to_increase = true; - - if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, dccg->clks.phyclk_khz)) { - dccg->clks.phyclk_khz = new_clocks->phyclk_khz; - - send_request_to_lower = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, dccg->clks.fclk_khz)) { - dccg->clks.fclk_khz = new_clocks->fclk_khz; - clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_FCLK; - clock_voltage_req.clocks_in_khz = new_clocks->fclk_khz; - smu_req.hard_min_fclk_khz = new_clocks->fclk_khz; - - dm_pp_apply_clock_for_voltage_request(dccg->ctx, &clock_voltage_req); - send_request_to_lower = true; - } - - if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, dccg->clks.dcfclk_khz)) { - dccg->clks.dcfclk_khz = new_clocks->dcfclk_khz; - smu_req.hard_min_dcefclk_khz = new_clocks->dcfclk_khz; - - send_request_to_lower = true; - } - - if (should_set_clock(safe_to_lower, - new_clocks->dcfclk_deep_sleep_khz, dccg->clks.dcfclk_deep_sleep_khz)) { - dccg->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; - smu_req.min_deep_sleep_dcefclk_mhz = new_clocks->dcfclk_deep_sleep_khz; - - send_request_to_lower = true; - } - - /* make sure dcf clk is before dpp clk to - * make sure we have enough voltage to run dpp clk - */ - if (send_request_to_increase) { - /*use dcfclk to request voltage*/ - clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DCFCLK; - clock_voltage_req.clocks_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); - dm_pp_apply_clock_for_voltage_request(dccg->ctx, &clock_voltage_req); - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - } - - /* dcn1 dppclk 
is tied to dispclk */ - /* program dispclk on = as a w/a for sleep resume clock ramping issues */ - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, dccg->clks.dispclk_khz) - || new_clocks->dispclk_khz == dccg->clks.dispclk_khz) { - dcn1_ramp_up_dispclk_with_dpp(dccg, new_clocks); - dccg->clks.dispclk_khz = new_clocks->dispclk_khz; - - send_request_to_lower = true; - } - - if (!send_request_to_increase && send_request_to_lower) { - /*use dcfclk to request voltage*/ - clock_voltage_req.clk_type = DM_PP_CLOCK_TYPE_DCFCLK; - clock_voltage_req.clocks_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); - dm_pp_apply_clock_for_voltage_request(dccg->ctx, &clock_voltage_req); - if (pp_smu->set_display_requirement) - pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); - } - - - *smu_req_cur = smu_req; -} -#endif - -static void dce_update_clocks(struct dccg *dccg, - struct dc_clocks *new_clocks, - bool safe_to_lower) -{ - struct dm_pp_power_level_change_request level_change_req; - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(dccg); - - /* TODO: Investigate why this is needed to fix display corruption. 
*/ - if (!clk_dce->dfs_bypass_active) - new_clocks->dispclk_khz = new_clocks->dispclk_khz * 115 / 100; - - level_change_req.power_level = dce_get_required_clocks_state(dccg, new_clocks); - /* get max clock state from PPLIB */ - if ((level_change_req.power_level < dccg->cur_min_clks_state && safe_to_lower) - || level_change_req.power_level > dccg->cur_min_clks_state) { - if (dm_pp_apply_power_level_change_request(dccg->ctx, &level_change_req)) - dccg->cur_min_clks_state = level_change_req.power_level; - } - - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, dccg->clks.dispclk_khz)) { - new_clocks->dispclk_khz = dccg->funcs->set_dispclk(dccg, new_clocks->dispclk_khz); - dccg->clks.dispclk_khz = new_clocks->dispclk_khz; - } -} - -static bool dce_update_dfs_bypass( - struct dccg *dccg, - struct dc *dc, - struct dc_state *context, - int requested_clock_khz) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(dccg); - struct resource_context *res_ctx = &context->res_ctx; - enum signal_type signal_type = SIGNAL_TYPE_NONE; - bool was_active = clk_dce->dfs_bypass_active; - int i; - - /* Disable DFS bypass by default. */ - clk_dce->dfs_bypass_active = false; - - /* Check that DFS bypass is available. */ - if (!clk_dce->dfs_bypass_enabled) - goto update; - - /* Check if the requested display clock is below the threshold. */ - if (requested_clock_khz >= 400000) - goto update; - - /* DFS-bypass should only be enabled on single stream setups */ - if (context->stream_count != 1) - goto update; - - /* Check that the stream's signal type is an embedded panel */ - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (res_ctx->pipe_ctx[i].stream) { - struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[i]; - - signal_type = pipe_ctx->stream->sink->link->connector_signal; - break; - } - } - - if (signal_type == SIGNAL_TYPE_EDP || - signal_type == SIGNAL_TYPE_LVDS) - clk_dce->dfs_bypass_active = true; - -update: - /* Update the clock state. 
We don't need to respect safe_to_lower - * because DFS bypass should always be greater than the current - * display clock frequency. - */ - if (was_active != clk_dce->dfs_bypass_active) { - dccg->clks.dispclk_khz = - dccg->funcs->set_dispclk(dccg, dccg->clks.dispclk_khz); - return true; - } - - return false; -} - -#ifdef CONFIG_DRM_AMD_DC_DCN1_0 -static const struct display_clock_funcs dcn1_funcs = { - .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, - .set_dispclk = dce112_set_clock, - .update_clocks = dcn1_update_clocks -}; -#endif - -static const struct display_clock_funcs dce120_funcs = { - .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, - .set_dispclk = dce112_set_clock, - .update_clocks = dce12_update_clocks -}; - -static const struct display_clock_funcs dce112_funcs = { - .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, - .set_dispclk = dce112_set_clock, - .update_clocks = dce_update_clocks -}; - -static const struct display_clock_funcs dce110_funcs = { - .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, - .set_dispclk = dce_psr_set_clock, - .update_clocks = dce_update_clocks, - .update_dfs_bypass = dce_update_dfs_bypass -}; - -static const struct display_clock_funcs dce_funcs = { - .get_dp_ref_clk_frequency = dce_get_dp_ref_freq_khz, - .set_dispclk = dce_set_clock, - .update_clocks = dce_update_clocks -}; - -static void dce_dccg_construct( - struct dce_dccg *clk_dce, - struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask) -{ - struct dccg *base = &clk_dce->base; - - base->ctx = ctx; - base->funcs = &dce_funcs; - - clk_dce->regs = regs; - clk_dce->clk_shift = clk_shift; - clk_dce->clk_mask = clk_mask; - - clk_dce->dfs_bypass_disp_clk = 0; - - clk_dce->dprefclk_ss_percentage = 0; - clk_dce->dprefclk_ss_divider = 1000; - clk_dce->ss_on_dprefclk = false; - - base->max_clks_state = DM_PP_CLOCKS_STATE_NOMINAL; - base->cur_min_clks_state = 
DM_PP_CLOCKS_STATE_INVALID; - - dce_clock_read_integrated_info(clk_dce); - dce_clock_read_ss_info(clk_dce); -} - -struct dccg *dce_dccg_create( - struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask) -{ - struct dce_dccg *clk_dce = kzalloc(sizeof(*clk_dce), GFP_KERNEL); - - if (clk_dce == NULL) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - memcpy(clk_dce->max_clks_by_state, - dce80_max_clks_by_state, - sizeof(dce80_max_clks_by_state)); - - dce_dccg_construct( - clk_dce, ctx, regs, clk_shift, clk_mask); - - return &clk_dce->base; -} - -struct dccg *dce110_dccg_create( - struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask) -{ - struct dce_dccg *clk_dce = kzalloc(sizeof(*clk_dce), GFP_KERNEL); - - if (clk_dce == NULL) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - memcpy(clk_dce->max_clks_by_state, - dce110_max_clks_by_state, - sizeof(dce110_max_clks_by_state)); - - dce_dccg_construct( - clk_dce, ctx, regs, clk_shift, clk_mask); - - clk_dce->base.funcs = &dce110_funcs; - - return &clk_dce->base; -} - -struct dccg *dce112_dccg_create( - struct dc_context *ctx, - const struct dccg_registers *regs, - const struct dccg_shift *clk_shift, - const struct dccg_mask *clk_mask) -{ - struct dce_dccg *clk_dce = kzalloc(sizeof(*clk_dce), GFP_KERNEL); - - if (clk_dce == NULL) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - memcpy(clk_dce->max_clks_by_state, - dce112_max_clks_by_state, - sizeof(dce112_max_clks_by_state)); - - dce_dccg_construct( - clk_dce, ctx, regs, clk_shift, clk_mask); - - clk_dce->base.funcs = &dce112_funcs; - - return &clk_dce->base; -} - -struct dccg *dce120_dccg_create(struct dc_context *ctx) -{ - struct dce_dccg *clk_dce = kzalloc(sizeof(*clk_dce), GFP_KERNEL); - - if (clk_dce == NULL) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - memcpy(clk_dce->max_clks_by_state, - dce120_max_clks_by_state, 
- sizeof(dce120_max_clks_by_state)); - - dce_dccg_construct( - clk_dce, ctx, NULL, NULL, NULL); - - clk_dce->dprefclk_khz = 600000; - clk_dce->base.funcs = &dce120_funcs; - - return &clk_dce->base; -} - -#ifdef CONFIG_DRM_AMD_DC_DCN1_0 -struct dccg *dcn1_dccg_create(struct dc_context *ctx) -{ - struct dc_debug_options *debug = &ctx->dc->debug; - struct dc_bios *bp = ctx->dc_bios; - struct dc_firmware_info fw_info = { { 0 } }; - struct dce_dccg *clk_dce = kzalloc(sizeof(*clk_dce), GFP_KERNEL); - - if (clk_dce == NULL) { - BREAK_TO_DEBUGGER(); - return NULL; - } - - clk_dce->base.ctx = ctx; - clk_dce->base.funcs = &dcn1_funcs; - - clk_dce->dfs_bypass_disp_clk = 0; - - clk_dce->dprefclk_ss_percentage = 0; - clk_dce->dprefclk_ss_divider = 1000; - clk_dce->ss_on_dprefclk = false; - - clk_dce->dprefclk_khz = 600000; - if (bp->integrated_info) - clk_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; - if (clk_dce->dentist_vco_freq_khz == 0) { - bp->funcs->get_firmware_info(bp, &fw_info); - clk_dce->dentist_vco_freq_khz = fw_info.smu_gpu_pll_output_freq; - if (clk_dce->dentist_vco_freq_khz == 0) - clk_dce->dentist_vco_freq_khz = 3600000; - } - - if (!debug->disable_dfs_bypass && bp->integrated_info) - if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE) - clk_dce->dfs_bypass_enabled = true; - - dce_clock_read_ss_info(clk_dce); - - return &clk_dce->base; -} -#endif - -void dce_dccg_destroy(struct dccg **dccg) -{ - struct dce_dccg *clk_dce = TO_DCE_CLOCKS(*dccg); - - kfree(clk_dce); - *dccg = NULL; -} diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index 64dc75378541..c83a7f05f14c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -233,6 +233,16 @@ struct dce_hwseq_registers { uint32_t DOMAIN5_PG_CONFIG; uint32_t DOMAIN6_PG_CONFIG; uint32_t DOMAIN7_PG_CONFIG; + uint32_t DOMAIN8_PG_CONFIG; + uint32_t DOMAIN9_PG_CONFIG; + uint32_t 
DOMAIN10_PG_CONFIG; + uint32_t DOMAIN11_PG_CONFIG; + uint32_t DOMAIN16_PG_CONFIG; + uint32_t DOMAIN17_PG_CONFIG; + uint32_t DOMAIN18_PG_CONFIG; + uint32_t DOMAIN19_PG_CONFIG; + uint32_t DOMAIN20_PG_CONFIG; + uint32_t DOMAIN21_PG_CONFIG; uint32_t DOMAIN0_PG_STATUS; uint32_t DOMAIN1_PG_STATUS; uint32_t DOMAIN2_PG_STATUS; @@ -241,6 +251,16 @@ struct dce_hwseq_registers { uint32_t DOMAIN5_PG_STATUS; uint32_t DOMAIN6_PG_STATUS; uint32_t DOMAIN7_PG_STATUS; + uint32_t DOMAIN8_PG_STATUS; + uint32_t DOMAIN9_PG_STATUS; + uint32_t DOMAIN10_PG_STATUS; + uint32_t DOMAIN11_PG_STATUS; + uint32_t DOMAIN16_PG_STATUS; + uint32_t DOMAIN17_PG_STATUS; + uint32_t DOMAIN18_PG_STATUS; + uint32_t DOMAIN19_PG_STATUS; + uint32_t DOMAIN20_PG_STATUS; + uint32_t DOMAIN21_PG_STATUS; uint32_t DIO_MEM_PWR_CTRL; uint32_t DCCG_GATE_DISABLE_CNTL; uint32_t DCCG_GATE_DISABLE_CNTL2; @@ -262,6 +282,8 @@ struct dce_hwseq_registers { uint32_t D2VGA_CONTROL; uint32_t D3VGA_CONTROL; uint32_t D4VGA_CONTROL; + uint32_t D5VGA_CONTROL; + uint32_t D6VGA_CONTROL; uint32_t VGA_TEST_CONTROL; /* MMHUB registers. read only. 
temporary hack */ uint32_t VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32; @@ -489,6 +511,26 @@ struct dce_hwseq_registers { type DOMAIN6_POWER_GATE; \ type DOMAIN7_POWER_FORCEON; \ type DOMAIN7_POWER_GATE; \ + type DOMAIN8_POWER_FORCEON; \ + type DOMAIN8_POWER_GATE; \ + type DOMAIN9_POWER_FORCEON; \ + type DOMAIN9_POWER_GATE; \ + type DOMAIN10_POWER_FORCEON; \ + type DOMAIN10_POWER_GATE; \ + type DOMAIN11_POWER_FORCEON; \ + type DOMAIN11_POWER_GATE; \ + type DOMAIN16_POWER_FORCEON; \ + type DOMAIN16_POWER_GATE; \ + type DOMAIN17_POWER_FORCEON; \ + type DOMAIN17_POWER_GATE; \ + type DOMAIN18_POWER_FORCEON; \ + type DOMAIN18_POWER_GATE; \ + type DOMAIN19_POWER_FORCEON; \ + type DOMAIN19_POWER_GATE; \ + type DOMAIN20_POWER_FORCEON; \ + type DOMAIN20_POWER_GATE; \ + type DOMAIN21_POWER_FORCEON; \ + type DOMAIN21_POWER_GATE; \ type DOMAIN0_PGFSM_PWR_STATUS; \ type DOMAIN1_PGFSM_PWR_STATUS; \ type DOMAIN2_PGFSM_PWR_STATUS; \ @@ -497,6 +539,16 @@ struct dce_hwseq_registers { type DOMAIN5_PGFSM_PWR_STATUS; \ type DOMAIN6_PGFSM_PWR_STATUS; \ type DOMAIN7_PGFSM_PWR_STATUS; \ + type DOMAIN8_PGFSM_PWR_STATUS; \ + type DOMAIN9_PGFSM_PWR_STATUS; \ + type DOMAIN10_PGFSM_PWR_STATUS; \ + type DOMAIN11_PGFSM_PWR_STATUS; \ + type DOMAIN16_PGFSM_PWR_STATUS; \ + type DOMAIN17_PGFSM_PWR_STATUS; \ + type DOMAIN18_PGFSM_PWR_STATUS; \ + type DOMAIN19_PGFSM_PWR_STATUS; \ + type DOMAIN20_PGFSM_PWR_STATUS; \ + type DOMAIN21_PGFSM_PWR_STATUS; \ type DCFCLK_GATE_DIS; \ type DCHUBBUB_GLOBAL_TIMER_REFDIV; \ type VGA_TEST_ENABLE; \ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c index 366bc8c2c643..3e18ea84b1f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_link_encoder.c @@ -645,7 +645,7 @@ static bool dce110_link_encoder_validate_hdmi_output( return false; /* DCE11 HW does not support 420 */ - if (!enc110->base.features.ycbcr420_supported && + if 
(!enc110->base.features.hdmi_ycbcr420_supported && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index c47c81883d3c..cce0d18f91da 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -908,7 +908,6 @@ static void dce110_stream_encoder_dp_blank( struct stream_encoder *enc) { struct dce110_stream_encoder *enc110 = DCE110STRENC_FROM_STRENC(enc); - uint32_t retries = 0; uint32_t reg1 = 0; uint32_t max_retries = DP_BLANK_MAX_RETRY * 10; @@ -926,30 +925,28 @@ static void dce110_stream_encoder_dp_blank( * (2 = start of the next vertical blank) */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_DIS_DEFER, 2); /* Larger delay to wait until VBLANK - use max retry of - * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode + - * a little more because we may not trust delay accuracy. - */ + * 10us*3000=30ms. This covers 16.6ms of typical 60 Hz mode + + * a little more because we may not trust delay accuracy. + */ max_retries = DP_BLANK_MAX_RETRY * 150; /* disable DP stream */ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, 0); /* the encoder stops sending the video stream - * at the start of the vertical blanking. - * Poll for DP_VID_STREAM_STATUS == 0 - */ + * at the start of the vertical blanking. + * Poll for DP_VID_STREAM_STATUS == 0 + */ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, max_retries); - ASSERT(retries <= max_retries); - /* Tell the DP encoder to ignore timing from CRTC, must be done after - * the polling. If we set DP_STEER_FIFO_RESET before DP stream blank is - * complete, stream status will be stuck in video stream enabled state, - * i.e. DP_VID_STREAM_STATUS stuck at 1. - */ + * the polling. 
If we set DP_STEER_FIFO_RESET before DP stream blank is + * complete, stream status will be stuck in video stream enabled state, + * i.e. DP_VID_STREAM_STATUS stuck at 1. + */ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, true); } diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c index 74c05e878807..87771676acac 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c @@ -105,74 +105,30 @@ bool dce100_enable_display_power_gating( return false; } -static void dce100_pplib_apply_display_requirements( - struct dc *dc, - struct dc_state *context) -{ - struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /*pp_display_cfg->min_memory_clock_khz = context->bw.dce.yclk_khz - / MEMORY_TYPE_MULTIPLIER;*/ - - dce110_fill_display_configs(context, pp_display_cfg); - - if (memcmp(&dc->prev_display_config, pp_display_cfg, sizeof( - struct dm_pp_display_configuration)) != 0) - dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); - - dc->prev_display_config = *pp_display_cfg; -} - -/* unit: in_khz before mode set, get pixel clock from context. 
ASIC register - * may not be programmed yet - */ -static uint32_t get_max_pixel_clock_for_all_paths( - struct dc *dc, - struct dc_state *context) +void dce100_prepare_bandwidth( + struct dc *dc, + struct dc_state *context) { - uint32_t max_pix_clk = 0; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream == NULL) - continue; - - /* do not check under lay */ - if (pipe_ctx->top_pipe) - continue; + dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk > max_pix_clk) - max_pix_clk = - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; - } - return max_pix_clk; + dc->res_pool->clk_mgr->funcs->update_clocks( + dc->res_pool->clk_mgr, + context, + false); } -void dce100_set_bandwidth( +void dce100_optimize_bandwidth( struct dc *dc, - struct dc_state *context, - bool decrease_allowed) + struct dc_state *context) { - struct dc_clocks req_clks; - - req_clks.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100; - req_clks.phyclk_khz = get_max_pixel_clock_for_all_paths(dc, context); - dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - dc->res_pool->dccg->funcs->update_clocks( - dc->res_pool->dccg, - &req_clks, - decrease_allowed); - - dce100_pplib_apply_display_requirements(dc, context); + dc->res_pool->clk_mgr->funcs->update_clocks( + dc->res_pool->clk_mgr, + context, + true); } - /**************************************************************************/ void dce100_hw_sequencer_construct(struct dc *dc) @@ -180,8 +136,7 @@ void dce100_hw_sequencer_construct(struct dc *dc) dce110_hw_sequencer_construct(dc); dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating; - dc->hwss.set_bandwidth = dce100_set_bandwidth; - dc->hwss.pplib_apply_display_requirements = - dce100_pplib_apply_display_requirements; + dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth; + dc->hwss.optimize_bandwidth = 
dce100_optimize_bandwidth; } diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h index c6ec0ed6ec3d..acd418515346 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.h @@ -33,10 +33,9 @@ struct dc_state; void dce100_hw_sequencer_construct(struct dc *dc); -void dce100_set_bandwidth( +void dce100_prepare_bandwidth( struct dc *dc, - struct dc_state *context, - bool decrease_allowed); + struct dc_state *context); bool dce100_enable_display_power_gating(struct dc *dc, uint8_t controller_id, struct dc_bios *dcb, diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 14754a87156c..6ae51a5dfc04 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -36,11 +36,11 @@ #include "dce/dce_link_encoder.h" #include "dce/dce_stream_encoder.h" +#include "dce/dce_clk_mgr.h" #include "dce/dce_mem_input.h" #include "dce/dce_ipp.h" #include "dce/dce_transform.h" #include "dce/dce_opp.h" -#include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" @@ -137,15 +137,15 @@ static const struct dce110_timing_generator_offsets dce100_tg_offsets[] = { .reg_name = mm ## block ## id ## _ ## reg_name -static const struct dccg_registers disp_clk_regs = { +static const struct clk_mgr_registers disp_clk_regs = { CLK_COMMON_REG_LIST_DCE_BASE() }; -static const struct dccg_shift disp_clk_shift = { +static const struct clk_mgr_shift disp_clk_shift = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) }; -static const struct dccg_mask disp_clk_mask = { +static const struct clk_mgr_mask disp_clk_mask = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) }; @@ -722,8 +722,8 @@ static void destruct(struct dce110_resource_pool *pool) 
dce_aud_destroy(&pool->base.audios[i]); } - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + dce_clk_mgr_destroy(&pool->base.clk_mgr); if (pool->base.abm != NULL) dce_abm_destroy(&pool->base.abm); @@ -767,7 +767,7 @@ bool dce100_validate_bandwidth( if (at_least_one_pipe) { /* TODO implement when needed but for now hardcode max value*/ context->bw.dce.dispclk_khz = 681000; - context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER; + context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; } else { context->bw.dce.dispclk_khz = 0; context->bw.dce.yclk_khz = 0; @@ -860,7 +860,6 @@ static bool construct( struct dc_context *ctx = dc->ctx; struct dc_firmware_info info; struct dc_bios *bp; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -908,11 +907,11 @@ static bool construct( } } - pool->base.dccg = dce_dccg_create(ctx, + pool->base.clk_mgr = dce_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -938,12 +937,6 @@ static bool construct( goto res_create_fail; } - /* get static clock information for PPLIB or firmware, save - * max_clock_state - */ - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c index 1f7f25013217..52d50e24a995 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c @@ -64,65 +64,37 @@ static const struct dce110_compressor_reg_offsets reg_offsets[] = { static const uint32_t dce11_one_lpt_channel_max_resolution = 2560 * 
1600; -enum fbc_idle_force { - /* Bit 0 - Display registers updated */ - FBC_IDLE_FORCE_DISPLAY_REGISTER_UPDATE = 0x00000001, - - /* Bit 2 - FBC_GRPH_COMP_EN register updated */ - FBC_IDLE_FORCE_GRPH_COMP_EN = 0x00000002, - /* Bit 3 - FBC_SRC_SEL register updated */ - FBC_IDLE_FORCE_SRC_SEL_CHANGE = 0x00000004, - /* Bit 4 - FBC_MIN_COMPRESSION register updated */ - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE = 0x00000008, - /* Bit 5 - FBC_ALPHA_COMP_EN register updated */ - FBC_IDLE_FORCE_ALPHA_COMP_EN = 0x00000010, - /* Bit 6 - FBC_ZERO_ALPHA_CHUNK_SKIP_EN register updated */ - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN = 0x00000020, - /* Bit 7 - FBC_FORCE_COPY_TO_COMP_BUF register updated */ - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF = 0x00000040, - - /* Bit 24 - Memory write to region 0 defined by MC registers. */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION0 = 0x01000000, - /* Bit 25 - Memory write to region 1 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION1 = 0x02000000, - /* Bit 26 - Memory write to region 2 defined by MC registers */ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION2 = 0x04000000, - /* Bit 27 - Memory write to region 3 defined by MC registers. 
*/ - FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION3 = 0x08000000, - - /* Bit 28 - Memory write from any client other than MCIF */ - FBC_IDLE_FORCE_MEMORY_WRITE_OTHER_THAN_MCIF = 0x10000000, - /* Bit 29 - CG statics screen signal is inactive */ - FBC_IDLE_FORCE_CG_STATIC_SCREEN_IS_INACTIVE = 0x20000000, -}; - - static uint32_t align_to_chunks_number_per_line(uint32_t pixels) { return 256 * ((pixels + 255) / 256); } -static void reset_lb_on_vblank(struct dc_context *ctx) +static void reset_lb_on_vblank(struct compressor *compressor, uint32_t crtc_inst) { - uint32_t value, frame_count; + uint32_t value; + uint32_t frame_count; + uint32_t status_pos; uint32_t retry = 0; - uint32_t status_pos = - dm_read_reg(ctx, mmCRTC_STATUS_POSITION); + struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor); + + cp110->offsets = reg_offsets[crtc_inst]; + + status_pos = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION)); /* Only if CRTC is enabled and counter is moving we wait for one frame. 
*/ - if (status_pos != dm_read_reg(ctx, mmCRTC_STATUS_POSITION)) { + if (status_pos != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION))) { /* Resetting LB on VBlank */ - value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL); + value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL)); set_reg_field_value(value, 3, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL); set_reg_field_value(value, 1, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2); - dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value); + dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value); - frame_count = dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT); + frame_count = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT)); for (retry = 10000; retry > 0; retry--) { - if (frame_count != dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT)) + if (frame_count != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT))) break; udelay(10); } @@ -130,13 +102,11 @@ static void reset_lb_on_vblank(struct dc_context *ctx) dm_error("Frame count did not increase for 100ms.\n"); /* Resetting LB on VBlank */ - value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL); + value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL)); set_reg_field_value(value, 2, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL); set_reg_field_value(value, 0, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2); - dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value); - + dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value); } - } static void wait_for_fbc_state_changed( @@ -226,10 +196,10 @@ void dce110_compressor_enable_fbc( uint32_t addr; uint32_t value, misc_value; - addr = mmFBC_CNTL; value = dm_read_reg(compressor->ctx, addr); set_reg_field_value(value, 1, FBC_CNTL, FBC_GRPH_COMP_EN); + /* params->inst is valid HW CRTC instance start from 0 */ set_reg_field_value( value, params->inst, @@ -238,8 +208,10 @@ void dce110_compressor_enable_fbc( /* Keep track of enum controller_id FBC is attached to */ compressor->is_enabled = true; - 
compressor->attached_inst = params->inst; - cp110->offsets = reg_offsets[params->inst]; + /* attached_inst is SW CRTC instance start from 1 + * 0 = CONTROLLER_ID_UNDEFINED means not attached crtc + */ + compressor->attached_inst = params->inst + CONTROLLER_ID_D0; /* Toggle it as there is bug in HW */ set_reg_field_value(value, 0, FBC_CNTL, FBC_GRPH_COMP_EN); @@ -268,9 +240,10 @@ void dce110_compressor_enable_fbc( void dce110_compressor_disable_fbc(struct compressor *compressor) { struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor); + uint32_t crtc_inst = 0; if (compressor->options.bits.FBC_SUPPORT) { - if (dce110_compressor_is_fbc_enabled_in_hw(compressor, NULL)) { + if (dce110_compressor_is_fbc_enabled_in_hw(compressor, &crtc_inst)) { uint32_t reg_data; /* Turn off compression */ reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL); @@ -284,8 +257,10 @@ void dce110_compressor_disable_fbc(struct compressor *compressor) wait_for_fbc_state_changed(cp110, false); } - /* Sync line buffer - dce100/110 only*/ - reset_lb_on_vblank(compressor->ctx); + /* Sync line buffer which fbc was attached to dce100/110 only */ + if (crtc_inst > CONTROLLER_ID_UNDEFINED && crtc_inst < CONTROLLER_ID_D3) + reset_lb_on_vblank(compressor, + crtc_inst - CONTROLLER_ID_D0); } } @@ -328,6 +303,8 @@ void dce110_compressor_program_compressed_surface_address_and_pitch( uint32_t compressed_surf_address_low_part = compressor->compr_surface_address.addr.low_part; + cp110->offsets = reg_offsets[params->inst]; + /* Clear content first. 
*/ dm_write_reg( compressor->ctx, @@ -410,13 +387,7 @@ void dce110_compressor_set_fbc_invalidation_triggers( value = dm_read_reg(compressor->ctx, addr); set_reg_field_value( value, - fbc_trigger | - FBC_IDLE_FORCE_GRPH_COMP_EN | - FBC_IDLE_FORCE_SRC_SEL_CHANGE | - FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE | - FBC_IDLE_FORCE_ALPHA_COMP_EN | - FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN | - FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF, + fbc_trigger, FBC_IDLE_FORCE_CLEAR_MASK, FBC_IDLE_FORCE_CLEAR_MASK); dm_write_reg(compressor->ctx, addr, value); @@ -549,7 +520,7 @@ void dce110_compressor_construct(struct dce110_compressor *compressor, compressor->base.channel_interleave_size = 0; compressor->base.dram_channels_num = 0; compressor->base.lpt_channels_num = 0; - compressor->base.attached_inst = 0; + compressor->base.attached_inst = CONTROLLER_ID_UNDEFINED; compressor->base.is_enabled = false; compressor->base.funcs = &dce110_compressor_funcs; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index a6bcb90e8419..6349ba7bec7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -548,14 +548,14 @@ dce110_translate_regamma_to_hw_format(const struct dc_transfer_func *output_tf, regamma_params->hw_points_num = hw_points; - i = 1; - for (k = 0; k < 16 && i < 16; k++) { + k = 0; + for (i = 1; i < 16; i++) { if (seg_distr[k] != -1) { regamma_params->arr_curve_points[k].segments_num = seg_distr[k]; regamma_params->arr_curve_points[i].offset = regamma_params->arr_curve_points[k].offset + (1 << seg_distr[k]); } - i++; + k++; } if (seg_distr[k] != -1) @@ -1085,7 +1085,6 @@ void dce110_unblank_stream(struct pipe_ctx *pipe_ctx, if (link->local_sink && link->local_sink->sink_signal == SIGNAL_TYPE_EDP) { link->dc->hwss.edp_backlight_control(link, true); - stream->bl_pwm_level = EDP_BACKLIGHT_RAMP_DISABLE_LEVEL; } } void 
dce110_blank_stream(struct pipe_ctx *pipe_ctx) @@ -1192,8 +1191,8 @@ static void build_audio_output( if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT || pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) { audio_output->pll_info.dp_dto_source_clock_in_khz = - state->dis_clk->funcs->get_dp_ref_clk_frequency( - state->dis_clk); + state->dccg->funcs->get_dp_ref_clk_frequency( + state->dccg); } audio_output->pll_info.feed_back_divider = @@ -1547,6 +1546,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) int i; struct dc_link *edp_link_to_turnoff = NULL; struct dc_link *edp_link = get_link_for_edp(dc); + struct dc_bios *bios = dc->ctx->dc_bios; bool can_edp_fast_boot_optimize = false; bool apply_edp_fast_boot_optimization = false; @@ -1573,6 +1573,20 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) if (context->streams[i]->signal == SIGNAL_TYPE_EDP) { context->streams[i]->apply_edp_fast_boot_optimization = true; apply_edp_fast_boot_optimization = true; + + /* When after S4 and S5, vbios may post edp and previous dpms_off + * doesn't make sense. + * Update dpms_off state to align hw and sw state via check + * vBios scratch register. + */ + if (bios->funcs->is_active_display) { + const struct connector_device_tag_info *device_tag = &(edp_link->device_tag); + + if (bios->funcs->is_active_display(bios, + context->streams[i]->signal, + device_tag)) + context->streams[i]->dpms_off = false; + } } } } @@ -1748,44 +1762,17 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx, set_static_screen_control(pipe_ctx[i]->stream_res.tg, value); } -/* unit: in_khz before mode set, get pixel clock from context. 
ASIC register - * may not be programmed yet - */ -static uint32_t get_max_pixel_clock_for_all_paths( - struct dc *dc, - struct dc_state *context) -{ - uint32_t max_pix_clk = 0; - int i; - - for (i = 0; i < MAX_PIPES; i++) { - struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; - - if (pipe_ctx->stream == NULL) - continue; - - /* do not check under lay */ - if (pipe_ctx->top_pipe) - continue; - - if (pipe_ctx->stream_res.pix_clk_params.requested_pix_clk > max_pix_clk) - max_pix_clk = - pipe_ctx->stream_res.pix_clk_params.requested_pix_clk; - } - - return max_pix_clk; -} - /* * Check if FBC can be enabled */ static bool should_enable_fbc(struct dc *dc, - struct dc_state *context, - uint32_t *pipe_idx) + struct dc_state *context, + uint32_t *pipe_idx) { uint32_t i; struct pipe_ctx *pipe_ctx = NULL; struct resource_context *res_ctx = &context->res_ctx; + unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; ASSERT(dc->fbc_compressor); @@ -1800,14 +1787,28 @@ static bool should_enable_fbc(struct dc *dc, for (i = 0; i < dc->res_pool->pipe_count; i++) { if (res_ctx->pipe_ctx[i].stream) { + pipe_ctx = &res_ctx->pipe_ctx[i]; - *pipe_idx = i; - break; + + if (!pipe_ctx) + continue; + + /* fbc not applicable on underlay pipe */ + if (pipe_ctx->pipe_idx != underlay_idx) { + *pipe_idx = i; + break; + } } } - /* Pipe context should be found */ - ASSERT(pipe_ctx); + if (i == dc->res_pool->pipe_count) + return false; + + if (!pipe_ctx->stream->sink) + return false; + + if (!pipe_ctx->stream->sink->link) + return false; /* Only supports eDP */ if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP) @@ -1831,8 +1832,9 @@ static bool should_enable_fbc(struct dc *dc, /* * Enable FBC */ -static void enable_fbc(struct dc *dc, - struct dc_state *context) +static void enable_fbc( + struct dc *dc, + struct dc_state *context) { uint32_t pipe_idx = 0; @@ -1842,10 +1844,9 @@ static void enable_fbc(struct dc *dc, struct compressor *compr = dc->fbc_compressor; 
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx]; - params.source_view_width = pipe_ctx->stream->timing.h_addressable; params.source_view_height = pipe_ctx->stream->timing.v_addressable; - + params.inst = pipe_ctx->stream_res.tg->inst; compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr; compr->funcs->surface_address_and_pitch(compr, ¶ms); @@ -2060,10 +2061,10 @@ enum dc_status dce110_apply_ctx_to_hw( return status; } - dcb->funcs->set_scratch_critical_state(dcb, false); - if (dc->fbc_compressor) - enable_fbc(dc, context); + enable_fbc(dc, dc->current_state); + + dcb->funcs->set_scratch_critical_state(dcb, false); return DC_OK; } @@ -2296,7 +2297,7 @@ static void dce110_enable_per_frame_crtc_position_reset( int i; gsl_params.gsl_group = 0; - gsl_params.gsl_master = grouped_pipes[0]->stream->triggered_crtc_reset.event_source->status.primary_otg_inst; + gsl_params.gsl_master = 0; for (i = 0; i < group_size; i++) grouped_pipes[i]->stream_res.tg->funcs->setup_global_swap_lock( @@ -2385,193 +2386,33 @@ static void init_hw(struct dc *dc) } -void dce110_fill_display_configs( - const struct dc_state *context, - struct dm_pp_display_configuration *pp_display_cfg) -{ - int j; - int num_cfgs = 0; - - for (j = 0; j < context->stream_count; j++) { - int k; - - const struct dc_stream_state *stream = context->streams[j]; - struct dm_pp_single_disp_config *cfg = - &pp_display_cfg->disp_configs[num_cfgs]; - const struct pipe_ctx *pipe_ctx = NULL; - - for (k = 0; k < MAX_PIPES; k++) - if (stream == context->res_ctx.pipe_ctx[k].stream) { - pipe_ctx = &context->res_ctx.pipe_ctx[k]; - break; - } - - ASSERT(pipe_ctx != NULL); - - /* only notify active stream */ - if (stream->dpms_off) - continue; - - num_cfgs++; - cfg->signal = pipe_ctx->stream->signal; - cfg->pipe_idx = pipe_ctx->stream_res.tg->inst; - cfg->src_height = stream->src.height; - cfg->src_width = stream->src.width; - cfg->ddi_channel_mapping = - stream->sink->link->ddi_channel_mapping.raw; - 
cfg->transmitter = - stream->sink->link->link_enc->transmitter; - cfg->link_settings.lane_count = - stream->sink->link->cur_link_settings.lane_count; - cfg->link_settings.link_rate = - stream->sink->link->cur_link_settings.link_rate; - cfg->link_settings.link_spread = - stream->sink->link->cur_link_settings.link_spread; - cfg->sym_clock = stream->phy_pix_clk; - /* Round v_refresh*/ - cfg->v_refresh = stream->timing.pix_clk_khz * 1000; - cfg->v_refresh /= stream->timing.h_total; - cfg->v_refresh = (cfg->v_refresh + stream->timing.v_total / 2) - / stream->timing.v_total; - } - - pp_display_cfg->display_count = num_cfgs; -} - -uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context) -{ - uint8_t j; - uint32_t min_vertical_blank_time = -1; - - for (j = 0; j < context->stream_count; j++) { - struct dc_stream_state *stream = context->streams[j]; - uint32_t vertical_blank_in_pixels = 0; - uint32_t vertical_blank_time = 0; - - vertical_blank_in_pixels = stream->timing.h_total * - (stream->timing.v_total - - stream->timing.v_addressable); - - vertical_blank_time = vertical_blank_in_pixels - * 1000 / stream->timing.pix_clk_khz; - - if (min_vertical_blank_time > vertical_blank_time) - min_vertical_blank_time = vertical_blank_time; - } - return min_vertical_blank_time; -} - -static int determine_sclk_from_bounding_box( - const struct dc *dc, - int required_sclk) -{ - int i; - - /* - * Some asics do not give us sclk levels, so we just report the actual - * required sclk - */ - if (dc->sclk_lvls.num_levels == 0) - return required_sclk; - - for (i = 0; i < dc->sclk_lvls.num_levels; i++) { - if (dc->sclk_lvls.clocks_in_khz[i] >= required_sclk) - return dc->sclk_lvls.clocks_in_khz[i]; - } - /* - * even maximum level could not satisfy requirement, this - * is unexpected at this stage, should have been caught at - * validation time - */ - ASSERT(0); - return dc->sclk_lvls.clocks_in_khz[dc->sclk_lvls.num_levels - 1]; -} - -static void pplib_apply_display_requirements( - 
struct dc *dc, - struct dc_state *context) +void dce110_prepare_bandwidth( + struct dc *dc, + struct dc_state *context) { - struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - - pp_display_cfg->all_displays_in_sync = - context->bw.dce.all_displays_in_sync; - pp_display_cfg->nb_pstate_switch_disable = - context->bw.dce.nbp_state_change_enable == false; - pp_display_cfg->cpu_cc6_disable = - context->bw.dce.cpuc_state_change_enable == false; - pp_display_cfg->cpu_pstate_disable = - context->bw.dce.cpup_state_change_enable == false; - pp_display_cfg->cpu_pstate_separation_time = - context->bw.dce.blackout_recovery_time_us; - - pp_display_cfg->min_memory_clock_khz = context->bw.dce.yclk_khz - / MEMORY_TYPE_MULTIPLIER; - - pp_display_cfg->min_engine_clock_khz = determine_sclk_from_bounding_box( - dc, - context->bw.dce.sclk_khz); - - pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz; - - pp_display_cfg->min_engine_clock_deep_sleep_khz - = context->bw.dce.sclk_deep_sleep_khz; - - pp_display_cfg->avail_mclk_switch_time_us = - dce110_get_min_vblank_time_us(context); - /* TODO: dce11.2*/ - pp_display_cfg->avail_mclk_switch_time_in_disp_active_us = 0; - - pp_display_cfg->disp_clk_khz = dc->res_pool->dccg->clks.dispclk_khz; + struct clk_mgr *dccg = dc->res_pool->clk_mgr; - dce110_fill_display_configs(context, pp_display_cfg); + dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - /* TODO: is this still applicable?*/ - if (pp_display_cfg->display_count == 1) { - const struct dc_crtc_timing *timing = - &context->streams[0]->timing; - - pp_display_cfg->crtc_index = - pp_display_cfg->disp_configs[0].pipe_idx; - pp_display_cfg->line_time_in_us = timing->h_total * 1000 - / timing->pix_clk_khz; - } - - if (memcmp(&dc->prev_display_config, pp_display_cfg, sizeof( - struct dm_pp_display_configuration)) != 0) - dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); - - dc->prev_display_config = *pp_display_cfg; + 
dccg->funcs->update_clocks( + dccg, + context, + false); } -static void dce110_set_bandwidth( +void dce110_optimize_bandwidth( struct dc *dc, - struct dc_state *context, - bool decrease_allowed) + struct dc_state *context) { - struct dc_clocks req_clks; - struct dccg *dccg = dc->res_pool->dccg; - - req_clks.dispclk_khz = context->bw.dce.dispclk_khz; - req_clks.phyclk_khz = get_max_pixel_clock_for_all_paths(dc, context); + struct clk_mgr *dccg = dc->res_pool->clk_mgr; - if (decrease_allowed) - dce110_set_displaymarks(dc, context); - else - dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool); - - if (dccg->funcs->update_dfs_bypass) - dccg->funcs->update_dfs_bypass( - dccg, - dc, - context, - req_clks.dispclk_khz); + dce110_set_displaymarks(dc, context); dccg->funcs->update_clocks( dccg, - &req_clks, - decrease_allowed); - pplib_apply_display_requirements(dc, context); + context, + true); } static void dce110_program_front_end_for_pipe( @@ -2582,7 +2423,6 @@ static void dce110_program_front_end_for_pipe( struct dc_plane_state *plane_state = pipe_ctx->plane_state; struct xfm_grph_csc_adjustment adjust; struct out_csc_color_matrix tbl_entry; - unsigned int underlay_idx = dc->res_pool->underlay_pipe_index; unsigned int i; DC_LOGGER_INIT(); memset(&tbl_entry, 0, sizeof(tbl_entry)); @@ -2623,15 +2463,6 @@ static void dce110_program_front_end_for_pipe( program_scaler(dc, pipe_ctx); - /* fbc not applicable on Underlay pipe */ - if (dc->fbc_compressor && old_pipe->stream && - pipe_ctx->pipe_idx != underlay_idx) { - if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL) - dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); - else - enable_fbc(dc, dc->current_state); - } - mi->funcs->mem_input_program_surface_config( mi, plane_state->format, @@ -2708,6 +2539,9 @@ static void dce110_apply_ctx_for_surface( if (num_planes == 0) return; + if (dc->fbc_compressor) + dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor); + for (i = 0; i < 
dc->res_pool->pipe_count; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; @@ -2750,6 +2584,9 @@ static void dce110_apply_ctx_for_surface( (pipe_ctx->plane_state || old_pipe_ctx->plane_state)) dc->hwss.pipe_control_lock(dc, pipe_ctx, false); } + + if (dc->fbc_compressor) + enable_fbc(dc, dc->current_state); } static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx) @@ -2776,28 +2613,6 @@ static void dce110_wait_for_mpcc_disconnect( /* do nothing*/ } -static void program_csc_matrix(struct pipe_ctx *pipe_ctx, - enum dc_color_space colorspace, - uint16_t *matrix) -{ - int i; - struct out_csc_color_matrix tbl_entry; - - if (pipe_ctx->stream->csc_color_matrix.enable_adjustment - == true) { - enum dc_color_space color_space = - pipe_ctx->stream->output_color_space; - - //uint16_t matrix[12]; - for (i = 0; i < 12; i++) - tbl_entry.regval[i] = pipe_ctx->stream->csc_color_matrix.matrix[i]; - - tbl_entry.color_space = color_space; - //tbl_entry.regval = matrix; - pipe_ctx->plane_res.xfm->funcs->opp_set_csc_adjustment(pipe_ctx->plane_res.xfm, &tbl_entry); - } -} - void dce110_set_cursor_position(struct pipe_ctx *pipe_ctx) { struct dc_cursor_position pos_cpy = pipe_ctx->stream->cursor_position; @@ -2846,13 +2661,8 @@ void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.xfm, attributes); } -static void ready_shared_resources(struct dc *dc, struct dc_state *context) {} - -static void optimize_shared_resources(struct dc *dc) {} - static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, - .program_csc_matrix = program_csc_matrix, .init_hw = init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dce110_apply_ctx_for_surface, @@ -2875,7 +2685,8 @@ static const struct hw_sequencer_funcs dce110_funcs = { .enable_display_power_gating = dce110_enable_display_power_gating, .disable_plane = 
dce110_power_down_fe, .pipe_control_lock = dce_pipe_control_lock, - .set_bandwidth = dce110_set_bandwidth, + .prepare_bandwidth = dce110_prepare_bandwidth, + .optimize_bandwidth = dce110_optimize_bandwidth, .set_drr = set_drr, .get_position = get_position, .set_static_screen_control = set_static_screen_control, @@ -2884,9 +2695,6 @@ static const struct hw_sequencer_funcs dce110_funcs = { .setup_stereo = NULL, .set_avmute = dce110_set_avmute, .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect, - .ready_shared_resources = ready_shared_resources, - .optimize_shared_resources = optimize_shared_resources, - .pplib_apply_display_requirements = pplib_apply_display_requirements, .edp_backlight_control = hwss_edp_backlight_control, .edp_power_control = hwss_edp_power_control, .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index d6db3dbd9015..cd3e36d52a52 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -40,7 +40,6 @@ enum dc_status dce110_apply_ctx_to_hw( struct dc_state *context); - void dce110_enable_stream(struct pipe_ctx *pipe_ctx); void dce110_disable_stream(struct pipe_ctx *pipe_ctx, int option); @@ -64,11 +63,13 @@ void dce110_set_safe_displaymarks( struct resource_context *res_ctx, const struct resource_pool *pool); -void dce110_fill_display_configs( - const struct dc_state *context, - struct dm_pp_display_configuration *pp_display_cfg); +void dce110_prepare_bandwidth( + struct dc *dc, + struct dc_state *context); -uint32_t dce110_get_min_vblank_time_us(const struct dc_state *context); +void dce110_optimize_bandwidth( + struct dc *dc, + struct dc_state *context); void dp_receiver_power_ctrl(struct dc_link *link, bool on); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c 
b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 7c9fd9052ee2..e33d11785b1f 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -31,6 +31,7 @@ #include "resource.h" #include "dce110/dce110_resource.h" +#include "dce/dce_clk_mgr.h" #include "include/irq_service_interface.h" #include "dce/dce_audio.h" #include "dce110/dce110_timing_generator.h" @@ -45,7 +46,6 @@ #include "dce110/dce110_transform_v.h" #include "dce/dce_opp.h" #include "dce110/dce110_opp_v.h" -#include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_hwseq.h" #include "dce110/dce110_hw_sequencer.h" @@ -148,15 +148,15 @@ static const struct dce110_timing_generator_offsets dce110_tg_offsets[] = { #define SRI(reg_name, block, id)\ .reg_name = mm ## block ## id ## _ ## reg_name -static const struct dccg_registers disp_clk_regs = { +static const struct clk_mgr_registers disp_clk_regs = { CLK_COMMON_REG_LIST_DCE_BASE() }; -static const struct dccg_shift disp_clk_shift = { +static const struct clk_mgr_shift disp_clk_shift = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) }; -static const struct dccg_mask disp_clk_mask = { +static const struct clk_mgr_mask disp_clk_mask = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) }; @@ -760,8 +760,8 @@ static void destruct(struct dce110_resource_pool *pool) if (pool->base.dmcu != NULL) dce_dmcu_destroy(&pool->base.dmcu); - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + dce_clk_mgr_destroy(&pool->base.clk_mgr); if (pool->base.irqs != NULL) { dal_irq_service_destroy(&pool->base.irqs); @@ -1173,12 +1173,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER, 1000); + clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - 
clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER, + clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER, + clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); } @@ -1201,7 +1201,6 @@ static bool construct( struct dc_context *ctx = dc->ctx; struct dc_firmware_info info; struct dc_bios *bp; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -1257,11 +1256,11 @@ static bool construct( } } - pool->base.dccg = dce110_dccg_create(ctx, + pool->base.clk_mgr = dce110_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -1287,13 +1286,6 @@ static bool construct( goto res_create_fail; } - /* get static clock information for PPLIB or firmware, save - * max_clock_state - */ - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; - { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index 3ce79c208ddf..969d4e72dc94 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -35,6 +35,7 @@ #include "irq/dce110/irq_service_dce110.h" +#include "dce/dce_clk_mgr.h" #include "dce/dce_mem_input.h" #include "dce/dce_transform.h" #include "dce/dce_link_encoder.h" @@ -42,7 +43,6 @@ #include "dce/dce_audio.h" #include "dce/dce_opp.h" #include "dce/dce_ipp.h" -#include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_hwseq.h" @@ -148,15 +148,15 @@ static const struct dce110_timing_generator_offsets 
dce112_tg_offsets[] = { .reg_name = mm ## block ## id ## _ ## reg_name -static const struct dccg_registers disp_clk_regs = { +static const struct clk_mgr_registers disp_clk_regs = { CLK_COMMON_REG_LIST_DCE_BASE() }; -static const struct dccg_shift disp_clk_shift = { +static const struct clk_mgr_shift disp_clk_shift = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) }; -static const struct dccg_mask disp_clk_mask = { +static const struct clk_mgr_mask disp_clk_mask = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) }; @@ -551,7 +551,8 @@ static struct transform *dce112_transform_create( static const struct encoder_feature_support link_enc_feature = { .max_hdmi_deep_color = COLOR_DEPTH_121212, .max_hdmi_pixel_clock = 600000, - .ycbcr420_supported = true, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = false, .flags.bits.IS_HBR2_CAPABLE = true, .flags.bits.IS_HBR3_CAPABLE = true, .flags.bits.IS_TPS3_CAPABLE = true, @@ -749,8 +750,8 @@ static void destruct(struct dce110_resource_pool *pool) if (pool->base.dmcu != NULL) dce_dmcu_destroy(&pool->base.dmcu); - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + dce_clk_mgr_destroy(&pool->base.clk_mgr); if (pool->base.irqs != NULL) { dal_irq_service_destroy(&pool->base.irqs); @@ -1015,12 +1016,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) &clks); dc->bw_vbios->low_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER, 1000); + clks.clocks_in_khz[0] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER, + clks.clocks_in_khz[clks.num_levels>>1] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER, + clks.clocks_in_khz[clks.num_levels-1] * MEMORY_TYPE_MULTIPLIER_CZ, 1000); return; @@ -1056,12 +1057,12 @@ static void 
bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, 1000); + mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); /* Now notify PPLib/SMU about which Watermarks sets they should select @@ -1131,7 +1132,6 @@ static bool construct( { unsigned int i; struct dc_context *ctx = dc->ctx; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -1199,11 +1199,11 @@ static bool construct( } } - pool->base.dccg = dce112_dccg_create(ctx, + pool->base.clk_mgr = dce112_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -1229,13 +1229,6 @@ static bool construct( goto res_create_fail; } - /* get static clock information for PPLIB or firmware, save - * max_clock_state - */ - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; - { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c index 79ab5f9f9115..f12696674eb0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c @@ -31,6 +31,7 @@ #include "resource.h" #include 
"include/irq_service_interface.h" #include "dce120_resource.h" + #include "dce112/dce112_resource.h" #include "dce110/dce110_resource.h" @@ -39,7 +40,6 @@ #include "irq/dce120/irq_service_dce120.h" #include "dce/dce_opp.h" #include "dce/dce_clock_source.h" -#include "dce/dce_clocks.h" #include "dce/dce_ipp.h" #include "dce/dce_mem_input.h" @@ -47,6 +47,7 @@ #include "dce120/dce120_hw_sequencer.h" #include "dce/dce_transform.h" +#include "dce/dce_clk_mgr.h" #include "dce/dce_audio.h" #include "dce/dce_link_encoder.h" #include "dce/dce_stream_encoder.h" @@ -573,8 +574,8 @@ static void destruct(struct dce110_resource_pool *pool) if (pool->base.dmcu != NULL) dce_dmcu_destroy(&pool->base.dmcu); - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + dce_clk_mgr_destroy(&pool->base.clk_mgr); } static void read_dce_straps( @@ -606,7 +607,8 @@ static struct audio *create_audio( static const struct encoder_feature_support link_enc_feature = { .max_hdmi_deep_color = COLOR_DEPTH_121212, .max_hdmi_pixel_clock = 600000, - .ycbcr420_supported = true, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = false, .flags.bits.IS_HBR2_CAPABLE = true, .flags.bits.IS_HBR3_CAPABLE = true, .flags.bits.IS_TPS3_CAPABLE = true, @@ -834,12 +836,12 @@ static void bw_calcs_data_update_from_pplib(struct dc *dc) * YCLK = UMACLK*m_memoryTypeMultiplier */ dc->bw_vbios->low_yclk = bw_frc_to_fixed( - mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, 1000); + mem_clks.data[0].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->mid_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, + mem_clks.data[mem_clks.num_levels>>1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 1000); dc->bw_vbios->high_yclk = bw_frc_to_fixed( - mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER, + mem_clks.data[mem_clks.num_levels-1].clocks_in_khz * MEMORY_TYPE_MULTIPLIER_CZ, 
1000); /* Now notify PPLib/SMU about which Watermarks sets they should select @@ -973,8 +975,8 @@ static bool construct( } } - pool->base.dccg = dce120_dccg_create(ctx); - if (pool->base.dccg == NULL) { + pool->base.clk_mgr = dce120_clk_mgr_create(ctx); + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto dccg_create_fail; diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c index 6c6a1a16af19..a60a90e68d91 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_hw_sequencer.c @@ -76,6 +76,7 @@ void dce80_hw_sequencer_construct(struct dc *dc) dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating; dc->hwss.pipe_control_lock = dce_pipe_control_lock; - dc->hwss.set_bandwidth = dce100_set_bandwidth; + dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth; + dc->hwss.optimize_bandwidth = dce100_prepare_bandwidth; } diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index d68f951f9869..cdd1d6b7b9f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -37,14 +37,13 @@ #include "dce110/dce110_timing_generator.h" #include "dce110/dce110_resource.h" #include "dce80/dce80_timing_generator.h" +#include "dce/dce_clk_mgr.h" #include "dce/dce_mem_input.h" #include "dce/dce_link_encoder.h" #include "dce/dce_stream_encoder.h" -#include "dce/dce_mem_input.h" #include "dce/dce_ipp.h" #include "dce/dce_transform.h" #include "dce/dce_opp.h" -#include "dce/dce_clocks.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" @@ -155,15 +154,15 @@ static const struct dce110_timing_generator_offsets dce80_tg_offsets[] = { .reg_name = mm ## block ## id ## _ ## reg_name -static const struct 
dccg_registers disp_clk_regs = { +static const struct clk_mgr_registers disp_clk_regs = { CLK_COMMON_REG_LIST_DCE_BASE() }; -static const struct dccg_shift disp_clk_shift = { +static const struct clk_mgr_shift disp_clk_shift = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(__SHIFT) }; -static const struct dccg_mask disp_clk_mask = { +static const struct clk_mgr_mask disp_clk_mask = { CLK_COMMON_MASK_SH_LIST_DCE_COMMON_BASE(_MASK) }; @@ -779,8 +778,8 @@ static void destruct(struct dce110_resource_pool *pool) } } - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + dce_clk_mgr_destroy(&pool->base.clk_mgr); if (pool->base.irqs != NULL) { dal_irq_service_destroy(&pool->base.irqs); @@ -793,7 +792,7 @@ bool dce80_validate_bandwidth( { /* TODO implement when needed but for now hardcode max value*/ context->bw.dce.dispclk_khz = 681000; - context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER; + context->bw.dce.yclk_khz = 250000 * MEMORY_TYPE_MULTIPLIER_CZ; return true; } @@ -855,7 +854,6 @@ static bool dce80_construct( struct dc_context *ctx = dc->ctx; struct dc_firmware_info info; struct dc_bios *bp; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -918,11 +916,11 @@ static bool dce80_construct( } } - pool->base.dccg = dce_dccg_create(ctx, + pool->base.clk_mgr = dce_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -948,10 +946,6 @@ static bool dce80_construct( goto res_create_fail; } - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; - { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; @@ -1065,7 +1059,6 @@ static bool dce81_construct( struct dc_context *ctx = dc->ctx; struct dc_firmware_info info; struct 
dc_bios *bp; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -1128,11 +1121,11 @@ static bool dce81_construct( } } - pool->base.dccg = dce_dccg_create(ctx, + pool->base.clk_mgr = dce_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -1158,10 +1151,6 @@ static bool dce81_construct( goto res_create_fail; } - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; - { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; @@ -1275,7 +1264,6 @@ static bool dce83_construct( struct dc_context *ctx = dc->ctx; struct dc_firmware_info info; struct dc_bios *bp; - struct dm_pp_static_clock_info static_clk_info = {0}; ctx->dc_bios->regs = &bios_regs; @@ -1334,11 +1322,11 @@ static bool dce83_construct( } } - pool->base.dccg = dce_dccg_create(ctx, + pool->base.clk_mgr = dce_clk_mgr_create(ctx, &disp_clk_regs, &disp_clk_shift, &disp_clk_mask); - if (pool->base.dccg == NULL) { + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto res_create_fail; @@ -1364,10 +1352,6 @@ static bool dce83_construct( goto res_create_fail; } - if (dm_pp_get_static_clocks(ctx, &static_clk_info)) - pool->base.dccg->max_clks_state = - static_clk_info.max_clocks_state; - { struct irq_service_init_data init_data; init_data.ctx = dc->ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 032f872be89c..55f293c8a3c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -24,7 +24,7 @@ DCN10 = dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o dcn10_hw_sequencer_debug.o \ dcn10_dpp.o dcn10_opp.o dcn10_optc.o \ - dcn10_hubp.o 
dcn10_mpc.o \ + dcn10_hubp.o dcn10_mpc.o dcn10_clk_mgr.o \ dcn10_dpp_dscl.o dcn10_dpp_cm.o dcn10_cm_common.o \ dcn10_hubbub.o dcn10_stream_encoder.o dcn10_link_encoder.o diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c new file mode 100644 index 000000000000..54abedbf1b43 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c @@ -0,0 +1,375 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "dcn10_clk_mgr.h" + +#include "reg_helper.h" +#include "core_types.h" + +#define TO_DCE_CLK_MGR(clocks)\ + container_of(clocks, struct dce_clk_mgr, base) + +#define REG(reg) \ + (clk_mgr_dce->regs->reg) + +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr_dce->clk_mgr_shift->field_name, clk_mgr_dce->clk_mgr_mask->field_name + +#define CTX \ + clk_mgr_dce->base.ctx +#define DC_LOGGER \ + clk_mgr->ctx->logger + +void dcn1_pplib_apply_display_requirements( + struct dc *dc, + struct dc_state *context) +{ + struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; + + pp_display_cfg->min_engine_clock_khz = dc->res_pool->clk_mgr->clks.dcfclk_khz; + pp_display_cfg->min_memory_clock_khz = dc->res_pool->clk_mgr->clks.fclk_khz; + pp_display_cfg->min_engine_clock_deep_sleep_khz = dc->res_pool->clk_mgr->clks.dcfclk_deep_sleep_khz; + pp_display_cfg->min_dcfc_deep_sleep_clock_khz = dc->res_pool->clk_mgr->clks.dcfclk_deep_sleep_khz; + pp_display_cfg->min_dcfclock_khz = dc->res_pool->clk_mgr->clks.dcfclk_khz; + pp_display_cfg->disp_clk_khz = dc->res_pool->clk_mgr->clks.dispclk_khz; + dce110_fill_display_configs(context, pp_display_cfg); + + dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); +} + +static int dcn1_determine_dppclk_threshold(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) +{ + bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; + bool dispclk_increase = new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz; + int disp_clk_threshold = new_clocks->max_supported_dppclk_khz; + bool cur_dpp_div = clk_mgr->clks.dispclk_khz > clk_mgr->clks.dppclk_khz; + + /* increase clock, looking for div is 0 for current, request div is 1*/ + if (dispclk_increase) { + /* already divided by 2, no need to reach target clk with 2 steps*/ + if (cur_dpp_div) + return new_clocks->dispclk_khz; + + /* request disp clk is lower than maximum supported dpp clk, + * no need to reach target clk with 
two steps. + */ + if (new_clocks->dispclk_khz <= disp_clk_threshold) + return new_clocks->dispclk_khz; + + /* target dpp clk not request divided by 2, still within threshold */ + if (!request_dpp_div) + return new_clocks->dispclk_khz; + + } else { + /* decrease clock, looking for current dppclk divided by 2, + * request dppclk not divided by 2. + */ + + /* current dpp clk not divided by 2, no need to ramp*/ + if (!cur_dpp_div) + return new_clocks->dispclk_khz; + + /* current disp clk is lower than current maximum dpp clk, + * no need to ramp + */ + if (clk_mgr->clks.dispclk_khz <= disp_clk_threshold) + return new_clocks->dispclk_khz; + + /* request dpp clk need to be divided by 2 */ + if (request_dpp_div) + return new_clocks->dispclk_khz; + } + + return disp_clk_threshold; +} + +static void dcn1_ramp_up_dispclk_with_dpp(struct clk_mgr *clk_mgr, struct dc_clocks *new_clocks) +{ + struct dc *dc = clk_mgr->ctx->dc; + int dispclk_to_dpp_threshold = dcn1_determine_dppclk_threshold(clk_mgr, new_clocks); + bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; + int i; + + /* set disp clk to dpp clk threshold */ + dce112_set_clock(clk_mgr, dispclk_to_dpp_threshold); + + /* update request dpp clk division option */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->plane_state) + continue; + + pipe_ctx->plane_res.dpp->funcs->dpp_dppclk_control( + pipe_ctx->plane_res.dpp, + request_dpp_div, + true); + } + + /* If target clk not same as dppclk threshold, set to target clock */ + if (dispclk_to_dpp_threshold != new_clocks->dispclk_khz) + dce112_set_clock(clk_mgr, new_clocks->dispclk_khz); + + clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; + clk_mgr->clks.dppclk_khz = new_clocks->dppclk_khz; + clk_mgr->clks.max_supported_dppclk_khz = new_clocks->max_supported_dppclk_khz; +} + +static int get_active_display_cnt( + struct dc *dc, + struct dc_state *context) +{ + int 
i, display_count; + + display_count = 0; + for (i = 0; i < context->stream_count; i++) { + const struct dc_stream_state *stream = context->streams[i]; + + /* + * Only notify active stream or virtual stream. + * Need to notify virtual stream to work around + * headless case. HPD does not fire when system is in + * S0i2. + */ + if (!stream->dpms_off || stream->signal == SIGNAL_TYPE_VIRTUAL) + display_count++; + } + + return display_count; +} + +static void notify_deep_sleep_dcfclk_to_smu( + struct pp_smu_funcs_rv *pp_smu, int min_dcef_deep_sleep_clk_khz) +{ + int min_dcef_deep_sleep_clk_mhz; //minimum required DCEF Deep Sleep clock in mhz + /* + * if function pointer not set up, this message is + * sent as part of pplib_apply_display_requirements. + * So just return. + */ + if (!pp_smu || !pp_smu->set_min_deep_sleep_dcfclk) + return; + + min_dcef_deep_sleep_clk_mhz = (min_dcef_deep_sleep_clk_khz + 999) / 1000; //Round up + pp_smu->set_min_deep_sleep_dcfclk(&pp_smu->pp_smu, min_dcef_deep_sleep_clk_mhz); +} + +static void notify_hard_min_dcfclk_to_smu( + struct pp_smu_funcs_rv *pp_smu, int min_dcf_clk_khz) +{ + int min_dcf_clk_mhz; //minimum required DCF clock in mhz + + /* + * if function pointer not set up, this message is + * sent as part of pplib_apply_display_requirements. + * So just return. + */ + if (!pp_smu || !pp_smu->set_hard_min_dcfclk_by_freq) + return; + + min_dcf_clk_mhz = min_dcf_clk_khz / 1000; + + pp_smu->set_hard_min_dcfclk_by_freq(&pp_smu->pp_smu, min_dcf_clk_mhz); +} + +static void notify_hard_min_fclk_to_smu( + struct pp_smu_funcs_rv *pp_smu, int min_f_clk_khz) +{ + int min_f_clk_mhz; //minimum required F clock in mhz + + /* + * if function pointer not set up, this message is + * sent as part of pplib_apply_display_requirements. + * So just return. 
+ */ + if (!pp_smu || !pp_smu->set_hard_min_fclk_by_freq) + return; + + min_f_clk_mhz = min_f_clk_khz / 1000; + + pp_smu->set_hard_min_fclk_by_freq(&pp_smu->pp_smu, min_f_clk_mhz); +} + +static void dcn1_update_clocks(struct clk_mgr *clk_mgr, + struct dc_state *context, + bool safe_to_lower) +{ + struct dc *dc = clk_mgr->ctx->dc; + struct dc_clocks *new_clocks = &context->bw.dcn.clk; + struct pp_smu_display_requirement_rv *smu_req_cur = + &dc->res_pool->pp_smu_req; + struct pp_smu_display_requirement_rv smu_req = *smu_req_cur; + struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu; + uint32_t requested_dcf_clock_in_khz = 0; + bool send_request_to_increase = false; + bool send_request_to_lower = false; + int display_count; + + bool enter_display_off = false; + + display_count = get_active_display_cnt(dc, context); + + if (display_count == 0) + enter_display_off = true; + + if (enter_display_off == safe_to_lower) { + /* + * Notify SMU active displays + * if function pointer not set up, this message is + * sent as part of pplib_apply_display_requirements. 
+ */ + if (pp_smu->set_display_count) + pp_smu->set_display_count(&pp_smu->pp_smu, display_count); + else + smu_req.display_count = display_count; + + } + + if (new_clocks->dispclk_khz > clk_mgr->clks.dispclk_khz + || new_clocks->phyclk_khz > clk_mgr->clks.phyclk_khz + || new_clocks->fclk_khz > clk_mgr->clks.fclk_khz + || new_clocks->dcfclk_khz > clk_mgr->clks.dcfclk_khz) + send_request_to_increase = true; + + if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr->clks.phyclk_khz)) { + clk_mgr->clks.phyclk_khz = new_clocks->phyclk_khz; + + send_request_to_lower = true; + } + + // F Clock + if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr->clks.fclk_khz)) { + clk_mgr->clks.fclk_khz = new_clocks->fclk_khz; + smu_req.hard_min_fclk_mhz = new_clocks->fclk_khz / 1000; + + notify_hard_min_fclk_to_smu(pp_smu, new_clocks->fclk_khz); + + send_request_to_lower = true; + } + + //DCF Clock + if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr->clks.dcfclk_khz)) { + clk_mgr->clks.dcfclk_khz = new_clocks->dcfclk_khz; + smu_req.hard_min_dcefclk_mhz = new_clocks->dcfclk_khz / 1000; + + send_request_to_lower = true; + } + + if (should_set_clock(safe_to_lower, + new_clocks->dcfclk_deep_sleep_khz, clk_mgr->clks.dcfclk_deep_sleep_khz)) { + clk_mgr->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; + smu_req.min_deep_sleep_dcefclk_mhz = new_clocks->dcfclk_deep_sleep_khz / 1000; + + send_request_to_lower = true; + } + + /* make sure dcf clk is before dpp clk to + * make sure we have enough voltage to run dpp clk + */ + if (send_request_to_increase) { + /*use dcfclk to request voltage*/ + requested_dcf_clock_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); + + notify_hard_min_dcfclk_to_smu(pp_smu, requested_dcf_clock_in_khz); + + if (pp_smu->set_display_requirement) + pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); + + notify_deep_sleep_dcfclk_to_smu(pp_smu, clk_mgr->clks.dcfclk_deep_sleep_khz); + 
dcn1_pplib_apply_display_requirements(dc, context); + } + + /* dcn1 dppclk is tied to dispclk */ + /* program dispclk on = as a w/a for sleep resume clock ramping issues */ + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr->clks.dispclk_khz) + || new_clocks->dispclk_khz == clk_mgr->clks.dispclk_khz) { + dcn1_ramp_up_dispclk_with_dpp(clk_mgr, new_clocks); + clk_mgr->clks.dispclk_khz = new_clocks->dispclk_khz; + + send_request_to_lower = true; + } + + if (!send_request_to_increase && send_request_to_lower) { + /*use dcfclk to request voltage*/ + requested_dcf_clock_in_khz = dcn_find_dcfclk_suits_all(dc, new_clocks); + + notify_hard_min_dcfclk_to_smu(pp_smu, requested_dcf_clock_in_khz); + + if (pp_smu->set_display_requirement) + pp_smu->set_display_requirement(&pp_smu->pp_smu, &smu_req); + + notify_deep_sleep_dcfclk_to_smu(pp_smu, clk_mgr->clks.dcfclk_deep_sleep_khz); + dcn1_pplib_apply_display_requirements(dc, context); + } + + + *smu_req_cur = smu_req; +} +static const struct clk_mgr_funcs dcn1_funcs = { + .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, + .update_clocks = dcn1_update_clocks +}; +struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx) +{ + struct dc_debug_options *debug = &ctx->dc->debug; + struct dc_bios *bp = ctx->dc_bios; + struct dc_firmware_info fw_info = { { 0 } }; + struct dce_clk_mgr *clk_mgr_dce = kzalloc(sizeof(*clk_mgr_dce), GFP_KERNEL); + + if (clk_mgr_dce == NULL) { + BREAK_TO_DEBUGGER(); + return NULL; + } + + clk_mgr_dce->base.ctx = ctx; + clk_mgr_dce->base.funcs = &dcn1_funcs; + + clk_mgr_dce->dfs_bypass_disp_clk = 0; + + clk_mgr_dce->dprefclk_ss_percentage = 0; + clk_mgr_dce->dprefclk_ss_divider = 1000; + clk_mgr_dce->ss_on_dprefclk = false; + + clk_mgr_dce->dprefclk_khz = 600000; + if (bp->integrated_info) + clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) { + bp->funcs->get_firmware_info(bp, &fw_info); + 
clk_mgr_dce->dentist_vco_freq_khz = fw_info.smu_gpu_pll_output_freq; + if (clk_mgr_dce->dentist_vco_freq_khz == 0) + clk_mgr_dce->dentist_vco_freq_khz = 3600000; + } + + if (!debug->disable_dfs_bypass && bp->integrated_info) + if (bp->integrated_info->gpu_cap_info & DFS_BYPASS_ENABLE) + clk_mgr_dce->dfs_bypass_enabled = true; + + dce_clock_read_ss_info(clk_mgr_dce); + + return &clk_mgr_dce->base; +} + + diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h new file mode 100644 index 000000000000..a995eda443a3 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h @@ -0,0 +1,43 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN10_CLK_MGR_H__ +#define __DCN10_CLK_MGR_H__ + +#include "../dce/dce_clk_mgr.h" + +struct clk_bypass { + uint32_t dcfclk_bypass; + uint32_t dispclk_pypass; + uint32_t dprefclk_bypass; +}; + +void dcn1_pplib_apply_display_requirements( + struct dc *dc, + struct dc_state *context); + +struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx); + +#endif //__DCN10_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c index 5d95a997fd9f..7469333a2c8a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c @@ -71,39 +71,39 @@ void cm_helper_program_xfer_func( unsigned int i = 0; REG_SET_2(reg->start_cntl_b, 0, - exp_region_start, params->arr_points[0].custom_float_x, + exp_region_start, params->corner_points[0].blue.custom_float_x, exp_resion_start_segment, 0); REG_SET_2(reg->start_cntl_g, 0, - exp_region_start, params->arr_points[0].custom_float_x, + exp_region_start, params->corner_points[0].green.custom_float_x, exp_resion_start_segment, 0); REG_SET_2(reg->start_cntl_r, 0, - exp_region_start, params->arr_points[0].custom_float_x, + exp_region_start, params->corner_points[0].red.custom_float_x, exp_resion_start_segment, 0); REG_SET(reg->start_slope_cntl_b, 0, - field_region_linear_slope, params->arr_points[0].custom_float_slope); + field_region_linear_slope, params->corner_points[0].blue.custom_float_slope); REG_SET(reg->start_slope_cntl_g, 0, - field_region_linear_slope, params->arr_points[0].custom_float_slope); + field_region_linear_slope, params->corner_points[0].green.custom_float_slope); REG_SET(reg->start_slope_cntl_r, 0, - field_region_linear_slope, params->arr_points[0].custom_float_slope); + field_region_linear_slope, params->corner_points[0].red.custom_float_slope); REG_SET(reg->start_end_cntl1_b, 0, - field_region_end, params->arr_points[1].custom_float_x); 
+ field_region_end, params->corner_points[1].blue.custom_float_x); REG_SET_2(reg->start_end_cntl2_b, 0, - field_region_end_slope, params->arr_points[1].custom_float_slope, - field_region_end_base, params->arr_points[1].custom_float_y); + field_region_end_slope, params->corner_points[1].blue.custom_float_slope, + field_region_end_base, params->corner_points[1].blue.custom_float_y); REG_SET(reg->start_end_cntl1_g, 0, - field_region_end, params->arr_points[1].custom_float_x); + field_region_end, params->corner_points[1].green.custom_float_x); REG_SET_2(reg->start_end_cntl2_g, 0, - field_region_end_slope, params->arr_points[1].custom_float_slope, - field_region_end_base, params->arr_points[1].custom_float_y); + field_region_end_slope, params->corner_points[1].green.custom_float_slope, + field_region_end_base, params->corner_points[1].green.custom_float_y); REG_SET(reg->start_end_cntl1_r, 0, - field_region_end, params->arr_points[1].custom_float_x); + field_region_end, params->corner_points[1].red.custom_float_x); REG_SET_2(reg->start_end_cntl2_r, 0, - field_region_end_slope, params->arr_points[1].custom_float_slope, - field_region_end_base, params->arr_points[1].custom_float_y); + field_region_end_slope, params->corner_points[1].red.custom_float_slope, + field_region_end_base, params->corner_points[1].red.custom_float_y); for (reg_region_cur = reg->region_start; reg_region_cur <= reg->region_end; @@ -127,7 +127,7 @@ void cm_helper_program_xfer_func( bool cm_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, - struct curve_points *arr_points, + struct curve_points3 *corner_points, uint32_t hw_points_num, bool fixpoint) { @@ -141,20 +141,53 @@ bool cm_helper_convert_to_custom_float( fmt.mantissa_bits = 12; fmt.sign = false; - if (!convert_to_custom_float_format(arr_points[0].x, &fmt, - &arr_points[0].custom_float_x)) { + /* corner_points[0] - beginning base, slope offset for R,G,B + * corner_points[1] - end base, slope offset for R,G,B + */ + if 
(!convert_to_custom_float_format(corner_points[0].red.x, &fmt, + &corner_points[0].red.custom_float_x)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].green.x, &fmt, + &corner_points[0].green.custom_float_x)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].blue.x, &fmt, + &corner_points[0].blue.custom_float_x)) { BREAK_TO_DEBUGGER(); return false; } - if (!convert_to_custom_float_format(arr_points[0].offset, &fmt, - &arr_points[0].custom_float_offset)) { + if (!convert_to_custom_float_format(corner_points[0].red.offset, &fmt, + &corner_points[0].red.custom_float_offset)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].green.offset, &fmt, + &corner_points[0].green.custom_float_offset)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].blue.offset, &fmt, + &corner_points[0].blue.custom_float_offset)) { BREAK_TO_DEBUGGER(); return false; } - if (!convert_to_custom_float_format(arr_points[0].slope, &fmt, - &arr_points[0].custom_float_slope)) { + if (!convert_to_custom_float_format(corner_points[0].red.slope, &fmt, + &corner_points[0].red.custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].green.slope, &fmt, + &corner_points[0].green.custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[0].blue.slope, &fmt, + &corner_points[0].blue.custom_float_slope)) { BREAK_TO_DEBUGGER(); return false; } @@ -162,22 +195,59 @@ bool cm_helper_convert_to_custom_float( fmt.mantissa_bits = 10; fmt.sign = false; - if (!convert_to_custom_float_format(arr_points[1].x, &fmt, - &arr_points[1].custom_float_x)) { + if (!convert_to_custom_float_format(corner_points[1].red.x, &fmt, + &corner_points[1].red.custom_float_x)) { BREAK_TO_DEBUGGER(); return false; } - - 
if (fixpoint == true) - arr_points[1].custom_float_y = dc_fixpt_clamp_u0d14(arr_points[1].y); - else if (!convert_to_custom_float_format(arr_points[1].y, &fmt, - &arr_points[1].custom_float_y)) { + if (!convert_to_custom_float_format(corner_points[1].green.x, &fmt, + &corner_points[1].green.custom_float_x)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[1].blue.x, &fmt, + &corner_points[1].blue.custom_float_x)) { BREAK_TO_DEBUGGER(); return false; } - if (!convert_to_custom_float_format(arr_points[1].slope, &fmt, - &arr_points[1].custom_float_slope)) { + if (fixpoint == true) { + corner_points[1].red.custom_float_y = + dc_fixpt_clamp_u0d14(corner_points[1].red.y); + corner_points[1].green.custom_float_y = + dc_fixpt_clamp_u0d14(corner_points[1].green.y); + corner_points[1].blue.custom_float_y = + dc_fixpt_clamp_u0d14(corner_points[1].blue.y); + } else { + if (!convert_to_custom_float_format(corner_points[1].red.y, + &fmt, &corner_points[1].red.custom_float_y)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[1].green.y, + &fmt, &corner_points[1].green.custom_float_y)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[1].blue.y, + &fmt, &corner_points[1].blue.custom_float_y)) { + BREAK_TO_DEBUGGER(); + return false; + } + } + + if (!convert_to_custom_float_format(corner_points[1].red.slope, &fmt, + &corner_points[1].red.custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[1].green.slope, &fmt, + &corner_points[1].green.custom_float_slope)) { + BREAK_TO_DEBUGGER(); + return false; + } + if (!convert_to_custom_float_format(corner_points[1].blue.slope, &fmt, + &corner_points[1].blue.custom_float_slope)) { BREAK_TO_DEBUGGER(); return false; } @@ -242,15 +312,10 @@ bool cm_helper_translate_curve_to_hw_format( const struct dc_transfer_func *output_tf, struct 
pwl_params *lut_params, bool fixpoint) { - struct curve_points *arr_points; + struct curve_points3 *corner_points; struct pwl_result_data *rgb_resulted; struct pwl_result_data *rgb; struct pwl_result_data *rgb_plus_1; - struct fixed31_32 y_r; - struct fixed31_32 y_g; - struct fixed31_32 y_b; - struct fixed31_32 y1_min; - struct fixed31_32 y3_max; int32_t region_start, region_end; int32_t i; @@ -259,16 +324,16 @@ bool cm_helper_translate_curve_to_hw_format( if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) return false; - PERF_TRACE(); + PERF_TRACE_CTX(output_tf->ctx); - arr_points = lut_params->arr_points; + corner_points = lut_params->corner_points; rgb_resulted = lut_params->rgb_resulted; hw_points = 0; memset(lut_params, 0, sizeof(struct pwl_params)); memset(seg_distr, 0, sizeof(seg_distr)); - if (output_tf->tf == TRANSFER_FUNCTION_PQ) { + if (output_tf->tf == TRANSFER_FUNCTION_PQ || output_tf->tf == TRANSFER_FUNCTION_GAMMA22) { /* 32 segments * segments are from 2^-25 to 2^7 */ @@ -327,31 +392,37 @@ bool cm_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), + // All 3 color channels have same x + corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); - arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), - dc_fixpt_from_int(region_end)); + corner_points[0].green.x = corner_points[0].red.x; + corner_points[0].blue.x = corner_points[0].red.x; - y_r = rgb_resulted[0].red; - y_g = rgb_resulted[0].green; - y_b = rgb_resulted[0].blue; + corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), + dc_fixpt_from_int(region_end)); + corner_points[1].green.x = corner_points[1].red.x; + corner_points[1].blue.x = corner_points[1].red.x; - y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); + corner_points[0].red.y = 
rgb_resulted[0].red; + corner_points[0].green.y = rgb_resulted[0].green; + corner_points[0].blue.y = rgb_resulted[0].blue; - arr_points[0].y = y1_min; - arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); - y_r = rgb_resulted[hw_points - 1].red; - y_g = rgb_resulted[hw_points - 1].green; - y_b = rgb_resulted[hw_points - 1].blue; + corner_points[0].red.slope = dc_fixpt_div(corner_points[0].red.y, + corner_points[0].red.x); + corner_points[0].green.slope = dc_fixpt_div(corner_points[0].green.y, + corner_points[0].green.x); + corner_points[0].blue.slope = dc_fixpt_div(corner_points[0].blue.y, + corner_points[0].blue.x); /* see comment above, m_arrPoints[1].y should be the Y value for the * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) */ - y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); - - arr_points[1].y = y3_max; - - arr_points[1].slope = dc_fixpt_zero; + corner_points[1].red.y = rgb_resulted[hw_points - 1].red; + corner_points[1].green.y = rgb_resulted[hw_points - 1].green; + corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue; + corner_points[1].red.slope = dc_fixpt_zero; + corner_points[1].green.slope = dc_fixpt_zero; + corner_points[1].blue.slope = dc_fixpt_zero; if (output_tf->tf == TRANSFER_FUNCTION_PQ) { /* for PQ, we want to have a straight line from last HW X point, @@ -360,9 +431,15 @@ bool cm_helper_translate_curve_to_hw_format( const struct fixed31_32 end_value = dc_fixpt_from_int(125); - arr_points[1].slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), - dc_fixpt_sub(end_value, arr_points[1].x)); + corner_points[1].red.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y), + dc_fixpt_sub(end_value, corner_points[1].red.x)); + corner_points[1].green.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y), + dc_fixpt_sub(end_value, corner_points[1].green.x)); + corner_points[1].blue.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, 
corner_points[1].blue.y), + dc_fixpt_sub(end_value, corner_points[1].blue.x)); } lut_params->hw_points_num = hw_points; @@ -411,7 +488,7 @@ bool cm_helper_translate_curve_to_hw_format( ++i; } cm_helper_convert_to_custom_float(rgb_resulted, - lut_params->arr_points, + lut_params->corner_points, hw_points, fixpoint); return true; @@ -424,15 +501,10 @@ bool cm_helper_translate_curve_to_degamma_hw_format( const struct dc_transfer_func *output_tf, struct pwl_params *lut_params) { - struct curve_points *arr_points; + struct curve_points3 *corner_points; struct pwl_result_data *rgb_resulted; struct pwl_result_data *rgb; struct pwl_result_data *rgb_plus_1; - struct fixed31_32 y_r; - struct fixed31_32 y_g; - struct fixed31_32 y_b; - struct fixed31_32 y1_min; - struct fixed31_32 y3_max; int32_t region_start, region_end; int32_t i; @@ -441,9 +513,9 @@ bool cm_helper_translate_curve_to_degamma_hw_format( if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS) return false; - PERF_TRACE(); + PERF_TRACE_CTX(output_tf->ctx); - arr_points = lut_params->arr_points; + corner_points = lut_params->corner_points; rgb_resulted = lut_params->rgb_resulted; hw_points = 0; @@ -489,31 +561,28 @@ bool cm_helper_translate_curve_to_degamma_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; - arr_points[0].x = dc_fixpt_pow(dc_fixpt_from_int(2), + corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); - arr_points[1].x = dc_fixpt_pow(dc_fixpt_from_int(2), + corner_points[0].green.x = corner_points[0].red.x; + corner_points[0].blue.x = corner_points[0].red.x; + corner_points[1].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_end)); + corner_points[1].green.x = corner_points[1].red.x; + corner_points[1].blue.x = corner_points[1].red.x; - y_r = rgb_resulted[0].red; - y_g = rgb_resulted[0].green; - y_b = 
rgb_resulted[0].blue; - - y1_min = dc_fixpt_min(y_r, dc_fixpt_min(y_g, y_b)); - - arr_points[0].y = y1_min; - arr_points[0].slope = dc_fixpt_div(arr_points[0].y, arr_points[0].x); - y_r = rgb_resulted[hw_points - 1].red; - y_g = rgb_resulted[hw_points - 1].green; - y_b = rgb_resulted[hw_points - 1].blue; + corner_points[0].red.y = rgb_resulted[0].red; + corner_points[0].green.y = rgb_resulted[0].green; + corner_points[0].blue.y = rgb_resulted[0].blue; /* see comment above, m_arrPoints[1].y should be the Y value for the * region end (m_numOfHwPoints), not last HW point(m_numOfHwPoints - 1) */ - y3_max = dc_fixpt_max(y_r, dc_fixpt_max(y_g, y_b)); - - arr_points[1].y = y3_max; - - arr_points[1].slope = dc_fixpt_zero; + corner_points[1].red.y = rgb_resulted[hw_points - 1].red; + corner_points[1].green.y = rgb_resulted[hw_points - 1].green; + corner_points[1].blue.y = rgb_resulted[hw_points - 1].blue; + corner_points[1].red.slope = dc_fixpt_zero; + corner_points[1].green.slope = dc_fixpt_zero; + corner_points[1].blue.slope = dc_fixpt_zero; if (output_tf->tf == TRANSFER_FUNCTION_PQ) { /* for PQ, we want to have a straight line from last HW X point, @@ -522,9 +591,15 @@ bool cm_helper_translate_curve_to_degamma_hw_format( const struct fixed31_32 end_value = dc_fixpt_from_int(125); - arr_points[1].slope = dc_fixpt_div( - dc_fixpt_sub(dc_fixpt_one, arr_points[1].y), - dc_fixpt_sub(end_value, arr_points[1].x)); + corner_points[1].red.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, corner_points[1].red.y), + dc_fixpt_sub(end_value, corner_points[1].red.x)); + corner_points[1].green.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, corner_points[1].green.y), + dc_fixpt_sub(end_value, corner_points[1].green.x)); + corner_points[1].blue.slope = dc_fixpt_div( + dc_fixpt_sub(dc_fixpt_one, corner_points[1].blue.y), + dc_fixpt_sub(end_value, corner_points[1].blue.x)); } lut_params->hw_points_num = hw_points; @@ -564,7 +639,7 @@ bool 
cm_helper_translate_curve_to_degamma_hw_format( ++i; } cm_helper_convert_to_custom_float(rgb_resulted, - lut_params->arr_points, + lut_params->corner_points, hw_points, false); return true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h index 7a531b02871f..5ae4d69391a5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.h @@ -98,7 +98,7 @@ void cm_helper_program_xfer_func( bool cm_helper_convert_to_custom_float( struct pwl_result_data *rgb_resulted, - struct curve_points *arr_points, + struct curve_points3 *corner_points, uint32_t hw_points_num, bool fixpoint); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c index 4254e7e1a509..c7d1e678ebf5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c @@ -100,7 +100,7 @@ bool hububu1_is_allow_self_refresh_enabled(struct hubbub *hubbub) REG_GET(DCHUBBUB_ARB_DRAM_STATE_CNTL, DCHUBBUB_ARB_ALLOW_SELF_REFRESH_FORCE_ENABLE, &enable); - return true ? false : enable; + return enable ? 
true : false; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 74132a1f3046..345af015d061 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -99,6 +99,14 @@ static unsigned int hubp1_get_underflow_status(struct hubp *hubp) return hubp_underflow; } + +void hubp1_clear_underflow(struct hubp *hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_CNTL, HUBP_UNDERFLOW_CLEAR, 1); +} + static void hubp1_set_hubp_blank_en(struct hubp *hubp, bool blank) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); @@ -565,19 +573,6 @@ void hubp1_program_deadline( REFCYC_X_AFTER_SCALER, dlg_attr->refcyc_x_after_scaler, DST_Y_AFTER_SCALER, dlg_attr->dst_y_after_scaler); - if (REG(PREFETCH_SETTINS)) - REG_SET_2(PREFETCH_SETTINS, 0, - DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, - VRATIO_PREFETCH, dlg_attr->vratio_prefetch); - else - REG_SET_2(PREFETCH_SETTINGS, 0, - DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, - VRATIO_PREFETCH, dlg_attr->vratio_prefetch); - - REG_SET_2(VBLANK_PARAMETERS_0, 0, - DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, - DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank); - REG_SET(REF_FREQ_TO_PIX_FREQ, 0, REF_FREQ_TO_PIX_FREQ, dlg_attr->ref_freq_to_pix_freq); @@ -585,9 +580,6 @@ void hubp1_program_deadline( REG_SET(VBLANK_PARAMETERS_1, 0, REFCYC_PER_PTE_GROUP_VBLANK_L, dlg_attr->refcyc_per_pte_group_vblank_l); - REG_SET(VBLANK_PARAMETERS_3, 0, - REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); - if (REG(NOM_PARAMETERS_0)) REG_SET(NOM_PARAMETERS_0, 0, DST_Y_PER_PTE_ROW_NOM_L, dlg_attr->dst_y_per_pte_row_nom_l); @@ -602,27 +594,13 @@ void hubp1_program_deadline( REG_SET(NOM_PARAMETERS_5, 0, REFCYC_PER_META_CHUNK_NOM_L, dlg_attr->refcyc_per_meta_chunk_nom_l); - REG_SET_2(PER_LINE_DELIVERY_PRE, 0, - REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, 
- REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); - REG_SET_2(PER_LINE_DELIVERY, 0, REFCYC_PER_LINE_DELIVERY_L, dlg_attr->refcyc_per_line_delivery_l, REFCYC_PER_LINE_DELIVERY_C, dlg_attr->refcyc_per_line_delivery_c); - if (REG(PREFETCH_SETTINS_C)) - REG_SET(PREFETCH_SETTINS_C, 0, - VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); - else - REG_SET(PREFETCH_SETTINGS_C, 0, - VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); - REG_SET(VBLANK_PARAMETERS_2, 0, REFCYC_PER_PTE_GROUP_VBLANK_C, dlg_attr->refcyc_per_pte_group_vblank_c); - REG_SET(VBLANK_PARAMETERS_4, 0, - REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); - if (REG(NOM_PARAMETERS_2)) REG_SET(NOM_PARAMETERS_2, 0, DST_Y_PER_PTE_ROW_NOM_C, dlg_attr->dst_y_per_pte_row_nom_c); @@ -642,10 +620,6 @@ void hubp1_program_deadline( QoS_LEVEL_LOW_WM, ttu_attr->qos_level_low_wm, QoS_LEVEL_HIGH_WM, ttu_attr->qos_level_high_wm); - REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, - MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, - QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); - /* TTU - per luma/chroma */ /* Assumed surf0 is luma and 1 is chroma */ @@ -654,25 +628,15 @@ void hubp1_program_deadline( QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_l, QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_l); - REG_SET(DCN_SURF0_TTU_CNTL1, 0, - REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_l); - REG_SET_3(DCN_SURF1_TTU_CNTL0, 0, REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_c, QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_c, QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_c); - REG_SET(DCN_SURF1_TTU_CNTL1, 0, - REFCYC_PER_REQ_DELIVERY_PRE, - ttu_attr->refcyc_per_req_delivery_pre_c); - REG_SET_3(DCN_CUR0_TTU_CNTL0, 0, REFCYC_PER_REQ_DELIVERY, ttu_attr->refcyc_per_req_delivery_cur0, QoS_LEVEL_FIXED, ttu_attr->qos_level_fixed_cur0, QoS_RAMP_DISABLE, ttu_attr->qos_ramp_disable_cur0); - REG_SET(DCN_CUR0_TTU_CNTL1, 0, - REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); 
} static void hubp1_setup( @@ -690,6 +654,48 @@ static void hubp1_setup( hubp1_vready_workaround(hubp, pipe_dest); } +static void hubp1_setup_interdependent( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_attr, + struct _vcs_dpi_display_ttu_regs_st *ttu_attr) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_SET_2(PREFETCH_SETTINS, 0, + DST_Y_PREFETCH, dlg_attr->dst_y_prefetch, + VRATIO_PREFETCH, dlg_attr->vratio_prefetch); + + REG_SET(PREFETCH_SETTINS_C, 0, + VRATIO_PREFETCH_C, dlg_attr->vratio_prefetch_c); + + REG_SET_2(VBLANK_PARAMETERS_0, 0, + DST_Y_PER_VM_VBLANK, dlg_attr->dst_y_per_vm_vblank, + DST_Y_PER_ROW_VBLANK, dlg_attr->dst_y_per_row_vblank); + + REG_SET(VBLANK_PARAMETERS_3, 0, + REFCYC_PER_META_CHUNK_VBLANK_L, dlg_attr->refcyc_per_meta_chunk_vblank_l); + + REG_SET(VBLANK_PARAMETERS_4, 0, + REFCYC_PER_META_CHUNK_VBLANK_C, dlg_attr->refcyc_per_meta_chunk_vblank_c); + + REG_SET_2(PER_LINE_DELIVERY_PRE, 0, + REFCYC_PER_LINE_DELIVERY_PRE_L, dlg_attr->refcyc_per_line_delivery_pre_l, + REFCYC_PER_LINE_DELIVERY_PRE_C, dlg_attr->refcyc_per_line_delivery_pre_c); + + REG_SET(DCN_SURF0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, + ttu_attr->refcyc_per_req_delivery_pre_l); + REG_SET(DCN_SURF1_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, + ttu_attr->refcyc_per_req_delivery_pre_c); + REG_SET(DCN_CUR0_TTU_CNTL1, 0, + REFCYC_PER_REQ_DELIVERY_PRE, ttu_attr->refcyc_per_req_delivery_pre_cur0); + + REG_SET_2(DCN_GLOBAL_TTU_CNTL, 0, + MIN_TTU_VBLANK, ttu_attr->min_ttu_vblank, + QoS_LEVEL_FLIP, ttu_attr->qos_level_flip); +} + bool hubp1_is_flip_pending(struct hubp *hubp) { uint32_t flip_pending = 0; @@ -1178,6 +1184,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { hubp1_program_surface_config, .hubp_is_flip_pending = hubp1_is_flip_pending, .hubp_setup = hubp1_setup, + .hubp_setup_interdependent = hubp1_setup_interdependent, .hubp_set_vm_system_aperture_settings = hubp1_set_vm_system_aperture_settings, .hubp_set_vm_context0_settings = 
hubp1_set_vm_context0_settings, .set_blank = hubp1_set_blank, @@ -1190,6 +1197,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_clk_cntl = hubp1_clk_cntl, .hubp_vtg_sel = hubp1_vtg_sel, .hubp_read_state = hubp1_read_state, + .hubp_clear_underflow = hubp1_clear_underflow, .hubp_disable_control = hubp1_disable_control, .hubp_get_underflow_status = hubp1_get_underflow_status, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 4890273b632b..62d4232e7796 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -251,6 +251,7 @@ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_BLANK_EN, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_TTU_DISABLE, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_STATUS, mask_sh),\ + HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_UNDERFLOW_CLEAR, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_NO_OUTSTANDING_REQ, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_VTG_SEL, mask_sh),\ HUBP_SF(HUBP0_DCHUBP_CNTL, HUBP_DISABLE, mask_sh),\ @@ -435,6 +436,7 @@ type HUBP_NO_OUTSTANDING_REQ;\ type HUBP_VTG_SEL;\ type HUBP_UNDERFLOW_STATUS;\ + type HUBP_UNDERFLOW_CLEAR;\ type NUM_PIPES;\ type NUM_BANKS;\ type PIPE_INTERLEAVE;\ @@ -739,6 +741,7 @@ void dcn10_hubp_construct( const struct dcn_mi_mask *hubp_mask); void hubp1_read_state(struct hubp *hubp); +void hubp1_clear_underflow(struct hubp *hubp); enum cursor_pitch hubp1_get_cursor_pitch(unsigned int pitch); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 193184affefb..0bd33a713836 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -45,6 +45,7 @@ #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" #include "dc_link_dp.h" +#include "dccg.h" #define DC_LOGGER_INIT(logger) @@ -786,7 +787,7 @@ static bool 
dcn10_hw_wa_force_recovery(struct dc *dc) &dc->current_state->res_ctx.pipe_ctx[i]; if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; - if (hubp != NULL) { + if (hubp != NULL && hubp->funcs->hubp_get_underflow_status) { if (hubp->funcs->hubp_get_underflow_status(hubp) != 0) { /* one pipe underflow, we will reset all the pipes*/ need_recover = true; @@ -812,7 +813,7 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; /*DCHUBP_CNTL:HUBP_BLANK_EN=1*/ - if (hubp != NULL) + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) hubp->funcs->set_hubp_blank_en(hubp, true); } } @@ -825,7 +826,7 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; /*DCHUBP_CNTL:HUBP_DISABLE=1*/ - if (hubp != NULL) + if (hubp != NULL && hubp->funcs->hubp_disable_control) hubp->funcs->hubp_disable_control(hubp, true); } } @@ -835,7 +836,7 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; /*DCHUBP_CNTL:HUBP_DISABLE=0*/ - if (hubp != NULL) + if (hubp != NULL && hubp->funcs->hubp_disable_control) hubp->funcs->hubp_disable_control(hubp, true); } } @@ -847,7 +848,7 @@ static bool dcn10_hw_wa_force_recovery(struct dc *dc) if (pipe_ctx != NULL) { hubp = pipe_ctx->plane_res.hubp; /*DCHUBP_CNTL:HUBP_BLANK_EN=0*/ - if (hubp != NULL) + if (hubp != NULL && hubp->funcs->set_hubp_blank_en) hubp->funcs->set_hubp_blank_en(hubp, true); } } @@ -1126,7 +1127,7 @@ static void dcn10_init_hw(struct dc *dc) enable_power_gating_plane(dc->hwseq, true); - memset(&dc->res_pool->dccg->clks, 0, sizeof(dc->res_pool->dccg->clks)); + memset(&dc->res_pool->clk_mgr->clks, 0, sizeof(dc->res_pool->clk_mgr->clks)); } static void reset_hw_ctx_wrap( @@ -1226,7 +1227,8 @@ static bool dcn10_set_input_transfer_func(struct pipe_ctx *pipe_ctx, tf = plane_state->in_transfer_func; if (plane_state->gamma_correction && - 
!plane_state->gamma_correction->is_identity + !dpp_base->ctx->dc->debug.always_use_regamma + && !plane_state->gamma_correction->is_identity && dce_use_lut(plane_state->format)) dpp_base->funcs->dpp_program_input_lut(dpp_base, plane_state->gamma_correction); @@ -1399,7 +1401,7 @@ static void dcn10_enable_per_frame_crtc_position_reset( if (grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset) grouped_pipes[i]->stream_res.tg->funcs->enable_crtc_reset( grouped_pipes[i]->stream_res.tg, - grouped_pipes[i]->stream->triggered_crtc_reset.event_source->status.primary_otg_inst, + 0, &grouped_pipes[i]->stream->triggered_crtc_reset); DC_SYNC_INFO("Waiting for trigger\n"); @@ -1603,7 +1605,7 @@ static void mmhub_read_vm_context0_settings(struct dcn10_hubp *hubp1, } -static void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) +void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp) { struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); struct vm_system_aperture_param apt = { {{ 0 } } }; @@ -1703,33 +1705,22 @@ static void program_gamut_remap(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.dpp->funcs->dpp_set_gamut_remap(pipe_ctx->plane_res.dpp, &adjust); } - -static void program_csc_matrix(struct pipe_ctx *pipe_ctx, +static void dcn10_program_output_csc(struct dc *dc, + struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, - uint16_t *matrix) + uint16_t *matrix, + int opp_id) { if (pipe_ctx->stream->csc_color_matrix.enable_adjustment == true) { - if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL) - pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix); + if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL) + pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment(pipe_ctx->plane_res.dpp, matrix); } else { if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default != NULL) pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_default(pipe_ctx->plane_res.dpp, colorspace); } } -static void 
dcn10_program_output_csc(struct dc *dc, - struct pipe_ctx *pipe_ctx, - enum dc_color_space colorspace, - uint16_t *matrix, - int opp_id) -{ - if (pipe_ctx->plane_res.dpp->funcs->dpp_set_csc_adjustment != NULL) - program_csc_matrix(pipe_ctx, - colorspace, - matrix); -} - -static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) +bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { if (pipe_ctx->plane_state->visible) return true; @@ -1738,7 +1729,7 @@ static bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx) return false; } -static bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) +bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { if (pipe_ctx->plane_state->visible) return true; @@ -1747,7 +1738,7 @@ static bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx) return false; } -static bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) +bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx) { if (pipe_ctx->plane_state->visible) return true; @@ -1780,7 +1771,7 @@ bool is_rgb_cspace(enum dc_color_space output_color_space) } } -static void dcn10_get_surface_visual_confirm_color( +void dcn10_get_surface_visual_confirm_color( const struct pipe_ctx *pipe_ctx, struct tg_color *color) { @@ -1816,7 +1807,7 @@ static void dcn10_get_surface_visual_confirm_color( } } -static void dcn10_get_hdr_visual_confirm_color( +void dcn10_get_hdr_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color) { @@ -1943,10 +1934,6 @@ static void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - - - /* TODO: proper fix once fpga works */ - if (dc->debug.visual_confirm == VISUAL_CONFIRM_HDR) { dcn10_get_hdr_visual_confirm_color( pipe_ctx, &blnd_cfg.black_color); @@ -2026,8 +2013,6 @@ static void update_scaler(struct pipe_ctx *pipe_ctx) bool per_pixel_alpha = pipe_ctx->plane_state->per_pixel_alpha && 
pipe_ctx->bottom_pipe; - /* TODO: proper fix once fpga works */ - pipe_ctx->plane_res.scl_data.lb_params.alpha_en = per_pixel_alpha; pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; /* scaler configuration */ @@ -2035,7 +2020,7 @@ static void update_scaler(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.dpp, &pipe_ctx->plane_res.scl_data); } -static void update_dchubp_dpp( +void update_dchubp_dpp( struct dc *dc, struct pipe_ctx *pipe_ctx, struct dc_state *context) @@ -2052,16 +2037,22 @@ static void update_dchubp_dpp( */ if (plane_state->update_flags.bits.full_update) { bool should_divided_by_2 = context->bw.dcn.clk.dppclk_khz <= - dc->res_pool->dccg->clks.dispclk_khz / 2; + dc->res_pool->clk_mgr->clks.dispclk_khz / 2; dpp->funcs->dpp_dppclk_control( dpp, should_divided_by_2, true); - dc->res_pool->dccg->clks.dppclk_khz = should_divided_by_2 ? - dc->res_pool->dccg->clks.dispclk_khz / 2 : - dc->res_pool->dccg->clks.dispclk_khz; + if (dc->res_pool->dccg) + dc->res_pool->dccg->funcs->update_dpp_dto( + dc->res_pool->dccg, + dpp->inst, + pipe_ctx->plane_res.bw.calc.dppclk_khz); + else + dc->res_pool->clk_mgr->clks.dppclk_khz = should_divided_by_2 ? 
+ dc->res_pool->clk_mgr->clks.dispclk_khz / 2 : + dc->res_pool->clk_mgr->clks.dispclk_khz; } /* TODO: Need input parameter to tell current DCHUB pipe tie to which OTG @@ -2077,6 +2068,10 @@ static void update_dchubp_dpp( &pipe_ctx->ttu_regs, &pipe_ctx->rq_regs, &pipe_ctx->pipe_dlg_param); + hubp->funcs->hubp_setup_interdependent( + hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs); } size.grph.surface_size = pipe_ctx->plane_res.scl_data.viewport; @@ -2182,7 +2177,7 @@ static void dcn10_blank_pixel_data( } } -static void set_hdr_multiplier(struct pipe_ctx *pipe_ctx) +void set_hdr_multiplier(struct pipe_ctx *pipe_ctx) { struct fixed31_32 multiplier = dc_fixpt_from_fraction( pipe_ctx->plane_state->sdr_white_level, 80); @@ -2257,47 +2252,7 @@ static void program_all_pipe_in_tree( } } -static void dcn10_pplib_apply_display_requirements( - struct dc *dc, - struct dc_state *context) -{ - struct dm_pp_display_configuration *pp_display_cfg = &context->pp_display_cfg; - - pp_display_cfg->min_engine_clock_khz = dc->res_pool->dccg->clks.dcfclk_khz; - pp_display_cfg->min_memory_clock_khz = dc->res_pool->dccg->clks.fclk_khz; - pp_display_cfg->min_engine_clock_deep_sleep_khz = dc->res_pool->dccg->clks.dcfclk_deep_sleep_khz; - pp_display_cfg->min_dcfc_deep_sleep_clock_khz = dc->res_pool->dccg->clks.dcfclk_deep_sleep_khz; - pp_display_cfg->min_dcfclock_khz = dc->res_pool->dccg->clks.dcfclk_khz; - pp_display_cfg->disp_clk_khz = dc->res_pool->dccg->clks.dispclk_khz; - dce110_fill_display_configs(context, pp_display_cfg); - - if (memcmp(&dc->prev_display_config, pp_display_cfg, sizeof( - struct dm_pp_display_configuration)) != 0) - dm_pp_apply_display_requirements(dc->ctx, pp_display_cfg); - - dc->prev_display_config = *pp_display_cfg; -} - -static void optimize_shared_resources(struct dc *dc) -{ - if (dc->current_state->stream_count == 0) { - /* S0i2 message */ - dcn10_pplib_apply_display_requirements(dc, dc->current_state); - } - - if (dc->debug.pplib_wm_report_mode == 
WM_REPORT_OVERRIDE) - dcn_bw_notify_pplib_of_wm_ranges(dc); -} - -static void ready_shared_resources(struct dc *dc, struct dc_state *context) -{ - /* S0i2 message */ - if (dc->current_state->stream_count == 0 && - context->stream_count != 0) - dcn10_pplib_apply_display_requirements(dc, context); -} - -static struct pipe_ctx *find_top_pipe_for_stream( +struct pipe_ctx *find_top_pipe_for_stream( struct dc *dc, struct dc_state *context, const struct dc_stream_state *stream) @@ -2387,6 +2342,32 @@ static void dcn10_apply_ctx_for_surface( dcn10_pipe_control_lock(dc, top_pipe_to_program, false); + if (top_pipe_to_program->plane_state && + top_pipe_to_program->plane_state->update_flags.bits.full_update) + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + /* Skip inactive pipes and ones already updated */ + if (!pipe_ctx->stream || pipe_ctx->stream == stream) + continue; + + pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg); + + pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent( + pipe_ctx->plane_res.hubp, + &pipe_ctx->dlg_regs, + &pipe_ctx->ttu_regs); + } + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx->stream || pipe_ctx->stream == stream) + continue; + + dcn10_pipe_control_lock(dc, pipe_ctx, false); + } + if (num_planes == 0) false_optc_underflow_wa(dc, stream, tg); @@ -2398,10 +2379,9 @@ static void dcn10_apply_ctx_for_surface( hubbub1_wm_change_req_wa(dc->res_pool->hubbub); } -static void dcn10_set_bandwidth( +static void dcn10_prepare_bandwidth( struct dc *dc, - struct dc_state *context, - bool safe_to_lower) + struct dc_state *context) { if (dc->debug.sanity_checks) dcn10_verify_allow_pstate_change_high(dc); @@ -2410,12 +2390,39 @@ static void dcn10_set_bandwidth( if (context->stream_count == 0) context->bw.dcn.clk.phyclk_khz = 0; - dc->res_pool->dccg->funcs->update_clocks( - dc->res_pool->dccg, - 
&context->bw.dcn.clk, - safe_to_lower); + dc->res_pool->clk_mgr->funcs->update_clocks( + dc->res_pool->clk_mgr, + context, + false); + } + + hubbub1_program_watermarks(dc->res_pool->hubbub, + &context->bw.dcn.watermarks, + dc->res_pool->ref_clock_inKhz / 1000, + true); + + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + dcn_bw_notify_pplib_of_wm_ranges(dc); + + if (dc->debug.sanity_checks) + dcn10_verify_allow_pstate_change_high(dc); +} + +static void dcn10_optimize_bandwidth( + struct dc *dc, + struct dc_state *context) +{ + if (dc->debug.sanity_checks) + dcn10_verify_allow_pstate_change_high(dc); + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { + if (context->stream_count == 0) + context->bw.dcn.clk.phyclk_khz = 0; - dcn10_pplib_apply_display_requirements(dc, context); + dc->res_pool->clk_mgr->funcs->update_clocks( + dc->res_pool->clk_mgr, + context, + true); } hubbub1_program_watermarks(dc->res_pool->hubbub, @@ -2423,6 +2430,9 @@ static void dcn10_set_bandwidth( dc->res_pool->ref_clock_inKhz / 1000, true); + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + dcn_bw_notify_pplib_of_wm_ranges(dc); + if (dc->debug.sanity_checks) dcn10_verify_allow_pstate_change_high(dc); } @@ -2694,7 +2704,6 @@ static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx) static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, - .program_csc_matrix = program_csc_matrix, .init_hw = dcn10_init_hw, .apply_ctx_to_hw = dce110_apply_ctx_to_hw, .apply_ctx_for_surface = dcn10_apply_ctx_for_surface, @@ -2721,7 +2730,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .disable_plane = dcn10_disable_plane, .blank_pixel_data = dcn10_blank_pixel_data, .pipe_control_lock = dcn10_pipe_control_lock, - .set_bandwidth = dcn10_set_bandwidth, + .prepare_bandwidth = dcn10_prepare_bandwidth, + .optimize_bandwidth = dcn10_optimize_bandwidth, .reset_hw_ctx_wrap = reset_hw_ctx_wrap, .enable_stream_timing = 
dcn10_enable_stream_timing, .set_drr = set_drr, @@ -2731,11 +2741,8 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .set_avmute = dce110_set_avmute, .log_hw_state = dcn10_log_hw_state, .get_hw_state = dcn10_get_hw_state, + .clear_status_bits = dcn10_clear_status_bits, .wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect, - .ready_shared_resources = ready_shared_resources, - .optimize_shared_resources = optimize_shared_resources, - .pplib_apply_display_requirements = - dcn10_pplib_apply_display_requirements, .edp_backlight_control = hwss_edp_backlight_control, .edp_power_control = hwss_edp_power_control, .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 84d461e0ed3e..f8eea10e4c64 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -51,4 +51,34 @@ void dcn10_get_hw_state( char *pBuf, unsigned int bufSize, unsigned int mask); +void dcn10_clear_status_bits(struct dc *dc, unsigned int mask); + +bool is_lower_pipe_tree_visible(struct pipe_ctx *pipe_ctx); + +bool is_upper_pipe_tree_visible(struct pipe_ctx *pipe_ctx); + +bool is_pipe_tree_visible(struct pipe_ctx *pipe_ctx); + +void dcn10_program_pte_vm(struct dce_hwseq *hws, struct hubp *hubp); + +void set_hdr_multiplier(struct pipe_ctx *pipe_ctx); + +void dcn10_get_surface_visual_confirm_color( + const struct pipe_ctx *pipe_ctx, + struct tg_color *color); + +void dcn10_get_hdr_visual_confirm_color( + struct pipe_ctx *pipe_ctx, + struct tg_color *color); + +void update_dchubp_dpp( + struct dc *dc, + struct pipe_ctx *pipe_ctx, + struct dc_state *context); + +struct pipe_ctx *find_top_pipe_for_stream( + struct dc *dc, + struct dc_state *context, + const struct dc_stream_state *stream); + #endif /* __DC_HWSS_DCN10_H__ */ diff --git 
a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c index 64158900730f..cd469014baa3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c @@ -44,6 +44,7 @@ #include "dcn10_hubp.h" #include "dcn10_hubbub.h" #include "dcn10_cm_common.h" +#include "dcn10_clk_mgr.h" static unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...) { @@ -454,12 +455,6 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int remaining_buffer -= chars_printed; pBuf += chars_printed; - - // Clear underflow for debug purposes - // We want to keep underflow sticky bit on for the longevity tests outside of test environment. - // This function is called only from Windows or Diags test environment, hence it's safe to clear - // it from here without affecting the original intent. - tg->funcs->clear_optc_underflow(tg); } } @@ -469,19 +464,75 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int static unsigned int dcn10_get_clock_states(struct dc *dc, char *pBuf, unsigned int bufSize) { unsigned int chars_printed = 0; + unsigned int remaining_buffer = bufSize; - chars_printed = snprintf_count(pBuf, bufSize, "dcfclk_khz,dcfclk_deep_sleep_khz,dispclk_khz," - "dppclk_khz,max_supported_dppclk_khz,fclk_khz,socclk_khz\n" - "%d,%d,%d,%d,%d,%d,%d\n", + chars_printed = snprintf_count(pBuf, bufSize, "dcfclk,dcfclk_deep_sleep,dispclk," + "dppclk,fclk,socclk\n" + "%d,%d,%d,%d,%d,%d\n", dc->current_state->bw.dcn.clk.dcfclk_khz, dc->current_state->bw.dcn.clk.dcfclk_deep_sleep_khz, dc->current_state->bw.dcn.clk.dispclk_khz, dc->current_state->bw.dcn.clk.dppclk_khz, - dc->current_state->bw.dcn.clk.max_supported_dppclk_khz, dc->current_state->bw.dcn.clk.fclk_khz, dc->current_state->bw.dcn.clk.socclk_khz); - return chars_printed; + remaining_buffer -= chars_printed; + 
pBuf += chars_printed; + + return bufSize - remaining_buffer; +} + +static void dcn10_clear_otpc_underflow(struct dc *dc) +{ + struct resource_pool *pool = dc->res_pool; + int i; + + for (i = 0; i < pool->timing_generator_count; i++) { + struct timing_generator *tg = pool->timing_generators[i]; + struct dcn_otg_state s = {0}; + + optc1_read_otg_state(DCN10TG_FROM_TG(tg), &s); + + if (s.otg_enabled & 1) + tg->funcs->clear_optc_underflow(tg); + } +} + +static void dcn10_clear_hubp_underflow(struct dc *dc) +{ + struct resource_pool *pool = dc->res_pool; + int i; + + for (i = 0; i < pool->pipe_count; i++) { + struct hubp *hubp = pool->hubps[i]; + struct dcn_hubp_state *s = &(TO_DCN10_HUBP(hubp)->state); + + hubp->funcs->hubp_read_state(hubp); + + if (!s->blank_en) + hubp->funcs->hubp_clear_underflow(hubp); + } +} + +void dcn10_clear_status_bits(struct dc *dc, unsigned int mask) +{ + /* + * Mask Format + * Bit 0 - 31: Status bit to clear + * + * Mask = 0x0 means clear all status bits + */ + const unsigned int DC_HW_STATE_MASK_HUBP_UNDERFLOW = 0x1; + const unsigned int DC_HW_STATE_MASK_OTPC_UNDERFLOW = 0x2; + + if (mask == 0x0) + mask = 0xFFFFFFFF; + + if (mask & DC_HW_STATE_MASK_HUBP_UNDERFLOW) + dcn10_clear_hubp_underflow(dc); + + if (mask & DC_HW_STATE_MASK_OTPC_UNDERFLOW) + dcn10_clear_otpc_underflow(dc); } void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigned int mask) @@ -491,16 +542,16 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne * Bit 0 - 15: Hardware block mask * Bit 15: 1 = Invariant Only, 0 = All */ - const unsigned int DC_HW_STATE_MASK_HUBBUB = 0x1; - const unsigned int DC_HW_STATE_MASK_HUBP = 0x2; - const unsigned int DC_HW_STATE_MASK_RQ = 0x4; - const unsigned int DC_HW_STATE_MASK_DLG = 0x8; - const unsigned int DC_HW_STATE_MASK_TTU = 0x10; - const unsigned int DC_HW_STATE_MASK_CM = 0x20; - const unsigned int DC_HW_STATE_MASK_MPCC = 0x40; - const unsigned int DC_HW_STATE_MASK_OTG = 0x80; - 
const unsigned int DC_HW_STATE_MASK_CLOCKS = 0x100; - const unsigned int DC_HW_STATE_INVAR_ONLY = 0x8000; + const unsigned int DC_HW_STATE_MASK_HUBBUB = 0x1; + const unsigned int DC_HW_STATE_MASK_HUBP = 0x2; + const unsigned int DC_HW_STATE_MASK_RQ = 0x4; + const unsigned int DC_HW_STATE_MASK_DLG = 0x8; + const unsigned int DC_HW_STATE_MASK_TTU = 0x10; + const unsigned int DC_HW_STATE_MASK_CM = 0x20; + const unsigned int DC_HW_STATE_MASK_MPCC = 0x40; + const unsigned int DC_HW_STATE_MASK_OTG = 0x80; + const unsigned int DC_HW_STATE_MASK_CLOCKS = 0x100; + const unsigned int DC_HW_STATE_INVAR_ONLY = 0x8000; unsigned int chars_printed = 0; unsigned int remaining_buf_size = bufSize; @@ -556,6 +607,9 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne remaining_buf_size -= chars_printed; } - if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0) + if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0) { chars_printed = dcn10_get_clock_states(dc, pBuf, remaining_buf_size); + pBuf += chars_printed; + remaining_buf_size -= chars_printed; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index ba6a8686062f..477ab9222216 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -589,7 +589,7 @@ static bool dcn10_link_encoder_validate_hdmi_output( return false; /* DCE11 HW does not support 420 */ - if (!enc10->base.features.ycbcr420_supported && + if (!enc10->base.features.hdmi_ycbcr420_supported && crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) return false; @@ -606,8 +606,10 @@ bool dcn10_link_encoder_validate_dp_output( const struct dcn10_link_encoder *enc10, const struct dc_crtc_timing *crtc_timing) { - if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) - return false; + if (crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) { + if 
(!enc10->base.features.dp_ycbcr420_supported) + return false; + } return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 54626682bab2..7c138615f17d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -87,9 +87,8 @@ static void optc1_disable_stereo(struct timing_generator *optc) REG_SET(OTG_STEREO_CONTROL, 0, OTG_STEREO_EN, 0); - REG_SET_3(OTG_3D_STRUCTURE_CONTROL, 0, + REG_SET_2(OTG_3D_STRUCTURE_CONTROL, 0, OTG_3D_STRUCTURE_EN, 0, - OTG_3D_STRUCTURE_V_UPDATE_MODE, 0, OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } @@ -274,10 +273,12 @@ void optc1_program_timing( * program the reg for interrupt postition. */ vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - if (vertical_line_start < 0) { - ASSERT(0); + v_fp2 = 0; + if (vertical_line_start < 0) + v_fp2 = -vertical_line_start; + if (vertical_line_start < 0) vertical_line_start = 0; - } + REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, OTG_VERTICAL_INTERRUPT2_LINE_START, vertical_line_start); @@ -296,9 +297,6 @@ void optc1_program_timing( if (patched_crtc_timing.flags.INTERLACE == 1) field_num = 1; } - v_fp2 = 0; - if (optc->dlg_otg_param.vstartup_start > asic_blank_end) - v_fp2 = optc->dlg_otg_param.vstartup_start > asic_blank_end; /* Interlace */ if (patched_crtc_timing.flags.INTERLACE == 1) { @@ -337,9 +335,8 @@ void optc1_program_timing( /* Enable stereo - only when we need to pack 3D frame. Other types * of stereo handled in explicit call */ - h_div_2 = (dc_crtc_timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) ? 
- 1 : 0; + h_div_2 = optc1_is_two_pixels_per_containter(&patched_crtc_timing); REG_UPDATE(OTG_H_TIMING_CNTL, OTG_H_TIMING_DIV_BY2, h_div_2); @@ -362,20 +359,19 @@ void optc1_set_blank_data_double_buffer(struct timing_generator *optc, bool enab static void optc1_unblank_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t vertical_interrupt_enable = 0; - - REG_GET(OTG_VERTICAL_INTERRUPT2_CONTROL, - OTG_VERTICAL_INTERRUPT2_INT_ENABLE, &vertical_interrupt_enable); - - /* temporary work around for vertical interrupt, once vertical interrupt enabled, - * this check will be removed. - */ - if (vertical_interrupt_enable) - optc1_set_blank_data_double_buffer(optc, true); REG_UPDATE_2(OTG_BLANK_CONTROL, OTG_BLANK_DATA_EN, 0, OTG_BLANK_DE_MODE, 0); + + /* W/A for automated testing + * Automated testing will fail underflow test as there + * sporadic underflows which occur during the optc blank + * sequence. As a w/a, clear underflow on unblank. + * This prevents the failure, but will not mask actual + * underflow that affect real use cases. 
+ */ + optc1_clear_optc_underflow(optc); } /** @@ -1155,9 +1151,8 @@ static void optc1_enable_stereo(struct timing_generator *optc, OTG_DISABLE_STEREOSYNC_OUTPUT_FOR_DP, 1); if (flags->PROGRAM_STEREO) - REG_UPDATE_3(OTG_3D_STRUCTURE_CONTROL, + REG_UPDATE_2(OTG_3D_STRUCTURE_CONTROL, OTG_3D_STRUCTURE_EN, flags->FRAME_PACKED, - OTG_3D_STRUCTURE_V_UPDATE_MODE, flags->FRAME_PACKED, OTG_3D_STRUCTURE_STEREO_SEL_OVR, flags->FRAME_PACKED); } @@ -1425,3 +1420,9 @@ void dcn10_timing_generator_init(struct optc *optc1) optc1->min_h_sync_width = 8; optc1->min_v_sync_width = 1; } + +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing) +{ + return timing->pixel_encoding == PIXEL_ENCODING_YCBCR420; +} + diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index c1b114209fe8..8bacf0b6e27e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -565,4 +565,6 @@ bool optc1_configure_crc(struct timing_generator *optc, bool optc1_get_crc(struct timing_generator *optc, uint32_t *r_cr, uint32_t *g_y, uint32_t *b_cb); +bool optc1_is_two_pixels_per_containter(const struct dc_crtc_timing *timing); + #endif /* __DC_TIMING_GENERATOR_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index a71453a15ae3..5d4772dec0ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -28,23 +28,23 @@ #include "resource.h" #include "include/irq_service_interface.h" -#include "dcn10/dcn10_resource.h" +#include "dcn10_resource.h" -#include "dcn10/dcn10_ipp.h" -#include "dcn10/dcn10_mpc.h" +#include "dcn10_ipp.h" +#include "dcn10_mpc.h" #include "irq/dcn10/irq_service_dcn10.h" -#include "dcn10/dcn10_dpp.h" +#include "dcn10_dpp.h" #include "dcn10_optc.h" -#include "dcn10/dcn10_hw_sequencer.h" +#include 
"dcn10_hw_sequencer.h" #include "dce110/dce110_hw_sequencer.h" -#include "dcn10/dcn10_opp.h" -#include "dcn10/dcn10_link_encoder.h" -#include "dcn10/dcn10_stream_encoder.h" -#include "dce/dce_clocks.h" +#include "dcn10_opp.h" +#include "dcn10_link_encoder.h" +#include "dcn10_stream_encoder.h" +#include "dcn10_clk_mgr.h" #include "dce/dce_clock_source.h" #include "dce/dce_audio.h" #include "dce/dce_hwseq.h" -#include "../virtual/virtual_stream_encoder.h" +#include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dce112/dce112_resource.h" #include "dcn10_hubp.h" @@ -202,7 +202,6 @@ enum dcn10_clk_src_array_id { #define MMHUB_SR(reg_name)\ .reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) + \ mm ## reg_name - /* macros to expend register list macro defined in HW object header file * end *********************/ @@ -436,8 +435,8 @@ static const struct dcn_optc_mask tg_mask = { TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; - static const struct bios_registers bios_regs = { + NBIO_SR(BIOS_SCRATCH_0), NBIO_SR(BIOS_SCRATCH_3), NBIO_SR(BIOS_SCRATCH_6) }; @@ -496,7 +495,6 @@ static const struct dce110_clk_src_mask cs_mask = { CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK) }; - static const struct resource_caps res_cap = { .num_timing_generator = 4, .num_opp = 4, @@ -719,7 +717,8 @@ static struct timing_generator *dcn10_timing_generator_create( static const struct encoder_feature_support link_enc_feature = { .max_hdmi_deep_color = COLOR_DEPTH_121212, .max_hdmi_pixel_clock = 600000, - .ycbcr420_supported = true, + .hdmi_ycbcr420_supported = true, + .dp_ycbcr420_supported = false, .flags.bits.IS_HBR2_CAPABLE = true, .flags.bits.IS_HBR3_CAPABLE = true, .flags.bits.IS_TPS3_CAPABLE = true, @@ -949,8 +948,8 @@ static void destruct(struct dcn10_resource_pool *pool) if (pool->base.dmcu != NULL) dce_dmcu_destroy(&pool->base.dmcu); - if (pool->base.dccg != NULL) - dce_dccg_destroy(&pool->base.dccg); + if (pool->base.clk_mgr != NULL) + 
dce_clk_mgr_destroy(&pool->base.clk_mgr); kfree(pool->base.pp_smu); } @@ -1275,9 +1274,8 @@ static bool construct( goto fail; } } - - pool->base.dccg = dcn1_dccg_create(ctx); - if (pool->base.dccg == NULL) { + pool->base.clk_mgr = dcn1_clk_mgr_create(ctx); + if (pool->base.clk_mgr == NULL) { dm_error("DC: failed to create display clock!\n"); BREAK_TO_DEBUGGER(); goto fail; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index 6f9078f3c4d3..b8b5525a389a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -766,7 +766,6 @@ void enc1_stream_encoder_dp_blank( struct stream_encoder *enc) { struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - uint32_t retries = 0; uint32_t reg1 = 0; uint32_t max_retries = DP_BLANK_MAX_RETRY * 10; @@ -803,8 +802,6 @@ void enc1_stream_encoder_dp_blank( 0, 10, max_retries); - ASSERT(retries <= max_retries); - /* Tell the DP encoder to ignore timing from CRTC, must be done after * the polling. If we set DP_STEER_FIFO_RESET before DP stream blank is * complete, stream status will be stuck in video stream enabled state, diff --git a/drivers/gpu/drm/amd/display/dc/dm_event_log.h b/drivers/gpu/drm/amd/display/dc/dm_event_log.h index 34a701ca879e..65663f4d93e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_event_log.h +++ b/drivers/gpu/drm/amd/display/dc/dm_event_log.h @@ -33,6 +33,7 @@ #define EVENT_LOG_AUX_REQ(ddc, type, action, address, len, data) #define EVENT_LOG_AUX_REP(ddc, type, replyStatus, len, data) +#define EVENT_LOG_CUST_MSG(tag, a, ...) 
#endif diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h index f2ea8452d48f..0029a39efb1c 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h +++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h @@ -55,10 +55,10 @@ struct pp_smu { struct pp_smu_wm_set_range { unsigned int wm_inst; - uint32_t min_fill_clk_khz; - uint32_t max_fill_clk_khz; - uint32_t min_drain_clk_khz; - uint32_t max_drain_clk_khz; + uint32_t min_fill_clk_mhz; + uint32_t max_fill_clk_mhz; + uint32_t min_drain_clk_mhz; + uint32_t max_drain_clk_mhz; }; #define MAX_WATERMARK_SETS 4 @@ -77,15 +77,15 @@ struct pp_smu_display_requirement_rv { */ unsigned int display_count; - /* PPSMC_MSG_SetHardMinFclkByFreq: khz + /* PPSMC_MSG_SetHardMinFclkByFreq: mhz * FCLK will vary with DPM, but never below requested hard min */ - unsigned int hard_min_fclk_khz; + unsigned int hard_min_fclk_mhz; - /* PPSMC_MSG_SetHardMinDcefclkByFreq: khz + /* PPSMC_MSG_SetHardMinDcefclkByFreq: mhz * fixed clock at requested freq, either from FCH bypass or DFS */ - unsigned int hard_min_dcefclk_khz; + unsigned int hard_min_dcefclk_mhz; /* PPSMC_MSG_SetMinDeepSleepDcefclk: mhz * when DF is in cstate, dcf clock is further divided down @@ -102,14 +102,20 @@ struct pp_smu_funcs_rv { */ void (*set_display_count)(struct pp_smu *pp, int count); - /* which SMU message? are reader and writer WM separate SMU msg? 
*/ + /* reader and writer WM's are sent together as part of one table*/ + /* + * PPSMC_MSG_SetDriverDramAddrHigh + * PPSMC_MSG_SetDriverDramAddrLow + * PPSMC_MSG_TransferTableDram2Smu + * + * */ void (*set_wm_ranges)(struct pp_smu *pp, struct pp_smu_wm_range_sets *ranges); /* PPSMC_MSG_SetHardMinDcfclkByFreq * fixed clock at requested freq, either from FCH bypass or DFS */ - void (*set_hard_min_dcfclk_by_freq)(struct pp_smu *pp, int khz); + void (*set_hard_min_dcfclk_by_freq)(struct pp_smu *pp, int mhz); /* PPSMC_MSG_SetMinDeepSleepDcfclk * when DF is in cstate, dcf clock is further divided down @@ -120,12 +126,12 @@ struct pp_smu_funcs_rv { /* PPSMC_MSG_SetHardMinFclkByFreq * FCLK will vary with DPM, but never below requested hard min */ - void (*set_hard_min_fclk_by_freq)(struct pp_smu *pp, int khz); + void (*set_hard_min_fclk_by_freq)(struct pp_smu *pp, int mhz); /* PPSMC_MSG_SetHardMinSocclkByFreq * Needed for DWB support */ - void (*set_hard_min_socclk_by_freq)(struct pp_smu *pp, int khz); + void (*set_hard_min_socclk_by_freq)(struct pp_smu *pp, int mhz); /* PME w/a */ void (*set_pme_wa_enable)(struct pp_smu *pp); diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h index 28128c02de00..1961cc6d9143 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services.h @@ -31,6 +31,8 @@ #define __DM_SERVICES_H__ +#include "amdgpu_dm_trace.h" + /* TODO: remove when DC is complete. 
*/ #include "dm_services_types.h" #include "logger_interface.h" @@ -70,6 +72,7 @@ static inline uint32_t dm_read_reg_func( } #endif value = cgs_read_register(ctx->cgs_device, address); + trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value); return value; } @@ -90,6 +93,7 @@ static inline void dm_write_reg_func( } #endif cgs_write_register(ctx->cgs_device, address, value); + trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value); } static inline uint32_t dm_read_index_reg( @@ -351,8 +355,12 @@ unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx, /* * performance tracing */ -void dm_perf_trace_timestamp(const char *func_name, unsigned int line); -#define PERF_TRACE() dm_perf_trace_timestamp(__func__, __LINE__) +#define PERF_TRACE() trace_amdgpu_dc_performance(CTX->perf_trace->read_count,\ + CTX->perf_trace->write_count, &CTX->perf_trace->last_entry_read,\ + &CTX->perf_trace->last_entry_write, __func__, __LINE__) +#define PERF_TRACE_CTX(__CTX) trace_amdgpu_dc_performance(__CTX->perf_trace->read_count,\ + __CTX->perf_trace->write_count, &__CTX->perf_trace->last_entry_read,\ + &__CTX->perf_trace->last_entry_write, __func__, __LINE__) /* diff --git a/drivers/gpu/drm/amd/display/dc/dm_services_types.h b/drivers/gpu/drm/amd/display/dc/dm_services_types.h index 2b83f922ac02..1af8c777b3ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dm_services_types.h +++ b/drivers/gpu/drm/amd/display/dc/dm_services_types.h @@ -208,22 +208,20 @@ struct dm_bl_data_point { /* Brightness level as effective value in range 0-255, * corresponding to above percentage */ - uint8_t signalLevel; + uint8_t signal_level; }; /* Total size of the structure should not exceed 256 bytes */ struct dm_acpi_atif_backlight_caps { - - uint16_t size; /* Bytes 0-1 (2 bytes) */ uint16_t flags; /* Byted 2-3 (2 bytes) */ - uint8_t errorCode; /* Byte 4 */ - uint8_t acLevelPercentage; /* Byte 5 */ - uint8_t dcLevelPercentage; /* Byte 6 */ - uint8_t minInputSignal; /* Byte 7 */ - 
uint8_t maxInputSignal; /* Byte 8 */ - uint8_t numOfDataPoints; /* Byte 9 */ - struct dm_bl_data_point dataPoints[99]; /* Bytes 10-207 (198 bytes)*/ + uint8_t error_code; /* Byte 4 */ + uint8_t ac_level_percentage; /* Byte 5 */ + uint8_t dc_level_percentage; /* Byte 6 */ + uint8_t min_input_signal; /* Byte 7 */ + uint8_t max_input_signal; /* Byte 8 */ + uint8_t num_data_points; /* Byte 9 */ + struct dm_bl_data_point data_points[99]; /* Bytes 10-207 (198 bytes)*/ }; enum dm_acpi_display_type { diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index cbafce649e33..5dd04520ceca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -113,7 +113,8 @@ struct _vcs_dpi_soc_bounding_box_st { int use_urgent_burst_bw; double max_hscl_ratio; double max_vscl_ratio; - struct _vcs_dpi_voltage_scaling_st clock_limits[7]; + unsigned int num_states; + struct _vcs_dpi_voltage_scaling_st clock_limits[8]; }; struct _vcs_dpi_ip_params_st { diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c index f20161c5706d..dada04296025 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c @@ -56,7 +56,6 @@ struct gpio_service *dal_gpio_service_create( struct dc_context *ctx) { struct gpio_service *service; - uint32_t index_of_id; service = kzalloc(sizeof(struct gpio_service), GFP_KERNEL); @@ -78,44 +77,33 @@ struct gpio_service *dal_gpio_service_create( goto failure_1; } - /* allocate and initialize business storage */ + /* allocate and initialize busyness storage */ { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - index_of_id = 0; service->ctx = ctx; do { uint32_t number_of_bits = service->factory.number_of_pins[index_of_id]; + uint32_t i = 0; - uint32_t number_of_uints = - (number_of_bits + 
bits_per_uint - 1) / - bits_per_uint; - - uint32_t *slot; - - if (number_of_bits) { - uint32_t index_of_uint = 0; + if (number_of_bits) { + service->busyness[index_of_id] = + kcalloc(number_of_bits, sizeof(char), + GFP_KERNEL); - slot = kcalloc(number_of_uints, - sizeof(uint32_t), - GFP_KERNEL); - - if (!slot) { + if (!service->busyness[index_of_id]) { BREAK_TO_DEBUGGER(); goto failure_2; } do { - slot[index_of_uint] = 0; - - ++index_of_uint; - } while (index_of_uint < number_of_uints); - } else - slot = NULL; - - service->busyness[index_of_id] = slot; + service->busyness[index_of_id][i] = 0; + ++i; + } while (i < number_of_bits); + } else { + service->busyness[index_of_id] = NULL; + } ++index_of_id; } while (index_of_id < GPIO_ID_COUNT); @@ -125,13 +113,8 @@ struct gpio_service *dal_gpio_service_create( failure_2: while (index_of_id) { - uint32_t *slot; - --index_of_id; - - slot = service->busyness[index_of_id]; - - kfree(slot); + kfree(service->busyness[index_of_id]); } failure_1: @@ -169,9 +152,7 @@ void dal_gpio_service_destroy( uint32_t index_of_id = 0; do { - uint32_t *slot = (*ptr)->busyness[index_of_id]; - - kfree(slot); + kfree((*ptr)->busyness[index_of_id]); ++index_of_id; } while (index_of_id < GPIO_ID_COUNT); @@ -192,11 +173,7 @@ static bool is_pin_busy( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - const uint32_t *slot = service->busyness[id] + (en / bits_per_uint); - - return 0 != (*slot & (1 << (en % bits_per_uint))); + return service->busyness[id][en]; } static void set_pin_busy( @@ -204,10 +181,7 @@ static void set_pin_busy( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - service->busyness[id][en / bits_per_uint] |= - (1 << (en % bits_per_uint)); + service->busyness[id][en] = true; } static void set_pin_free( @@ -215,10 +189,7 @@ static void set_pin_free( enum gpio_id id, uint32_t en) { - const uint32_t bits_per_uint = sizeof(uint32_t) << 3; - - 
service->busyness[id][en / bits_per_uint] &= - ~(1 << (en % bits_per_uint)); + service->busyness[id][en] = false; } enum gpio_result dal_gpio_service_open( diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h index c7f3081f59cc..1d501a43d13b 100644 --- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h +++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h @@ -36,10 +36,9 @@ struct gpio_service { /* * @brief * Business storage. - * For each member of 'enum gpio_id', - * store array of bits (packed into uint32_t slots), - * index individual bit by 'en' value */ - uint32_t *busyness[GPIO_ID_COUNT]; + * one byte For each member of 'enum gpio_id' + */ + char *busyness[GPIO_ID_COUNT]; }; enum gpio_result dal_gpio_service_open( diff --git a/drivers/gpu/drm/amd/display/dc/inc/bw_fixed.h b/drivers/gpu/drm/amd/display/dc/inc/bw_fixed.h index 39ee8eba3c31..d1656c9d50df 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/bw_fixed.h +++ b/drivers/gpu/drm/amd/display/dc/inc/bw_fixed.h @@ -126,7 +126,7 @@ static inline struct bw_fixed bw_div(const struct bw_fixed arg1, const struct bw static inline struct bw_fixed bw_mod(const struct bw_fixed arg1, const struct bw_fixed arg2) { struct bw_fixed res; - div64_u64_rem(arg1.value, arg2.value, &res.value); + div64_u64_rem(arg1.value, arg2.value, (uint64_t *)&res.value); return res; } diff --git a/drivers/gpu/drm/amd/display/dc/inc/compressor.h b/drivers/gpu/drm/amd/display/dc/inc/compressor.h index bcb18f5e1e60..7a147a9762a0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/compressor.h +++ b/drivers/gpu/drm/amd/display/dc/inc/compressor.h @@ -77,6 +77,7 @@ struct compressor_funcs { }; struct compressor { struct dc_context *ctx; + /* CONTROLLER_ID_D0 + instance, CONTROLLER_ID_UNDEFINED = 0 */ uint32_t attached_inst; bool is_enabled; const struct compressor_funcs *funcs; diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h 
b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index c1976c175b57..b168a5e9dd9d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -82,7 +82,7 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option); void core_link_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); /********** DAL Core*********************/ -#include "display_clock.h" +#include "hw/clk_mgr.h" #include "transform.h" #include "dpp.h" @@ -169,6 +169,7 @@ struct resource_pool { unsigned int audio_count; struct audio_support audio_support; + struct clk_mgr *clk_mgr; struct dccg *dccg; struct irq_service *irqs; @@ -271,6 +272,17 @@ union bw_context { struct dce_bw_output dce; }; +/** + * struct dc_state - The full description of a state requested by a user + * + * @streams: Stream properties + * @stream_status: The planes on a given stream + * @res_ctx: Persistent state of resources + * @bw: The output from bandwidth and watermark calculations + * @pp_display_cfg: PowerPlay clocks and settings + * @dcn_bw_vars: non-stack memory to support bandwidth calculations + * + */ struct dc_state { struct dc_stream_state *streams[MAX_PIPES]; struct dc_stream_status stream_status[MAX_PIPES]; @@ -278,7 +290,6 @@ struct dc_state { struct resource_context res_ctx; - /* The output from BW and WM calculations. */ union bw_context bw; /* Note: these are big structures, do *not* put on stack! 
*/ @@ -287,7 +298,7 @@ struct dc_state { struct dcn_bw_internal_vars dcn_bw_vars; #endif - struct dccg *dis_clk; + struct clk_mgr *dccg; struct kref refcount; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index e688eb9b975c..ece954a40a8e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -31,8 +31,8 @@ #define __DCN_CALCS_H__ #include "bw_fixed.h" -#include "display_clock.h" #include "../dml/display_mode_lib.h" +#include "hw/clk_mgr.h" struct dc; struct dc_state; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index a83a48494613..abc961c0906e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -47,12 +47,18 @@ struct abm_funcs { bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); bool (*set_abm_immediate_disable)(struct abm *abm); bool (*init_backlight)(struct abm *abm); - bool (*set_backlight_level)(struct abm *abm, - unsigned int backlight_level, + + /* backlight_pwm_u16_16 is unsigned 32 bit, + * 16 bit integer + 16 fractional, where 1.0 is max backlight value. 
+ */ + bool (*set_backlight_level_pwm)(struct abm *abm, + unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, bool use_smooth_brightness); - unsigned int (*get_current_backlight_8_bit)(struct abm *abm); + + unsigned int (*get_current_backlight)(struct abm *abm); + unsigned int (*get_target_backlight)(struct abm *abm); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/display_clock.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 689faa16c0ae..23a4b18e5fee 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/display_clock.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -23,41 +23,25 @@ * */ -#ifndef __DISPLAY_CLOCK_H__ -#define __DISPLAY_CLOCK_H__ +#ifndef __DAL_CLK_MGR_H__ +#define __DAL_CLK_MGR_H__ #include "dm_services_types.h" #include "dc.h" -/* Structure containing all state-dependent clocks - * (dependent on "enum clocks_state") */ -struct state_dependent_clocks { - int display_clk_khz; - int pixel_clk_khz; -}; - -struct dccg { +struct clk_mgr { struct dc_context *ctx; - const struct display_clock_funcs *funcs; + const struct clk_mgr_funcs *funcs; - enum dm_pp_clocks_state max_clks_state; - enum dm_pp_clocks_state cur_min_clks_state; struct dc_clocks clks; }; -struct display_clock_funcs { - void (*update_clocks)(struct dccg *dccg, - struct dc_clocks *new_clocks, +struct clk_mgr_funcs { + void (*update_clocks)(struct clk_mgr *clk_mgr, + struct dc_state *context, bool safe_to_lower); - int (*set_dispclk)(struct dccg *dccg, - int requested_clock_khz); - - int (*get_dp_ref_clk_frequency)(struct dccg *dccg); - bool (*update_dfs_bypass)(struct dccg *dccg, - struct dc *dc, - struct dc_state *context, - int requested_clock_khz); + int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr); }; -#endif /* __DISPLAY_CLOCK_H__ */ +#endif /* __DAL_CLK_MGR_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h new file mode 100644 index 
000000000000..95a56d012626 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -0,0 +1,44 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DAL_DCCG_H__ +#define __DAL_DCCG_H__ + +#include "dc_types.h" + +struct dccg { + struct dc_context *ctx; + const struct dccg_funcs *funcs; + + int ref_dppclk; +}; + +struct dccg_funcs { + void (*update_dpp_dto)(struct dccg *dccg, + int dpp_inst, + int req_dppclk); +}; + +#endif //__DAL_DCCG_H__ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h index 4550747fb61c..cb85eaa9857f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h @@ -32,6 +32,13 @@ enum dmcu_state { DMCU_RUNNING = 1 }; +struct dmcu_version { + unsigned int date; + unsigned int month; + unsigned int year; + unsigned int interface_version; +}; + struct dmcu { struct dc_context *ctx; const struct dmcu_funcs *funcs; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 334c48cdafdc..04c6989aac58 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -63,6 +63,11 @@ struct hubp_funcs { struct _vcs_dpi_display_rq_regs_st *rq_regs, struct _vcs_dpi_display_pipe_dest_params_st *pipe_dest); + void (*hubp_setup_interdependent)( + struct hubp *hubp, + struct _vcs_dpi_display_dlg_regs_st *dlg_regs, + struct _vcs_dpi_display_ttu_regs_st *ttu_regs); + void (*dcc_control)(struct hubp *hubp, bool enable, bool independent_64b_blks); void (*mem_program_viewport)( @@ -121,6 +126,7 @@ struct hubp_funcs { void (*hubp_clk_cntl)(struct hubp *hubp, bool enable); void (*hubp_vtg_sel)(struct hubp *hubp, uint32_t otg_inst); void (*hubp_read_state)(struct hubp *hubp); + void (*hubp_clear_underflow)(struct hubp *hubp); void (*hubp_disable_control)(struct hubp *hubp, bool disable_hubp); unsigned int (*hubp_get_underflow_status)(struct hubp *hubp); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 
cf7433ebf91a..da85537a4488 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -53,6 +53,12 @@ struct curve_points { uint32_t custom_float_slope; }; +struct curve_points3 { + struct curve_points red; + struct curve_points green; + struct curve_points blue; +}; + struct pwl_result_data { struct fixed31_32 red; struct fixed31_32 green; @@ -71,9 +77,17 @@ struct pwl_result_data { uint32_t delta_blue_reg; }; +/* arr_curve_points - regamma regions/segments specification + * arr_points - beginning and end point specified separately (only one on DCE) + * corner_points - beginning and end point for all 3 colors (DCN) + * rgb_resulted - final curve + */ struct pwl_params { struct gamma_curve arr_curve_points[34]; - struct curve_points arr_points[2]; + union { + struct curve_points arr_points[2]; + struct curve_points3 corner_points[2]; + }; struct pwl_result_data rgb_resulted[256 + 3]; uint32_t hw_points_num; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h index e28e9770e0a3..c20fdcaac53b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/link_encoder.h @@ -65,7 +65,8 @@ struct encoder_feature_support { enum dc_color_depth max_hdmi_deep_color; unsigned int max_hdmi_pixel_clock; - bool ycbcr420_supported; + bool hdmi_ycbcr420_supported; + bool dp_ycbcr420_supported; }; union dpcd_psr_configuration { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index da89c2edb07c..06df02ddff6a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -31,7 +31,7 @@ #include "dml/display_mode_structs.h" struct dchub_init_data; -struct cstate_pstate_watermarks_st { +struct cstate_pstate_watermarks_st1 { uint32_t cstate_exit_ns; uint32_t 
cstate_enter_plus_exit_ns; uint32_t pstate_change_ns; @@ -40,7 +40,7 @@ struct cstate_pstate_watermarks_st { struct dcn_watermarks { uint32_t pte_meta_urgent_ns; uint32_t urgent_ns; - struct cstate_pstate_watermarks_st cstate_pstate; + struct cstate_pstate_watermarks_st1 cstate_pstate; }; struct dcn_watermark_set { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 26f29d5da3d8..d6a85f48b6d1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -32,8 +32,6 @@ #include "inc/hw/link_encoder.h" #include "core_status.h" -#define EDP_BACKLIGHT_RAMP_DISABLE_LEVEL 0xFFFFFFFF - enum pipe_gating_control { PIPE_GATING_CONTROL_DISABLE = 0, PIPE_GATING_CONTROL_ENABLE, @@ -87,11 +85,6 @@ struct hw_sequencer_funcs { void (*program_gamut_remap)( struct pipe_ctx *pipe_ctx); - void (*program_csc_matrix)( - struct pipe_ctx *pipe_ctx, - enum dc_color_space colorspace, - uint16_t *matrix); - void (*program_output_csc)(struct dc *dc, struct pipe_ctx *pipe_ctx, enum dc_color_space colorspace, @@ -177,10 +170,12 @@ struct hw_sequencer_funcs { struct pipe_ctx *pipe_ctx, bool blank); - void (*set_bandwidth)( + void (*prepare_bandwidth)( struct dc *dc, - struct dc_state *context, - bool safe_to_lower); + struct dc_state *context); + void (*optimize_bandwidth)( + struct dc *dc, + struct dc_state *context); void (*set_drr)(struct pipe_ctx **pipe_ctx, int num_pipes, int vmin, int vmax); @@ -205,16 +200,12 @@ struct hw_sequencer_funcs { void (*log_hw_state)(struct dc *dc, struct dc_log_buffer_ctx *log_ctx); void (*get_hw_state)(struct dc *dc, char *pBuf, unsigned int bufSize, unsigned int mask); + void (*clear_status_bits)(struct dc *dc, unsigned int mask); void (*wait_for_mpcc_disconnect)(struct dc *dc, struct resource_pool *res_pool, struct pipe_ctx *pipe_ctx); - void (*ready_shared_resources)(struct dc *dc, struct dc_state *context); - void 
(*optimize_shared_resources)(struct dc *dc); - void (*pplib_apply_display_requirements)( - struct dc *dc, - struct dc_state *context); void (*edp_power_control)( struct dc_link *link, bool enable); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 33b99e3ab10d..0086a2f1d21a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -30,9 +30,6 @@ #include "dal_asic_id.h" #include "dm_pp_smu.h" -/* TODO unhardcode, 4 for CZ*/ -#define MEMORY_TYPE_MULTIPLIER 4 - enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index cdcefd087487..479b77c2e89e 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -306,6 +306,18 @@ static struct fixed31_32 translate_from_linear_space( a1); } +static struct fixed31_32 calculate_gamma22(struct fixed31_32 arg) +{ + struct fixed31_32 gamma = dc_fixpt_from_fraction(22, 10); + + return translate_from_linear_space(arg, + dc_fixpt_zero, + dc_fixpt_zero, + dc_fixpt_zero, + dc_fixpt_zero, + gamma); +} + static struct fixed31_32 translate_to_linear_space( struct fixed31_32 arg, struct fixed31_32 a0, @@ -709,6 +721,175 @@ static void build_regamma(struct pwl_float_data_ex *rgb_regamma, } } +static void hermite_spline_eetf(struct fixed31_32 input_x, + struct fixed31_32 max_display, + struct fixed31_32 min_display, + struct fixed31_32 max_content, + struct fixed31_32 *out_x) +{ + struct fixed31_32 min_lum_pq; + struct fixed31_32 max_lum_pq; + struct fixed31_32 max_content_pq; + struct fixed31_32 ks; + struct fixed31_32 E1; + struct fixed31_32 E2; + struct fixed31_32 E3; + struct fixed31_32 t; + struct fixed31_32 t2; + struct fixed31_32 t3; + struct fixed31_32 two; + struct fixed31_32 three; + struct fixed31_32 
temp1; + struct fixed31_32 temp2; + struct fixed31_32 a = dc_fixpt_from_fraction(15, 10); + struct fixed31_32 b = dc_fixpt_from_fraction(5, 10); + struct fixed31_32 epsilon = dc_fixpt_from_fraction(1, 1000000); // dc_fixpt_epsilon is a bit too small + + if (dc_fixpt_eq(max_content, dc_fixpt_zero)) { + *out_x = dc_fixpt_zero; + return; + } + + compute_pq(input_x, &E1); + compute_pq(dc_fixpt_div(min_display, max_content), &min_lum_pq); + compute_pq(dc_fixpt_div(max_display, max_content), &max_lum_pq); + compute_pq(dc_fixpt_one, &max_content_pq); // always 1? DAL2 code is weird + a = dc_fixpt_div(dc_fixpt_add(dc_fixpt_one, b), max_content_pq); // (1+b)/maxContent + ks = dc_fixpt_sub(dc_fixpt_mul(a, max_lum_pq), b); // a * max_lum_pq - b + + if (dc_fixpt_lt(E1, ks)) + E2 = E1; + else if (dc_fixpt_le(ks, E1) && dc_fixpt_le(E1, dc_fixpt_one)) { + if (dc_fixpt_lt(epsilon, dc_fixpt_sub(dc_fixpt_one, ks))) + // t = (E1 - ks) / (1 - ks) + t = dc_fixpt_div(dc_fixpt_sub(E1, ks), + dc_fixpt_sub(dc_fixpt_one, ks)); + else + t = dc_fixpt_zero; + + two = dc_fixpt_from_int(2); + three = dc_fixpt_from_int(3); + + t2 = dc_fixpt_mul(t, t); + t3 = dc_fixpt_mul(t2, t); + temp1 = dc_fixpt_mul(two, t3); + temp2 = dc_fixpt_mul(three, t2); + + // (2t^3 - 3t^2 + 1) * ks + E2 = dc_fixpt_mul(ks, dc_fixpt_add(dc_fixpt_one, + dc_fixpt_sub(temp1, temp2))); + + // (-2t^3 + 3t^2) * max_lum_pq + E2 = dc_fixpt_add(E2, dc_fixpt_mul(max_lum_pq, + dc_fixpt_sub(temp2, temp1))); + + temp1 = dc_fixpt_mul(two, t2); + temp2 = dc_fixpt_sub(dc_fixpt_one, ks); + + // (t^3 - 2t^2 + t) * (1-ks) + E2 = dc_fixpt_add(E2, dc_fixpt_mul(temp2, + dc_fixpt_add(t, dc_fixpt_sub(t3, temp1)))); + } else + E2 = dc_fixpt_one; + + temp1 = dc_fixpt_sub(dc_fixpt_one, E2); + temp2 = dc_fixpt_mul(temp1, temp1); + temp2 = dc_fixpt_mul(temp2, temp2); + // temp2 = (1-E2)^4 + + E3 = dc_fixpt_add(E2, dc_fixpt_mul(min_lum_pq, temp2)); + compute_de_pq(E3, out_x); + + *out_x = dc_fixpt_div(*out_x, dc_fixpt_div(max_display, max_content)); 
+} + +static bool build_freesync_hdr(struct pwl_float_data_ex *rgb_regamma, + uint32_t hw_points_num, + const struct hw_x_point *coordinate_x, + const struct freesync_hdr_tf_params *fs_params) +{ + uint32_t i; + struct pwl_float_data_ex *rgb = rgb_regamma; + const struct hw_x_point *coord_x = coordinate_x; + struct fixed31_32 scaledX = dc_fixpt_zero; + struct fixed31_32 scaledX1 = dc_fixpt_zero; + struct fixed31_32 max_display; + struct fixed31_32 min_display; + struct fixed31_32 max_content; + struct fixed31_32 min_content; + struct fixed31_32 clip = dc_fixpt_one; + struct fixed31_32 output; + bool use_eetf = false; + bool is_clipped = false; + struct fixed31_32 sdr_white_level; + + if (fs_params == NULL || fs_params->max_content == 0 || + fs_params->max_display == 0) + return false; + + max_display = dc_fixpt_from_int(fs_params->max_display); + min_display = dc_fixpt_from_fraction(fs_params->min_display, 10000); + max_content = dc_fixpt_from_int(fs_params->max_content); + min_content = dc_fixpt_from_fraction(fs_params->min_content, 10000); + sdr_white_level = dc_fixpt_from_int(fs_params->sdr_white_level); + + if (fs_params->min_display > 1000) // cap at 0.1 at the bottom + min_display = dc_fixpt_from_fraction(1, 10); + if (fs_params->max_display < 100) // cap at 100 at the top + max_display = dc_fixpt_from_int(100); + + if (fs_params->min_content < fs_params->min_display) + use_eetf = true; + else + min_content = min_display; + + if (fs_params->max_content > fs_params->max_display) + use_eetf = true; + else + max_content = max_display; + + rgb += 32; // first 32 points have problems with fixed point, too small + coord_x += 32; + for (i = 32; i <= hw_points_num; i++) { + if (!is_clipped) { + if (use_eetf) { + /*max content is equal 1 */ + scaledX1 = dc_fixpt_div(coord_x->x, + dc_fixpt_div(max_content, sdr_white_level)); + hermite_spline_eetf(scaledX1, max_display, min_display, + max_content, &scaledX); + } else + scaledX = dc_fixpt_div(coord_x->x, + 
dc_fixpt_div(max_display, sdr_white_level)); + + if (dc_fixpt_lt(scaledX, clip)) { + if (dc_fixpt_lt(scaledX, dc_fixpt_zero)) + output = dc_fixpt_zero; + else + output = calculate_gamma22(scaledX); + + rgb->r = output; + rgb->g = output; + rgb->b = output; + } else { + is_clipped = true; + rgb->r = clip; + rgb->g = clip; + rgb->b = clip; + } + } else { + rgb->r = clip; + rgb->g = clip; + rgb->b = clip; + } + + ++coord_x; + ++rgb; + } + + return true; +} + static void build_degamma(struct pwl_float_data_ex *curve, uint32_t hw_points_num, const struct hw_x_point *coordinate_x, bool is_2_4) @@ -1356,7 +1537,8 @@ static bool map_regamma_hw_to_x_user( #define _EXTRA_POINTS 3 bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, - const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed) + const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, + const struct freesync_hdr_tf_params *fs_params) { struct dc_transfer_func_distributed_points *tf_pts = &output_tf->tf_pts; struct dividers dividers; @@ -1374,7 +1556,7 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, /* we can use hardcoded curve for plain SRGB TF */ if (output_tf->type == TF_TYPE_PREDEFINED && canRomBeUsed == true && output_tf->tf == TRANSFER_FUNCTION_SRGB && - (!mapUserRamp && ramp->type == GAMMA_RGB_256)) + (ramp->is_identity || (!mapUserRamp && ramp->type == GAMMA_RGB_256))) return true; output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; @@ -1424,6 +1606,12 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, MAX_HW_POINTS, coordinates_x, output_tf->sdr_ref_white_level); + } else if (tf == TRANSFER_FUNCTION_GAMMA22 && + fs_params != NULL) { + build_freesync_hdr(rgb_regamma, + MAX_HW_POINTS, + coordinates_x, + fs_params); } else { tf_pts->end_exponent = 0; tf_pts->x_point_at_y1_red = 1; @@ -1573,7 +1761,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, struct pwl_float_data *rgb_user = 
NULL; struct pwl_float_data_ex *curve = NULL; - struct gamma_pixel *axix_x = NULL; + struct gamma_pixel *axis_x = NULL; struct pixel_gamma_point *coeff = NULL; enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB; bool ret = false; @@ -1599,10 +1787,10 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, GFP_KERNEL); if (!curve) goto curve_alloc_fail; - axix_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axix_x), + axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x), GFP_KERNEL); - if (!axix_x) - goto axix_x_alloc_fail; + if (!axis_x) + goto axis_x_alloc_fail; coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff), GFP_KERNEL); if (!coeff) @@ -1615,7 +1803,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, tf = input_tf->tf; build_evenly_distributed_points( - axix_x, + axis_x, ramp->num_entries, dividers); @@ -1640,7 +1828,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, tf_pts->x_point_at_y1_blue = 1; map_regamma_hw_to_x_user(ramp, coeff, rgb_user, - coordinates_x, axix_x, curve, + coordinates_x, axis_x, curve, MAX_HW_POINTS, tf_pts, mapUserRamp && ramp->type != GAMMA_CUSTOM); if (ramp->type == GAMMA_CUSTOM) @@ -1650,8 +1838,8 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, kvfree(coeff); coeff_alloc_fail: - kvfree(axix_x); -axix_x_alloc_fail: + kvfree(axis_x); +axis_x_alloc_fail: kvfree(curve); curve_alloc_fail: kvfree(rgb_user); diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index 63ccb9c91224..a6e164df090a 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -73,12 +73,21 @@ struct regamma_lut { }; }; +struct freesync_hdr_tf_params { + unsigned int sdr_white_level; + unsigned int min_content; // luminance in 1/10000 nits + unsigned int max_content; 
// luminance in nits + unsigned int min_display; // luminance in 1/10000 nits + unsigned int max_display; // luminance in nits +}; + void setup_x_points_distribution(void); void precompute_pq(void); void precompute_de_pq(void); bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, - const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed); + const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, + const struct freesync_hdr_tf_params *fs_params); bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index 4018c7180d00..1544ed3f1747 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -37,6 +37,8 @@ #define RENDER_TIMES_MAX_COUNT 10 /* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */ #define BTR_EXIT_MARGIN 2000 +/*Threshold to exit fixed refresh rate*/ +#define FIXED_REFRESH_EXIT_MARGIN_IN_HZ 4 /* Number of consecutive frames to check before entering/exiting fixed refresh*/ #define FIXED_REFRESH_ENTER_FRAME_COUNT 5 #define FIXED_REFRESH_EXIT_FRAME_COUNT 5 @@ -257,40 +259,14 @@ static void apply_below_the_range(struct core_freesync *core_freesync, if (in_out_vrr->btr.btr_active) { in_out_vrr->btr.frame_counter = 0; in_out_vrr->btr.btr_active = false; - - /* Exit Fixed Refresh mode */ - } else if (in_out_vrr->fixed.fixed_active) { - - in_out_vrr->fixed.frame_counter++; - - if (in_out_vrr->fixed.frame_counter > - FIXED_REFRESH_EXIT_FRAME_COUNT) { - in_out_vrr->fixed.frame_counter = 0; - in_out_vrr->fixed.fixed_active = false; - } } } else if (last_render_time_in_us > max_render_time_in_us) { /* Enter Below the Range */ - if (!in_out_vrr->btr.btr_active && - in_out_vrr->btr.btr_enabled) { - in_out_vrr->btr.btr_active = true; - - 
/* Enter Fixed Refresh mode */ - } else if (!in_out_vrr->fixed.fixed_active && - !in_out_vrr->btr.btr_enabled) { - in_out_vrr->fixed.frame_counter++; - - if (in_out_vrr->fixed.frame_counter > - FIXED_REFRESH_ENTER_FRAME_COUNT) { - in_out_vrr->fixed.frame_counter = 0; - in_out_vrr->fixed.fixed_active = true; - } - } + in_out_vrr->btr.btr_active = true; } /* BTR set to "not active" so disengage */ if (!in_out_vrr->btr.btr_active) { - in_out_vrr->btr.btr_active = false; in_out_vrr->btr.inserted_duration_in_us = 0; in_out_vrr->btr.frames_to_insert = 0; in_out_vrr->btr.frame_counter = 0; @@ -375,7 +351,12 @@ static void apply_fixed_refresh(struct core_freesync *core_freesync, bool update = false; unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us; - if (last_render_time_in_us + BTR_EXIT_MARGIN < max_render_time_in_us) { + //Compute the exit refresh rate and exit frame duration + unsigned int exit_refresh_rate_in_milli_hz = ((1000000000/max_render_time_in_us) + + (1000*FIXED_REFRESH_EXIT_MARGIN_IN_HZ)); + unsigned int exit_frame_duration_in_us = 1000000000/exit_refresh_rate_in_milli_hz; + + if (last_render_time_in_us < exit_frame_duration_in_us) { /* Exit Fixed Refresh mode */ if (in_out_vrr->fixed.fixed_active) { in_out_vrr->fixed.frame_counter++; @@ -627,12 +608,12 @@ static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr, static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf, struct dc_info_packet *infopacket) { - if (app_tf != transfer_func_unknown) { + if (app_tf != TRANSFER_FUNC_UNKNOWN) { infopacket->valid = true; infopacket->sb[6] |= 0x08; // PB6 = [Bit 3 = Native Color Active] - if (app_tf == transfer_func_gamma_22) { + if (app_tf == TRANSFER_FUNC_GAMMA_22) { infopacket->sb[9] |= 0x04; // PB6 = [Bit 2 = Gamma 2.2 EOTF Active] } } @@ -707,11 +688,11 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync, return; switch (packet_type) { - case packet_type_fs2: + case PACKET_TYPE_FS2: 
build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket); break; - case packet_type_vrr: - case packet_type_fs1: + case PACKET_TYPE_VRR: + case PACKET_TYPE_FS1: default: build_vrr_infopacket_v1(stream->signal, vrr, infopacket); } diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h index 786b34380f85..5b1c9a4c7643 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h @@ -26,15 +26,13 @@ #ifndef MOD_INFO_PACKET_H_ #define MOD_INFO_PACKET_H_ -struct info_packet_inputs { - const struct dc_stream_state *pStream; -}; +#include "mod_shared.h" -struct info_packets { - struct dc_info_packet *pVscInfoPacket; -}; +//Forward Declarations +struct dc_stream_state; +struct dc_info_packet; -void mod_build_infopackets(struct info_packet_inputs *inputs, - struct info_packets *info_packets); +void mod_build_vsc_infopacket(const struct dc_stream_state *stream, + struct dc_info_packet *info_packet); #endif diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h index 238c431ae483..1bd02c0ac30c 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h @@ -23,27 +23,26 @@ * */ - #ifndef MOD_SHARED_H_ #define MOD_SHARED_H_ enum color_transfer_func { - transfer_func_unknown, - transfer_func_srgb, - transfer_func_bt709, - transfer_func_pq2084, - transfer_func_pq2084_interim, - transfer_func_linear_0_1, - transfer_func_linear_0_125, - transfer_func_dolbyvision, - transfer_func_gamma_22, - transfer_func_gamma_26 + TRANSFER_FUNC_UNKNOWN, + TRANSFER_FUNC_SRGB, + TRANSFER_FUNC_BT709, + TRANSFER_FUNC_PQ2084, + TRANSFER_FUNC_PQ2084_INTERIM, + TRANSFER_FUNC_LINEAR_0_1, + TRANSFER_FUNC_LINEAR_0_125, + TRANSFER_FUNC_GAMMA_22, + TRANSFER_FUNC_GAMMA_26 }; enum vrr_packet_type { - packet_type_vrr, - 
packet_type_fs1, - packet_type_fs2 + PACKET_TYPE_VRR, + PACKET_TYPE_FS1, + PACKET_TYPE_FS2 }; + #endif /* MOD_SHARED_H_ */ diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index ff8bfb9b43b0..db06fab2ad5c 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -25,6 +25,10 @@ #include "mod_info_packet.h" #include "core_types.h" +#include "dc_types.h" +#include "mod_shared.h" + +#define HDMI_INFOFRAME_TYPE_VENDOR 0x81 enum ColorimetryRGBDP { ColorimetryRGB_DP_sRGB = 0, @@ -41,7 +45,7 @@ enum ColorimetryYCCDP { ColorimetryYCC_DP_ITU2020YCbCr = 7, }; -static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, +void mod_build_vsc_infopacket(const struct dc_stream_state *stream, struct dc_info_packet *info_packet) { unsigned int vscPacketRevision = 0; @@ -159,7 +163,7 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, * DPCD register is exposed in the new Extended Receiver Capability field for DPCD Rev. 1.4 * (and higher). When MISC1. bit 6. is Set to 1, a Source device uses a VSC SDP to indicate * the Pixel Encoding/Colorimetry Format and that a Sink device must ignore MISC1, bit 7, and - * MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become “don’t care”).) + * MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become "don't care").) 
*/ if (vscPacketRevision == 0x5) { /* Secondary-data Packet ID = 0 */ @@ -320,10 +324,3 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream, } -void mod_build_infopackets(struct info_packet_inputs *inputs, - struct info_packets *info_packets) -{ - if (info_packets->pVscInfoPacket != NULL) - mod_build_vsc_infopacket(inputs->pStream, info_packets->pVscInfoPacket); -} - diff --git a/drivers/gpu/drm/amd/display/modules/power/Makefile b/drivers/gpu/drm/amd/display/modules/power/Makefile new file mode 100644 index 000000000000..87851f892a52 --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/power/Makefile @@ -0,0 +1,31 @@ +# +# Copyright 2017 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# +# Makefile for the 'power' sub-module of DAL. 
+# + +MOD_POWER = power_helpers.o + +AMD_DAL_MOD_POWER = $(addprefix $(AMDDALPATH)/modules/power/,$(MOD_POWER)) +#$(info ************ DAL POWER MODULE MAKEFILE ************) + +AMD_DISPLAY_FILES += $(AMD_DAL_MOD_POWER)
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c new file mode 100644 index 000000000000..00f63b7dd32f --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -0,0 +1,326 @@ +/* Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "power_helpers.h" +#include "dc/inc/hw/dmcu.h" + +#define DIV_ROUNDUP(a, b) (((a)+((b)/2))/(b)) + +/* Possible Min Reduction config from least aggressive to most aggressive + * 0 1 2 3 4 5 6 7 8 9 10 11 12 + * 100 98.0 94.1 94.1 85.1 80.3 75.3 69.4 60.0 57.6 50.2 49.8 40.0 % + */ +static const unsigned char min_reduction_table[13] = { +0xff, 0xfa, 0xf0, 0xf0, 0xd9, 0xcd, 0xc0, 0xb1, 0x99, 0x93, 0x80, 0x82, 0x66}; + +/* Possible Max Reduction configs from least aggressive to most aggressive + * 0 1 2 3 4 5 6 7 8 9 10 11 12 + * 96.1 89.8 85.1 80.3 69.4 64.7 64.7 50.2 39.6 30.2 30.2 30.2 19.6 % + */ +static const unsigned char max_reduction_table[13] = { +0xf5, 0xe5, 0xd9, 0xcd, 0xb1, 0xa5, 0xa5, 0x80, 0x65, 0x4d, 0x4d, 0x4d, 0x32}; + +/* Predefined ABM configuration sets. We may have different configuration sets + * in order to satisfy different power/quality requirements. + */ +static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_level] = { +/* ABM Level 1, ABM Level 2, ABM Level 3, ABM Level 4 */ +{ 2, 5, 7, 8 }, /* Default - Medium aggressiveness */ +{ 2, 5, 8, 11 }, /* Alt #1 - Increased aggressiveness */ +{ 0, 2, 4, 8 }, /* Alt #2 - Minimal aggressiveness */ +{ 3, 6, 10, 12 }, /* Alt #3 - Super aggressiveness */ +}; + +#define NUM_AMBI_LEVEL 5 +#define NUM_AGGR_LEVEL 4 +#define NUM_POWER_FN_SEGS 8 +#define NUM_BL_CURVE_SEGS 16 + +/* NOTE: iRAM is 256B in size */ +struct iram_table_v_2 { + /* flags */ + uint16_t flags; /* 0x00 U16 */ + + /* parameters for ABM2.0 algorithm */ + uint8_t min_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x02 U0.8 */ + uint8_t max_reduction[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x16 U0.8 */ + uint8_t bright_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x2a U2.6 */ + uint8_t bright_neg_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x3e U2.6 */ + uint8_t dark_pos_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 0x52 U2.6 */ + uint8_t dark_neg_gain[NUM_AMBI_LEVEL][NUM_AGGR_LEVEL]; /* 
0x66 U2.6 */ + uint8_t iir_curve[NUM_AMBI_LEVEL]; /* 0x7a U0.8 */ + uint8_t deviation_gain; /* 0x7f U0.8 */ + + /* parameters for crgb conversion */ + uint16_t crgb_thresh[NUM_POWER_FN_SEGS]; /* 0x80 U3.13 */ + uint16_t crgb_offset[NUM_POWER_FN_SEGS]; /* 0x90 U1.15 */ + uint16_t crgb_slope[NUM_POWER_FN_SEGS]; /* 0xa0 U4.12 */ + + /* parameters for custom curve */ + /* thresholds for brightness --> backlight */ + uint16_t backlight_thresholds[NUM_BL_CURVE_SEGS]; /* 0xb0 U16.0 */ + /* offsets for brightness --> backlight */ + uint16_t backlight_offsets[NUM_BL_CURVE_SEGS]; /* 0xd0 U16.0 */ + + /* For reading PSR State directly from IRAM */ + uint8_t psr_state; /* 0xf0 */ + uint8_t dmcu_interface_version; /* 0xf1 */ + uint8_t dmcu_date_version_year_b0; /* 0xf2 */ + uint8_t dmcu_date_version_year_b1; /* 0xf3 */ + uint8_t dmcu_date_version_month; /* 0xf4 */ + uint8_t dmcu_date_version_day; /* 0xf5 */ + uint8_t dmcu_state; /* 0xf6 */ + + uint16_t blRampReduction; /* 0xf7 */ + uint16_t blRampStart; /* 0xf9 */ + uint8_t dummy5; /* 0xfb */ + uint8_t dummy6; /* 0xfc */ + uint8_t dummy7; /* 0xfd */ + uint8_t dummy8; /* 0xfe */ + uint8_t dummy9; /* 0xff */ +}; + +static uint16_t backlight_8_to_16(unsigned int backlight_8bit) +{ + return (uint16_t)(backlight_8bit * 0x101); +} + +static void fill_backlight_transform_table(struct dmcu_iram_parameters params, + struct iram_table_v_2 *table) +{ + unsigned int i; + unsigned int num_entries = NUM_BL_CURVE_SEGS; + unsigned int query_input_8bit; + unsigned int query_output_8bit; + unsigned int lut_index; + + table->backlight_thresholds[0] = 0; + table->backlight_offsets[0] = params.backlight_lut_array[0]; + table->backlight_thresholds[num_entries-1] = 0xFFFF; + table->backlight_offsets[num_entries-1] = + params.backlight_lut_array[params.backlight_lut_array_size - 1]; + + /* Setup all brightness levels between 0% and 100% exclusive + * Fills brightness-to-backlight transform table. 
Backlight custom curve + * describes transform from brightness to backlight. It will be defined + * as set of thresholds and set of offsets, together, implying + * extrapolation of custom curve into 16 uniformly spanned linear + * segments. Each threshold/offset represented by 16 bit entry in + * format U4.10. + */ + for (i = 1; i+1 < num_entries; i++) { + query_input_8bit = DIV_ROUNDUP((i * 256), num_entries); + + lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1); + ASSERT(lut_index < params.backlight_lut_array_size); + query_output_8bit = params.backlight_lut_array[lut_index] >> 8; + + table->backlight_thresholds[i] = + backlight_8_to_16(query_input_8bit); + table->backlight_offsets[i] = + backlight_8_to_16(query_output_8bit); + } +} + +bool dmcu_load_iram(struct dmcu *dmcu, + struct dmcu_iram_parameters params) +{ + struct iram_table_v_2 ram_table; + unsigned int set = params.set; + + if (dmcu == NULL) + return false; + + if (!dmcu->funcs->is_dmcu_initialized(dmcu)) + return true; + + memset(&ram_table, 0, sizeof(ram_table)); + + ram_table.flags = 0x0; + ram_table.deviation_gain = 0xb3; + + ram_table.blRampReduction = + cpu_to_be16(params.backlight_ramping_reduction); + ram_table.blRampStart = + cpu_to_be16(params.backlight_ramping_start); + + ram_table.min_reduction[0][0] = min_reduction_table[abm_config[set][0]]; + ram_table.min_reduction[1][0] = min_reduction_table[abm_config[set][0]]; + ram_table.min_reduction[2][0] = min_reduction_table[abm_config[set][0]]; + ram_table.min_reduction[3][0] = min_reduction_table[abm_config[set][0]]; + ram_table.min_reduction[4][0] = min_reduction_table[abm_config[set][0]]; + ram_table.max_reduction[0][0] = max_reduction_table[abm_config[set][0]]; + ram_table.max_reduction[1][0] = max_reduction_table[abm_config[set][0]]; + ram_table.max_reduction[2][0] = max_reduction_table[abm_config[set][0]]; + ram_table.max_reduction[3][0] = max_reduction_table[abm_config[set][0]]; + ram_table.max_reduction[4][0] = 
max_reduction_table[abm_config[set][0]]; + + ram_table.min_reduction[0][1] = min_reduction_table[abm_config[set][1]]; + ram_table.min_reduction[1][1] = min_reduction_table[abm_config[set][1]]; + ram_table.min_reduction[2][1] = min_reduction_table[abm_config[set][1]]; + ram_table.min_reduction[3][1] = min_reduction_table[abm_config[set][1]]; + ram_table.min_reduction[4][1] = min_reduction_table[abm_config[set][1]]; + ram_table.max_reduction[0][1] = max_reduction_table[abm_config[set][1]]; + ram_table.max_reduction[1][1] = max_reduction_table[abm_config[set][1]]; + ram_table.max_reduction[2][1] = max_reduction_table[abm_config[set][1]]; + ram_table.max_reduction[3][1] = max_reduction_table[abm_config[set][1]]; + ram_table.max_reduction[4][1] = max_reduction_table[abm_config[set][1]]; + + ram_table.min_reduction[0][2] = min_reduction_table[abm_config[set][2]]; + ram_table.min_reduction[1][2] = min_reduction_table[abm_config[set][2]]; + ram_table.min_reduction[2][2] = min_reduction_table[abm_config[set][2]]; + ram_table.min_reduction[3][2] = min_reduction_table[abm_config[set][2]]; + ram_table.min_reduction[4][2] = min_reduction_table[abm_config[set][2]]; + ram_table.max_reduction[0][2] = max_reduction_table[abm_config[set][2]]; + ram_table.max_reduction[1][2] = max_reduction_table[abm_config[set][2]]; + ram_table.max_reduction[2][2] = max_reduction_table[abm_config[set][2]]; + ram_table.max_reduction[3][2] = max_reduction_table[abm_config[set][2]]; + ram_table.max_reduction[4][2] = max_reduction_table[abm_config[set][2]]; + + ram_table.min_reduction[0][3] = min_reduction_table[abm_config[set][3]]; + ram_table.min_reduction[1][3] = min_reduction_table[abm_config[set][3]]; + ram_table.min_reduction[2][3] = min_reduction_table[abm_config[set][3]]; + ram_table.min_reduction[3][3] = min_reduction_table[abm_config[set][3]]; + ram_table.min_reduction[4][3] = min_reduction_table[abm_config[set][3]]; + ram_table.max_reduction[0][3] = max_reduction_table[abm_config[set][3]]; + 
ram_table.max_reduction[1][3] = max_reduction_table[abm_config[set][3]]; + ram_table.max_reduction[2][3] = max_reduction_table[abm_config[set][3]]; + ram_table.max_reduction[3][3] = max_reduction_table[abm_config[set][3]]; + ram_table.max_reduction[4][3] = max_reduction_table[abm_config[set][3]]; + + ram_table.bright_pos_gain[0][0] = 0x20; + ram_table.bright_pos_gain[0][1] = 0x20; + ram_table.bright_pos_gain[0][2] = 0x20; + ram_table.bright_pos_gain[0][3] = 0x20; + ram_table.bright_pos_gain[1][0] = 0x20; + ram_table.bright_pos_gain[1][1] = 0x20; + ram_table.bright_pos_gain[1][2] = 0x20; + ram_table.bright_pos_gain[1][3] = 0x20; + ram_table.bright_pos_gain[2][0] = 0x20; + ram_table.bright_pos_gain[2][1] = 0x20; + ram_table.bright_pos_gain[2][2] = 0x20; + ram_table.bright_pos_gain[2][3] = 0x20; + ram_table.bright_pos_gain[3][0] = 0x20; + ram_table.bright_pos_gain[3][1] = 0x20; + ram_table.bright_pos_gain[3][2] = 0x20; + ram_table.bright_pos_gain[3][3] = 0x20; + ram_table.bright_pos_gain[4][0] = 0x20; + ram_table.bright_pos_gain[4][1] = 0x20; + ram_table.bright_pos_gain[4][2] = 0x20; + ram_table.bright_pos_gain[4][3] = 0x20; + ram_table.bright_neg_gain[0][1] = 0x00; + ram_table.bright_neg_gain[0][2] = 0x00; + ram_table.bright_neg_gain[0][3] = 0x00; + ram_table.bright_neg_gain[1][0] = 0x00; + ram_table.bright_neg_gain[1][1] = 0x00; + ram_table.bright_neg_gain[1][2] = 0x00; + ram_table.bright_neg_gain[1][3] = 0x00; + ram_table.bright_neg_gain[2][0] = 0x00; + ram_table.bright_neg_gain[2][1] = 0x00; + ram_table.bright_neg_gain[2][2] = 0x00; + ram_table.bright_neg_gain[2][3] = 0x00; + ram_table.bright_neg_gain[3][0] = 0x00; + ram_table.bright_neg_gain[3][1] = 0x00; + ram_table.bright_neg_gain[3][2] = 0x00; + ram_table.bright_neg_gain[3][3] = 0x00; + ram_table.bright_neg_gain[4][0] = 0x00; + ram_table.bright_neg_gain[4][1] = 0x00; + ram_table.bright_neg_gain[4][2] = 0x00; + ram_table.bright_neg_gain[4][3] = 0x00; + ram_table.dark_pos_gain[0][0] = 0x00; + 
ram_table.dark_pos_gain[0][1] = 0x00; + ram_table.dark_pos_gain[0][2] = 0x00; + ram_table.dark_pos_gain[0][3] = 0x00; + ram_table.dark_pos_gain[1][0] = 0x00; + ram_table.dark_pos_gain[1][1] = 0x00; + ram_table.dark_pos_gain[1][2] = 0x00; + ram_table.dark_pos_gain[1][3] = 0x00; + ram_table.dark_pos_gain[2][0] = 0x00; + ram_table.dark_pos_gain[2][1] = 0x00; + ram_table.dark_pos_gain[2][2] = 0x00; + ram_table.dark_pos_gain[2][3] = 0x00; + ram_table.dark_pos_gain[3][0] = 0x00; + ram_table.dark_pos_gain[3][1] = 0x00; + ram_table.dark_pos_gain[3][2] = 0x00; + ram_table.dark_pos_gain[3][3] = 0x00; + ram_table.dark_pos_gain[4][0] = 0x00; + ram_table.dark_pos_gain[4][1] = 0x00; + ram_table.dark_pos_gain[4][2] = 0x00; + ram_table.dark_pos_gain[4][3] = 0x00; + ram_table.dark_neg_gain[0][0] = 0x00; + ram_table.dark_neg_gain[0][1] = 0x00; + ram_table.dark_neg_gain[0][2] = 0x00; + ram_table.dark_neg_gain[0][3] = 0x00; + ram_table.dark_neg_gain[1][0] = 0x00; + ram_table.dark_neg_gain[1][1] = 0x00; + ram_table.dark_neg_gain[1][2] = 0x00; + ram_table.dark_neg_gain[1][3] = 0x00; + ram_table.dark_neg_gain[2][0] = 0x00; + ram_table.dark_neg_gain[2][1] = 0x00; + ram_table.dark_neg_gain[2][2] = 0x00; + ram_table.dark_neg_gain[2][3] = 0x00; + ram_table.dark_neg_gain[3][0] = 0x00; + ram_table.dark_neg_gain[3][1] = 0x00; + ram_table.dark_neg_gain[3][2] = 0x00; + ram_table.dark_neg_gain[3][3] = 0x00; + ram_table.dark_neg_gain[4][0] = 0x00; + ram_table.dark_neg_gain[4][1] = 0x00; + ram_table.dark_neg_gain[4][2] = 0x00; + ram_table.dark_neg_gain[4][3] = 0x00; + ram_table.iir_curve[0] = 0x65; + ram_table.iir_curve[1] = 0x65; + ram_table.iir_curve[2] = 0x65; + ram_table.iir_curve[3] = 0x65; + ram_table.iir_curve[4] = 0x65; + ram_table.crgb_thresh[0] = cpu_to_be16(0x13b6); + ram_table.crgb_thresh[1] = cpu_to_be16(0x1648); + ram_table.crgb_thresh[2] = cpu_to_be16(0x18e3); + ram_table.crgb_thresh[3] = cpu_to_be16(0x1b41); + ram_table.crgb_thresh[4] = cpu_to_be16(0x1d46); + ram_table.crgb_thresh[5] 
= cpu_to_be16(0x1f21); + ram_table.crgb_thresh[6] = cpu_to_be16(0x2167); + ram_table.crgb_thresh[7] = cpu_to_be16(0x2384); + ram_table.crgb_offset[0] = cpu_to_be16(0x2999); + ram_table.crgb_offset[1] = cpu_to_be16(0x3999); + ram_table.crgb_offset[2] = cpu_to_be16(0x4666); + ram_table.crgb_offset[3] = cpu_to_be16(0x5999); + ram_table.crgb_offset[4] = cpu_to_be16(0x6333); + ram_table.crgb_offset[5] = cpu_to_be16(0x7800); + ram_table.crgb_offset[6] = cpu_to_be16(0x8c00); + ram_table.crgb_offset[7] = cpu_to_be16(0xa000); + ram_table.crgb_slope[0] = cpu_to_be16(0x3147); + ram_table.crgb_slope[1] = cpu_to_be16(0x2978); + ram_table.crgb_slope[2] = cpu_to_be16(0x23a2); + ram_table.crgb_slope[3] = cpu_to_be16(0x1f55); + ram_table.crgb_slope[4] = cpu_to_be16(0x1c63); + ram_table.crgb_slope[5] = cpu_to_be16(0x1a0f); + ram_table.crgb_slope[6] = cpu_to_be16(0x178d); + ram_table.crgb_slope[7] = cpu_to_be16(0x15ab); + + fill_backlight_transform_table( + params, &ram_table); + + return dmcu->funcs->load_iram( + dmcu, 0, (char *)(&ram_table), sizeof(ram_table)); +} diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h new file mode 100644 index 000000000000..da5df00fedce --- /dev/null +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -0,0 +1,47 @@ +/* Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef MODULES_POWER_POWER_HELPERS_H_ +#define MODULES_POWER_POWER_HELPERS_H_ + +#include "dc/inc/hw/dmcu.h" + + +enum abm_defines { + abm_defines_max_level = 4, + abm_defines_max_config = 4, +}; + +struct dmcu_iram_parameters { + unsigned int *backlight_lut_array; + unsigned int backlight_lut_array_size; + unsigned int backlight_ramping_reduction; + unsigned int backlight_ramping_start; + unsigned int set; +}; + +bool dmcu_load_iram(struct dmcu *dmcu, + struct dmcu_iram_parameters params); + +#endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_acpi.h b/drivers/gpu/drm/amd/include/amd_acpi.h index 9b9699fc433f..c72cbfe8f684 100644 --- a/drivers/gpu/drm/amd/include/amd_acpi.h +++ b/drivers/gpu/drm/amd/include/amd_acpi.h @@ -52,6 +52,30 @@ struct atif_sbios_requests { u8 backlight_level; /* panel backlight level (0-255) */ } __packed; +struct atif_qbtc_arguments { + u16 size; /* structure size in bytes (includes size field) */ + u8 requested_display; /* which display is requested */ +} __packed; + +#define ATIF_QBTC_MAX_DATA_POINTS 99 + +struct atif_qbtc_data_point { + u8 luminance; /* luminance in percent */ + u8 ipnut_signal; /* input signal in range 0-255 */ +} __packed; + +struct atif_qbtc_output { + u16 size; /* structure size in bytes (includes size field) */ + u16 flags; /* all zeroes */ + u8 error_code; /* error code */ + u8 ac_level; /* default brightness on AC power */ + u8 dc_level; /* default 
brightness on DC power */ + u8 min_input_signal; /* min input signal in range 0-255 */ + u8 max_input_signal; /* max input signal in range 0-255 */ + u8 number_of_points; /* number of data points */ + struct atif_qbtc_data_point data_points[ATIF_QBTC_MAX_DATA_POINTS]; +} __packed; + #define ATIF_NOTIFY_MASK 0x3 #define ATIF_NOTIFY_NONE 0 #define ATIF_NOTIFY_81 1 @@ -126,26 +150,18 @@ struct atcs_pref_req_output { * DWORD - supported functions bit vector */ /* Notifications mask */ -# define ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED (1 << 0) -# define ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED (1 << 1) # define ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED (1 << 2) # define ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED (1 << 3) # define ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED (1 << 4) -# define ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED (1 << 5) -# define ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED (1 << 6) # define ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED (1 << 7) # define ATIF_DGPU_DISPLAY_EVENT_SUPPORTED (1 << 8) +# define ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED (1 << 12) /* supported functions vector */ # define ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED (1 << 0) # define ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED (1 << 1) -# define ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED (1 << 2) -# define ATIF_GET_LID_STATE_SUPPORTED (1 << 3) -# define ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED (1 << 4) -# define ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED (1 << 5) -# define ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED (1 << 6) -# define ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED (1 << 7) # define ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED (1 << 12) -# define ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED (1 << 14) +# define ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED (1 << 15) +# define ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED (1 << 16) # define ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED (1 << 20) #define ATIF_FUNCTION_GET_SYSTEM_PARAMETERS 0x1 /* ARG0:
ATIF_FUNCTION_GET_SYSTEM_PARAMETERS @@ -170,6 +186,10 @@ struct atcs_pref_req_output { * n (0xd0-0xd9) is specified in notify command code. * bit 2: * 1 - lid changes not reported though int10 + * bit 3: + * 1 - system bios controls overclocking + * bit 4: + * 1 - enable overclocking */ #define ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS 0x2 /* ARG0: ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS @@ -177,28 +197,23 @@ struct atcs_pref_req_output { * OUTPUT: * WORD - structure size in bytes (includes size field) * DWORD - pending sbios requests - * BYTE - panel expansion mode + * BYTE - reserved (all zeroes) * BYTE - thermal state: target gfx controller * BYTE - thermal state: state id (0: exit state, non-0: state) * BYTE - forced power state: target gfx controller - * BYTE - forced power state: state id + * BYTE - forced power state: state id (0: forced state, non-0: state) * BYTE - system power source * BYTE - panel backlight level (0-255) + * BYTE - GPU package power limit: target gfx controller + * DWORD - GPU package power limit: value (24:8 fractional format, Watts) */ /* pending sbios requests */ -# define ATIF_DISPLAY_SWITCH_REQUEST (1 << 0) -# define ATIF_EXPANSION_MODE_CHANGE_REQUEST (1 << 1) # define ATIF_THERMAL_STATE_CHANGE_REQUEST (1 << 2) # define ATIF_FORCED_POWER_STATE_CHANGE_REQUEST (1 << 3) # define ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST (1 << 4) -# define ATIF_DISPLAY_CONF_CHANGE_REQUEST (1 << 5) -# define ATIF_PX_GFX_SWITCH_REQUEST (1 << 6) # define ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST (1 << 7) # define ATIF_DGPU_DISPLAY_EVENT (1 << 8) -/* panel expansion mode */ -# define ATIF_PANEL_EXPANSION_DISABLE 0 -# define ATIF_PANEL_EXPANSION_FULL 1 -# define ATIF_PANEL_EXPANSION_ASPECT 2 +# define ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST (1 << 12) /* target gfx controller */ # define ATIF_TARGET_GFX_SINGLE 0 # define ATIF_TARGET_GFX_PX_IGPU 1 @@ -208,76 +223,6 @@ struct atcs_pref_req_output { # define ATIF_POWER_SOURCE_DC 2 # define ATIF_POWER_SOURCE_RESTRICTED_AC_1 3 
# define ATIF_POWER_SOURCE_RESTRICTED_AC_2 4 -#define ATIF_FUNCTION_SELECT_ACTIVE_DISPLAYS 0x3 -/* ARG0: ATIF_FUNCTION_SELECT_ACTIVE_DISPLAYS - * ARG1: - * WORD - structure size in bytes (includes size field) - * WORD - selected displays - * WORD - connected displays - * OUTPUT: - * WORD - structure size in bytes (includes size field) - * WORD - selected displays - */ -# define ATIF_LCD1 (1 << 0) -# define ATIF_CRT1 (1 << 1) -# define ATIF_TV (1 << 2) -# define ATIF_DFP1 (1 << 3) -# define ATIF_CRT2 (1 << 4) -# define ATIF_LCD2 (1 << 5) -# define ATIF_DFP2 (1 << 7) -# define ATIF_CV (1 << 8) -# define ATIF_DFP3 (1 << 9) -# define ATIF_DFP4 (1 << 10) -# define ATIF_DFP5 (1 << 11) -# define ATIF_DFP6 (1 << 12) -#define ATIF_FUNCTION_GET_LID_STATE 0x4 -/* ARG0: ATIF_FUNCTION_GET_LID_STATE - * ARG1: none - * OUTPUT: - * WORD - structure size in bytes (includes size field) - * BYTE - lid state (0: open, 1: closed) - * - * GET_LID_STATE only works at boot and resume, for general lid - * status, use the kernel provided status - */ -#define ATIF_FUNCTION_GET_TV_STANDARD_FROM_CMOS 0x5 -/* ARG0: ATIF_FUNCTION_GET_TV_STANDARD_FROM_CMOS - * ARG1: none - * OUTPUT: - * WORD - structure size in bytes (includes size field) - * BYTE - 0 - * BYTE - TV standard - */ -# define ATIF_TV_STD_NTSC 0 -# define ATIF_TV_STD_PAL 1 -# define ATIF_TV_STD_PALM 2 -# define ATIF_TV_STD_PAL60 3 -# define ATIF_TV_STD_NTSCJ 4 -# define ATIF_TV_STD_PALCN 5 -# define ATIF_TV_STD_PALN 6 -# define ATIF_TV_STD_SCART_RGB 9 -#define ATIF_FUNCTION_SET_TV_STANDARD_IN_CMOS 0x6 -/* ARG0: ATIF_FUNCTION_SET_TV_STANDARD_IN_CMOS - * ARG1: - * WORD - structure size in bytes (includes size field) - * BYTE - 0 - * BYTE - TV standard - * OUTPUT: none - */ -#define ATIF_FUNCTION_GET_PANEL_EXPANSION_MODE_FROM_CMOS 0x7 -/* ARG0: ATIF_FUNCTION_GET_PANEL_EXPANSION_MODE_FROM_CMOS - * ARG1: none - * OUTPUT: - * WORD - structure size in bytes (includes size field) - * BYTE - panel expansion mode - */ -#define 
ATIF_FUNCTION_SET_PANEL_EXPANSION_MODE_IN_CMOS 0x8 -/* ARG0: ATIF_FUNCTION_SET_PANEL_EXPANSION_MODE_IN_CMOS - * ARG1: - * WORD - structure size in bytes (includes size field) - * BYTE - panel expansion mode - * OUTPUT: none - */ #define ATIF_FUNCTION_TEMPERATURE_CHANGE_NOTIFICATION 0xD /* ARG0: ATIF_FUNCTION_TEMPERATURE_CHANGE_NOTIFICATION * ARG1: @@ -286,21 +231,43 @@ struct atcs_pref_req_output { * BYTE - current temperature (degress Celsius) * OUTPUT: none */ -#define ATIF_FUNCTION_GET_GRAPHICS_DEVICE_TYPES 0xF -/* ARG0: ATIF_FUNCTION_GET_GRAPHICS_DEVICE_TYPES - * ARG1: none +#define ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS 0x10 +/* ARG0: ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS + * ARG1: + * WORD - structure size in bytes (includes size field) + * BYTE - requested display * OUTPUT: - * WORD - number of gfx devices - * WORD - device structure size in bytes (excludes device size field) - * DWORD - flags \ - * WORD - bus number } repeated structure - * WORD - device number / + * WORD - structure size in bytes (includes size field) + * WORD - flags (currently all 16 bits are reserved) + * BYTE - error code (on failure, disregard all below fields) + * BYTE - AC level (default brightness in percent when machine has full power) + * BYTE - DC level (default brightness in percent when machine is on battery) + * BYTE - min input signal, in range 0-255, corresponding to 0% backlight + * BYTE - max input signal, in range 0-255, corresponding to 100% backlight + * BYTE - number of reported data points + * BYTE - luminance level in percent \ repeated structure + * BYTE - input signal in range 0-255 / does not have entries for 0% and 100% + */ +/* requested display */ +# define ATIF_QBTC_REQUEST_LCD1 0 +# define ATIF_QBTC_REQUEST_CRT1 1 +# define ATIF_QBTC_REQUEST_DFP1 3 +# define ATIF_QBTC_REQUEST_CRT2 4 +# define ATIF_QBTC_REQUEST_LCD2 5 +# define ATIF_QBTC_REQUEST_DFP2 7 +# define ATIF_QBTC_REQUEST_DFP3 9 +# define ATIF_QBTC_REQUEST_DFP4 10 +# 
define ATIF_QBTC_REQUEST_DFP5 11 +# define ATIF_QBTC_REQUEST_DFP6 12 +/* error code */ +# define ATIF_QBTC_ERROR_CODE_SUCCESS 0 +# define ATIF_QBTC_ERROR_CODE_FAILURE 1 +# define ATIF_QBTC_ERROR_CODE_DEVICE_NOT_SUPPORTED 2 +#define ATIF_FUNCTION_READY_TO_UNDOCK_NOTIFICATION 0x11 +/* ARG0: ATIF_FUNCTION_READY_TO_UNDOCK_NOTIFICATION + * ARG1: none + * OUTPUT: none */ -/* flags */ -# define ATIF_PX_REMOVABLE_GRAPHICS_DEVICE (1 << 0) -# define ATIF_XGP_PORT (1 << 1) -# define ATIF_VGA_ENABLED_GRAPHICS_DEVICE (1 << 2) -# define ATIF_XGP_PORT_IN_DOCK (1 << 3) #define ATIF_FUNCTION_GET_EXTERNAL_GPU_INFORMATION 0x15 /* ARG0: ATIF_FUNCTION_GET_EXTERNAL_GPU_INFORMATION * ARG1: none diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h new file mode 100644 index 000000000000..8f515875a34d --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _mmhub_9_4_0_OFFSET_HEADER +#define _mmhub_9_4_0_OFFSET_HEADER + + +// addressBlock: mmhub_utcl2_vmsharedpfdec +// base address: 0x6a040 +#define mmMC_VM_XGMI_LFB_CNTL 0x0823 +#define mmMC_VM_XGMI_LFB_CNTL_BASE_IDX 0 +#define mmMC_VM_XGMI_LFB_SIZE 0x0824 +#define mmMC_VM_XGMI_LFB_SIZE_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h new file mode 100644 index 000000000000..0a6b072d191e --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _mmhub_9_4_0_SH_MASK_HEADER +#define _mmhub_9_4_0_SH_MASK_HEADER + + +// addressBlock: mmhub_utcl2_vmsharedpfdec +//MC_VM_XGMI_LFB_CNTL +#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION__SHIFT 0x0 +#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION__SHIFT 0x4 +#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION_MASK 0x00000007L +#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION_MASK 0x00000070L +//MC_VM_XGMI_LFB_SIZE +#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT 0x0 +#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK 0x0000FFFFL + +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 64ecffd52126..8154d67388cc 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -188,8 +188,8 @@ struct tile_config { */ #define ALLOC_MEM_FLAGS_VRAM (1 << 0) #define ALLOC_MEM_FLAGS_GTT (1 << 1) -#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ -#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ +#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) +#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* * Allocation flags attributes/access options. @@ -205,20 +205,6 @@ struct tile_config { /** * struct kfd2kgd_calls * - * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture. 
- * The buffer can be used for mqds, hpds, kernel queue, fence and runlists - * - * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture - * - * @get_local_mem_info: Retrieves information about GPU local memory - * - * @get_gpu_clock_counter: Retrieves GPU clock counter - * - * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz - * - * @alloc_pasid: Allocate a PASID - * @free_pasid: Free a PASID - * * @program_sh_mem_settings: A function that should initiate the memory * properties such as main aperture memory type (cache / non cached) and * secondary aperture base address, size and memory type. @@ -255,64 +241,16 @@ struct tile_config { * * @get_tile_config: Returns GPU-specific tiling mode information * - * @get_cu_info: Retrieves activated cu info - * - * @get_vram_usage: Returns current VRAM usage - * - * @create_process_vm: Create a VM address space for a given process and GPU - * - * @destroy_process_vm: Destroy a VM - * - * @get_process_page_dir: Get physical address of a VM page directory - * * @set_vm_context_page_table_base: Program page table base for a VMID * - * @alloc_memory_of_gpu: Allocate GPUVM memory - * - * @free_memory_of_gpu: Free GPUVM memory - * - * @map_memory_to_gpu: Map GPUVM memory into a specific VM address - * space. Allocates and updates page tables and page directories as - * needed. This function may return before all page table updates have - * completed. This allows multiple map operations (on multiple GPUs) - * to happen concurrently. Use sync_memory to synchronize with all - * pending updates. - * - * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space - * - * @sync_memory: Wait for pending page table updates to complete - * - * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access - * Pins the BO, maps it to kernel address space. Such BOs are never evicted. - * The kernel virtual address remains valid until the BO is freed. 
- * - * @restore_process_bos: Restore all BOs that belong to the - * process. This is intended for restoring memory mappings after a TTM - * eviction. - * * @invalidate_tlbs: Invalidate TLBs for a specific PASID * * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID * - * @submit_ib: Submits an IB to the engine specified by inserting the - * IB to the corresponding ring (ring type). The IB is executed with the - * specified VMID in a user mode context. - * - * @get_vm_fault_info: Return information about a recent VM fault on - * GFXv7 and v8. If multiple VM faults occurred since the last call of - * this function, it will return information about the first of those - * faults. On GFXv9 VM fault information is fully contained in the IH - * packet and this function is not needed. - * * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the * IH ring entry. This function allows the KFD ISR to get the VMID * from the fault status register as early as possible. * - * @gpu_recover: let kgd reset gpu after kfd detect CPC hang - * - * @set_compute_idle: Indicates that compute is idle on a device. This - * can be used to change power profiles depending on compute activity. 
- * * @get_hive_id: Returns hive id of current device, 0 if xgmi is not enabled * * This structure contains function pointers to services that the kgd driver @@ -320,21 +258,6 @@ struct tile_config { * */ struct kfd2kgd_calls { - int (*init_gtt_mem_allocation)(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9); - - void (*free_gtt_mem)(struct kgd_dev *kgd, void *mem_obj); - - void (*get_local_mem_info)(struct kgd_dev *kgd, - struct kfd_local_mem_info *mem_info); - uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); - - uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); - - int (*alloc_pasid)(unsigned int bits); - void (*free_pasid)(unsigned int pasid); - /* Register access functions */ void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, @@ -398,49 +321,11 @@ struct kfd2kgd_calls { uint64_t va, uint32_t vmid); int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config); - void (*get_cu_info)(struct kgd_dev *kgd, - struct kfd_cu_info *cu_info); - uint64_t (*get_vram_usage)(struct kgd_dev *kgd); - - int (*create_process_vm)(struct kgd_dev *kgd, unsigned int pasid, void **vm, - void **process_info, struct dma_fence **ef); - int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp, - unsigned int pasid, void **vm, void **process_info, - struct dma_fence **ef); - void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); - void (*release_process_vm)(struct kgd_dev *kgd, void *vm); - uint64_t (*get_process_page_dir)(void *vm); void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base); - int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, - uint64_t size, void *vm, - struct kgd_mem **mem, uint64_t *offset, - uint32_t flags); - int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); - int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, - void 
*vm); - int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, - void *vm); - int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); - int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem, - void **kptr, uint64_t *size); - int (*restore_process_bos)(void *process_info, struct dma_fence **ef); - int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); - - int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, - uint32_t vmid, uint64_t gpu_addr, - uint32_t *ib_cmd, uint32_t ib_len); - - int (*get_vm_fault_info)(struct kgd_dev *kgd, - struct kfd_vm_fault_info *info); uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); - - void (*gpu_recover)(struct kgd_dev *kgd); - - void (*set_compute_idle)(struct kgd_dev *kgd, bool idle); - uint64_t (*get_hive_id)(struct kgd_dev *kgd); }; diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 980e696989b1..1479ea1dc3e7 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -276,6 +276,10 @@ struct amd_pm_funcs { struct amd_pp_simple_clock_info *clocks); int (*notify_smu_enable_pwe)(void *handle); int (*enable_mgpu_fan_boost)(void *handle); + int (*set_active_display_count)(void *handle, uint32_t count); + int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock); + int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock); + int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index d6aa1d414320..9bc27f468d5b 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -300,7 +300,7 @@ static int pp_set_clockgating_by_smu(void *handle, uint32_t msg_id) return -EINVAL; if 
(hwmgr->hwmgr_func->update_clock_gatings == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } @@ -387,7 +387,7 @@ static uint32_t pp_dpm_get_sclk(void *handle, bool low) return 0; if (hwmgr->hwmgr_func->get_sclk == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -405,7 +405,7 @@ static uint32_t pp_dpm_get_mclk(void *handle, bool low) return 0; if (hwmgr->hwmgr_func->get_mclk == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -422,7 +422,7 @@ static void pp_dpm_powergate_vce(void *handle, bool gate) return; if (hwmgr->hwmgr_func->powergate_vce == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return; } mutex_lock(&hwmgr->smu_lock); @@ -438,7 +438,7 @@ static void pp_dpm_powergate_uvd(void *handle, bool gate) return; if (hwmgr->hwmgr_func->powergate_uvd == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return; } mutex_lock(&hwmgr->smu_lock); @@ -505,7 +505,7 @@ static void pp_dpm_set_fan_control_mode(void *handle, uint32_t mode) return; if (hwmgr->hwmgr_func->set_fan_control_mode == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return; } mutex_lock(&hwmgr->smu_lock); @@ -522,7 +522,7 @@ static uint32_t pp_dpm_get_fan_control_mode(void *handle) return 0; if (hwmgr->hwmgr_func->get_fan_control_mode == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -540,7 +540,7 @@ static int pp_dpm_set_fan_speed_percent(void *handle, uint32_t 
percent) return -EINVAL; if (hwmgr->hwmgr_func->set_fan_speed_percent == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -558,7 +558,7 @@ static int pp_dpm_get_fan_speed_percent(void *handle, uint32_t *speed) return -EINVAL; if (hwmgr->hwmgr_func->get_fan_speed_percent == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } @@ -594,7 +594,7 @@ static int pp_dpm_set_fan_speed_rpm(void *handle, uint32_t rpm) return -EINVAL; if (hwmgr->hwmgr_func->set_fan_speed_rpm == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -720,12 +720,12 @@ static int pp_dpm_force_clock_level(void *handle, return -EINVAL; if (hwmgr->hwmgr_func->force_clock_level == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { - pr_info("force clock level is for dpm manual mode only.\n"); + pr_debug("force clock level is for dpm manual mode only.\n"); return -EINVAL; } @@ -745,7 +745,7 @@ static int pp_dpm_print_clock_levels(void *handle, return -EINVAL; if (hwmgr->hwmgr_func->print_clock_levels == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -763,7 +763,7 @@ static int pp_dpm_get_sclk_od(void *handle) return -EINVAL; if (hwmgr->hwmgr_func->get_sclk_od == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -781,7 +781,7 @@ static int pp_dpm_set_sclk_od(void *handle, uint32_t value) return -EINVAL; if (hwmgr->hwmgr_func->set_sclk_od == 
NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } @@ -800,7 +800,7 @@ static int pp_dpm_get_mclk_od(void *handle) return -EINVAL; if (hwmgr->hwmgr_func->get_mclk_od == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -818,7 +818,7 @@ static int pp_dpm_set_mclk_od(void *handle, uint32_t value) return -EINVAL; if (hwmgr->hwmgr_func->set_mclk_od == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } mutex_lock(&hwmgr->smu_lock); @@ -878,7 +878,7 @@ static int pp_get_power_profile_mode(void *handle, char *buf) return -EINVAL; if (hwmgr->hwmgr_func->get_power_profile_mode == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return snprintf(buf, PAGE_SIZE, "\n"); } @@ -894,12 +894,12 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size) return ret; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return ret; } if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) { - pr_info("power profile setting is for manual dpm mode only.\n"); + pr_debug("power profile setting is for manual dpm mode only.\n"); return ret; } @@ -917,7 +917,7 @@ static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint3 return -EINVAL; if (hwmgr->hwmgr_func->odn_edit_dpm_table == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return -EINVAL; } @@ -935,7 +935,7 @@ static int pp_dpm_switch_power_profile(void *handle, return -EINVAL; if (hwmgr->hwmgr_func->set_power_profile_mode == NULL) { - pr_info("%s was not 
implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return -EINVAL; } @@ -972,7 +972,7 @@ static int pp_set_power_limit(void *handle, uint32_t limit) return -EINVAL; if (hwmgr->hwmgr_func->set_power_limit == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return -EINVAL; } @@ -1072,7 +1072,7 @@ static int pp_get_current_clocks(void *handle, &hw_clocks, PHM_PerformanceLevelDesignation_Activity); if (ret) { - pr_info("Error in phm_get_clock_info \n"); + pr_debug("Error in phm_get_clock_info \n"); mutex_unlock(&hwmgr->smu_lock); return -EINVAL; } @@ -1212,7 +1212,7 @@ static int pp_dpm_powergate_mmhub(void *handle) return -EINVAL; if (hwmgr->hwmgr_func->powergate_mmhub == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } @@ -1227,7 +1227,7 @@ static int pp_dpm_powergate_gfx(void *handle, bool gate) return 0; if (hwmgr->hwmgr_func->powergate_gfx == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return 0; } @@ -1242,7 +1242,7 @@ static void pp_dpm_powergate_acp(void *handle, bool gate) return; if (hwmgr->hwmgr_func->powergate_acp == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return; } @@ -1257,7 +1257,7 @@ static void pp_dpm_powergate_sdma(void *handle, bool gate) return; if (hwmgr->hwmgr_func->powergate_sdma == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return; } @@ -1303,7 +1303,7 @@ static int pp_notify_smu_enable_pwe(void *handle) return -EINVAL; if (hwmgr->hwmgr_func->smus_notify_pwe == NULL) { - pr_info("%s was not implemented.\n", __func__); + pr_info_ratelimited("%s was not implemented.\n", __func__); return -EINVAL;; } @@ -1332,6 +1332,78 @@ static 
int pp_enable_mgpu_fan_boost(void *handle) return 0; } +static int pp_set_min_deep_sleep_dcefclk(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_hard_min_dcefclk_by_freq(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_hard_min_fclk_by_freq(void *handle, uint32_t clock) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + if (hwmgr->hwmgr_func->set_hard_min_fclk_by_freq == NULL) { + pr_debug("%s was not implemented.\n", __func__); + return -EINVAL;; + } + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + +static int pp_set_active_display_count(void *handle, uint32_t count) +{ + struct pp_hwmgr *hwmgr = handle; + int ret = 0; + + if (!hwmgr || !hwmgr->pm_en) + return -EINVAL; + + mutex_lock(&hwmgr->smu_lock); + ret = phm_set_active_display_count(hwmgr, count); + mutex_unlock(&hwmgr->smu_lock); + + return ret; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1378,4 +1450,8 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_display_mode_validation_clocks = 
pp_get_display_mode_validation_clocks, .notify_smu_enable_pwe = pp_notify_smu_enable_pwe, .enable_mgpu_fan_boost = pp_enable_mgpu_fan_boost, + .set_active_display_count = pp_set_active_display_count, + .set_min_deep_sleep_dcefclk = pp_set_min_deep_sleep_dcefclk, + .set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq, + .set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index a2a7e0e94aa6..1f92a9f4c9e3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -288,8 +288,8 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, if (display_config == NULL) return -EINVAL; - if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk) - hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); + if (NULL != hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk) + hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk); for (index = 0; index < display_config->num_path_including_non_display; index++) { if (display_config->displays[index].controller_id != 0) @@ -480,3 +480,44 @@ int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr) return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr); } + +int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_active_display_count) + return -EINVAL; + + return hwmgr->hwmgr_func->set_active_display_count(hwmgr, count); +} + +int phm_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk) + return -EINVAL; + + return hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock); +} + +int phm_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + 
PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq) + return -EINVAL; + + return hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock); +} + +int phm_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + PHM_FUNC_CHECK(hwmgr); + + if (!hwmgr->hwmgr_func->set_hard_min_fclk_by_freq) + return -EINVAL; + + return hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock); +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index dd18cb710391..f95c5f50eb0f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -216,12 +216,12 @@ static inline uint32_t convert_10k_to_mhz(uint32_t clock) return (clock + 99) / 100; } -static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) +static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); if (smu10_data->need_min_deep_sleep_dcefclk && - smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) { + smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) { smu10_data->deep_sleep_dcefclk = convert_10k_to_mhz(clock); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetMinDeepSleepDcefclk, @@ -230,6 +230,34 @@ static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock) return 0; } +static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (smu10_data->dcf_actual_hard_min_freq && + smu10_data->dcf_actual_hard_min_freq != convert_10k_to_mhz(clock)) { + smu10_data->dcf_actual_hard_min_freq = convert_10k_to_mhz(clock); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinDcefclkByFreq, + smu10_data->dcf_actual_hard_min_freq); + } + return 0; +} + +static int 
smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock) +{ + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + + if (smu10_data->f_actual_hard_min_freq && + smu10_data->f_actual_hard_min_freq != convert_10k_to_mhz(clock)) { + smu10_data->f_actual_hard_min_freq = convert_10k_to_mhz(clock); + smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinFclkByFreq, + smu10_data->f_actual_hard_min_freq); + } + return 0; +} + static int smu10_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count) { struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); @@ -1206,7 +1234,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .get_max_high_clocks = smu10_get_max_high_clocks, .read_sensor = smu10_read_sensor, .set_active_display_count = smu10_set_active_display_count, - .set_deep_sleep_dcefclk = smu10_set_deep_sleep_dcefclk, + .set_min_deep_sleep_dcefclk = smu10_set_min_deep_sleep_dcefclk, .dynamic_state_management_enable = smu10_enable_dpm_tasks, .power_off_asic = smu10_power_off_asic, .asic_setup = smu10_setup_asic_task, @@ -1217,6 +1245,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = { .display_clock_voltage_request = smu10_display_clock_voltage_request, .powergate_gfx = smu10_gfx_off_control, .powergate_sdma = smu10_powergate_sdma, + .set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq, + .set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq, }; int smu10_init_function_pointers(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index b61a01f55284..d91390459326 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -269,7 +269,7 @@ static int smu7_construct_voltage_tables(struct pp_hwmgr *hwmgr) hwmgr->dyn_state.mvdd_dependency_on_mclk); PP_ASSERT_WITH_CODE((0 == result), - "Failed to retrieve SVI2 MVDD table from 
dependancy table.", + "Failed to retrieve SVI2 MVDD table from dependency table.", return result;); } @@ -288,7 +288,7 @@ static int smu7_construct_voltage_tables(struct pp_hwmgr *hwmgr) result = phm_get_svi2_voltage_table_v0(&(data->vddci_voltage_table), hwmgr->dyn_state.vddci_dependency_on_mclk); PP_ASSERT_WITH_CODE((0 == result), - "Failed to retrieve SVI2 VDDCI table from dependancy table.", + "Failed to retrieve SVI2 VDDCI table from dependency table.", return result); } @@ -317,7 +317,7 @@ static int smu7_construct_voltage_tables(struct pp_hwmgr *hwmgr) table_info->vddc_lookup_table); PP_ASSERT_WITH_CODE((0 == result), - "Failed to retrieve SVI2 VDDC table from dependancy table.", return result;); + "Failed to retrieve SVI2 VDDC table from dependency table.", return result;); } tmp = smum_get_mac_definition(hwmgr, SMU_MAX_LEVELS_VDDC); @@ -2859,7 +2859,10 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: - switch_limit_us = data->is_memory_gddr5 ? 190 : 150; + if (hwmgr->is_kicker) + switch_limit_us = data->is_memory_gddr5 ? 450 : 150; + else + switch_limit_us = data->is_memory_gddr5 ? 
190 : 150; break; case CHIP_VEGAM: switch_limit_us = 30; @@ -4223,9 +4226,17 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr) if (tmp & (1 << 23)) { data->mem_latency_high = MEM_LATENCY_HIGH; data->mem_latency_low = MEM_LATENCY_LOW; + if ((hwmgr->chip_id == CHIP_POLARIS10) || + (hwmgr->chip_id == CHIP_POLARIS11) || + (hwmgr->chip_id == CHIP_POLARIS12)) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC); } else { data->mem_latency_high = 330; data->mem_latency_low = 330; + if ((hwmgr->chip_id == CHIP_POLARIS10) || + (hwmgr->chip_id == CHIP_POLARIS11) || + (hwmgr->chip_id == CHIP_POLARIS12)) + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC); } return 0; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index 5e19f5977eb1..d138ddae563d 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -967,7 +967,7 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); mutex_lock(&adev->grbm_idx_mutex); value = 0; value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX); @@ -1014,13 +1014,13 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr) "Failed to enable DPM DIDT.", goto error); } mutex_unlock(&adev->grbm_idx_mutex); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; error: mutex_unlock(&adev->grbm_idx_mutex); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return result; } @@ -1034,7 +1034,7 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) PP_CAP(PHM_PlatformCaps_TDRamping) || PP_CAP(PHM_PlatformCaps_TCPRamping)) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); result = smu7_enable_didt(hwmgr, false); 
PP_ASSERT_WITH_CODE((result == 0), @@ -1046,12 +1046,12 @@ int smu7_disable_didt_config(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == result), "Failed to disable DPM DIDT.", goto error); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; error: - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return result; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c index fef111ddb736..553a203ac47c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c @@ -1228,17 +1228,14 @@ static int smu8_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, static int smu8_dpm_powerdown_uvd(struct pp_hwmgr *hwmgr) { - if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) { - smu8_nbdpm_pstate_enable_disable(hwmgr, true, true); + if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) return smum_send_msg_to_smc(hwmgr, PPSMC_MSG_UVDPowerOFF); - } return 0; } static int smu8_dpm_powerup_uvd(struct pp_hwmgr *hwmgr) { if (PP_CAP(PHM_PlatformCaps_UVDPowerGating)) { - smu8_nbdpm_pstate_enable_disable(hwmgr, false, true); return smum_send_msg_to_smc_with_parameter( hwmgr, PPSMC_MSG_UVDPowerON, @@ -1995,6 +1992,7 @@ static const struct pp_hwmgr_func smu8_hwmgr_funcs = { .power_state_set = smu8_set_power_state_tasks, .dynamic_state_management_disable = smu8_disable_dpm_tasks, .notify_cac_buffer_info = smu8_notify_cac_buffer_info, + .update_nbdpm_pstate = smu8_nbdpm_pstate_enable_disable, .get_thermal_temperature_range = smu8_get_thermal_temperature_range, }; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index 2d88abf97e7b..6f26cb241ecc 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -937,7 +937,7 @@ static int vega10_enable_cac_driving_se_didt_config(struct 
pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -962,7 +962,7 @@ static int vega10_enable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -971,11 +971,11 @@ static int vega10_disable_cac_driving_se_didt_config(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -988,7 +988,7 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -1007,7 +1007,7 @@ static int vega10_enable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, GCDiDtDroopCtrlConfig_vega10); if (PP_CAP(PHM_PlatformCaps_GCEDC)) @@ -1024,11 +1024,11 @@ static int vega10_disable_psm_gc_didt_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1049,7 +1049,7 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - 
adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); mutex_lock(&adev->grbm_idx_mutex); for (count = 0; count < num_se; count++) { @@ -1070,7 +1070,7 @@ static int vega10_enable_se_edc_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -1079,11 +1079,11 @@ static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -1097,7 +1097,7 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) num_se = adev->gfx.config.max_shader_engines; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, AvfsPSMResetConfig_vega10); @@ -1118,7 +1118,7 @@ static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, true); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); vega10_program_gc_didt_config_registers(hwmgr, PSMGCEDCDroopCtrlConfig_vega10); @@ -1138,11 +1138,11 @@ static int vega10_disable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; uint32_t data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); vega10_didt_set_mask(hwmgr, false); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); if (PP_CAP(PHM_PlatformCaps_GCEDC)) { data = 0x00000000; @@ -1160,7 +1160,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = hwmgr->adev; int result; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); mutex_lock(&adev->grbm_idx_mutex); 
WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000); @@ -1173,7 +1173,7 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) vega10_didt_set_mask(hwmgr, false); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 3b7fce5d7258..2e99ecf4ab76 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -2777,7 +2777,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, - (clocks.data[i].clocks_in_khz == now) ? "*" : ""); + (clocks.data[i].clocks_in_khz == now * 10) ? "*" : ""); break; case PP_MCLK: @@ -2794,7 +2794,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, for (i = 0; i < clocks.num_levels; i++) size += sprintf(buf + size, "%d: %uMhz %s\n", i, clocks.data[i].clocks_in_khz / 1000, - (clocks.data[i].clocks_in_khz == now) ? "*" : ""); + (clocks.data[i].clocks_in_khz == now * 10) ? 
"*" : ""); break; case PP_PCIE: @@ -3476,109 +3476,64 @@ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr, static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* init/fini related */ - .backend_init = - vega20_hwmgr_backend_init, - .backend_fini = - vega20_hwmgr_backend_fini, - .asic_setup = - vega20_setup_asic_task, - .power_off_asic = - vega20_power_off_asic, - .dynamic_state_management_enable = - vega20_enable_dpm_tasks, - .dynamic_state_management_disable = - vega20_disable_dpm_tasks, + .backend_init = vega20_hwmgr_backend_init, + .backend_fini = vega20_hwmgr_backend_fini, + .asic_setup = vega20_setup_asic_task, + .power_off_asic = vega20_power_off_asic, + .dynamic_state_management_enable = vega20_enable_dpm_tasks, + .dynamic_state_management_disable = vega20_disable_dpm_tasks, /* power state related */ - .apply_clocks_adjust_rules = - vega20_apply_clocks_adjust_rules, - .pre_display_config_changed = - vega20_pre_display_configuration_changed_task, - .display_config_changed = - vega20_display_configuration_changed_task, + .apply_clocks_adjust_rules = vega20_apply_clocks_adjust_rules, + .pre_display_config_changed = vega20_pre_display_configuration_changed_task, + .display_config_changed = vega20_display_configuration_changed_task, .check_smc_update_required_for_display_configuration = vega20_check_smc_update_required_for_display_configuration, .notify_smc_display_config_after_ps_adjustment = vega20_notify_smc_display_config_after_ps_adjustment, /* export to DAL */ - .get_sclk = - vega20_dpm_get_sclk, - .get_mclk = - vega20_dpm_get_mclk, - .get_dal_power_level = - vega20_get_dal_power_level, - .get_clock_by_type_with_latency = - vega20_get_clock_by_type_with_latency, - .get_clock_by_type_with_voltage = - vega20_get_clock_by_type_with_voltage, - .set_watermarks_for_clocks_ranges = - vega20_set_watermarks_for_clocks_ranges, - .display_clock_voltage_request = - vega20_display_clock_voltage_request, - .get_performance_level = - 
vega20_get_performance_level, + .get_sclk = vega20_dpm_get_sclk, + .get_mclk = vega20_dpm_get_mclk, + .get_dal_power_level = vega20_get_dal_power_level, + .get_clock_by_type_with_latency = vega20_get_clock_by_type_with_latency, + .get_clock_by_type_with_voltage = vega20_get_clock_by_type_with_voltage, + .set_watermarks_for_clocks_ranges = vega20_set_watermarks_for_clocks_ranges, + .display_clock_voltage_request = vega20_display_clock_voltage_request, + .get_performance_level = vega20_get_performance_level, /* UMD pstate, profile related */ - .force_dpm_level = - vega20_dpm_force_dpm_level, - .get_power_profile_mode = - vega20_get_power_profile_mode, - .set_power_profile_mode = - vega20_set_power_profile_mode, + .force_dpm_level = vega20_dpm_force_dpm_level, + .get_power_profile_mode = vega20_get_power_profile_mode, + .set_power_profile_mode = vega20_set_power_profile_mode, /* od related */ - .set_power_limit = - vega20_set_power_limit, - .get_sclk_od = - vega20_get_sclk_od, - .set_sclk_od = - vega20_set_sclk_od, - .get_mclk_od = - vega20_get_mclk_od, - .set_mclk_od = - vega20_set_mclk_od, - .odn_edit_dpm_table = - vega20_odn_edit_dpm_table, + .set_power_limit = vega20_set_power_limit, + .get_sclk_od = vega20_get_sclk_od, + .set_sclk_od = vega20_set_sclk_od, + .get_mclk_od = vega20_get_mclk_od, + .set_mclk_od = vega20_set_mclk_od, + .odn_edit_dpm_table = vega20_odn_edit_dpm_table, /* for sysfs to retrive/set gfxclk/memclk */ - .force_clock_level = - vega20_force_clock_level, - .print_clock_levels = - vega20_print_clock_levels, - .read_sensor = - vega20_read_sensor, + .force_clock_level = vega20_force_clock_level, + .print_clock_levels = vega20_print_clock_levels, + .read_sensor = vega20_read_sensor, /* powergate related */ - .powergate_uvd = - vega20_power_gate_uvd, - .powergate_vce = - vega20_power_gate_vce, + .powergate_uvd = vega20_power_gate_uvd, + .powergate_vce = vega20_power_gate_vce, /* thermal related */ - .start_thermal_controller = - 
vega20_start_thermal_controller, - .stop_thermal_controller = - vega20_thermal_stop_thermal_controller, - .get_thermal_temperature_range = - vega20_get_thermal_temperature_range, - .register_irq_handlers = - smu9_register_irq_handlers, - .disable_smc_firmware_ctf = - vega20_thermal_disable_alert, + .start_thermal_controller = vega20_start_thermal_controller, + .stop_thermal_controller = vega20_thermal_stop_thermal_controller, + .get_thermal_temperature_range = vega20_get_thermal_temperature_range, + .register_irq_handlers = smu9_register_irq_handlers, + .disable_smc_firmware_ctf = vega20_thermal_disable_alert, /* fan control related */ - .get_fan_speed_percent = - vega20_fan_ctrl_get_fan_speed_percent, - .set_fan_speed_percent = - vega20_fan_ctrl_set_fan_speed_percent, - .get_fan_speed_info = - vega20_fan_ctrl_get_fan_speed_info, - .get_fan_speed_rpm = - vega20_fan_ctrl_get_fan_speed_rpm, - .set_fan_speed_rpm = - vega20_fan_ctrl_set_fan_speed_rpm, - .get_fan_control_mode = - vega20_get_fan_control_mode, - .set_fan_control_mode = - vega20_set_fan_control_mode, + .get_fan_speed_percent = vega20_fan_ctrl_get_fan_speed_percent, + .set_fan_speed_percent = vega20_fan_ctrl_set_fan_speed_percent, + .get_fan_speed_info = vega20_fan_ctrl_get_fan_speed_info, + .get_fan_speed_rpm = vega20_fan_ctrl_get_fan_speed_rpm, + .set_fan_speed_rpm = vega20_fan_ctrl_set_fan_speed_rpm, + .get_fan_control_mode = vega20_get_fan_control_mode, + .set_fan_control_mode = vega20_set_fan_control_mode, /* smu memory related */ - .notify_cac_buffer_info = - vega20_notify_cac_buffer_info, - .enable_mgpu_fan_boost = - vega20_enable_mgpu_fan_boost, + .notify_cac_buffer_info = vega20_notify_cac_buffer_info, + .enable_mgpu_fan_boost = vega20_enable_mgpu_fan_boost, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 54fd0125d9cf..f4dab979a3a1 100644 --- 
a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -463,5 +463,8 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr, extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr); + +extern int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count); + #endif /* _HARDWARE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index e5a60aa44b5d..0d298a0409f5 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -28,7 +28,6 @@ #include "hardwaremanager.h" #include "hwmgr_ppt.h" #include "ppatomctrl.h" -#include "hwmgr_ppt.h" #include "power_state.h" #include "smu_helper.h" @@ -310,7 +309,7 @@ struct pp_hwmgr_func { int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable); int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr); int (*set_active_display_count)(struct pp_hwmgr *hwmgr, uint32_t count); - int (*set_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_min_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock); int (*start_thermal_controller)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range); int (*notify_cac_buffer_info)(struct pp_hwmgr *hwmgr, uint32_t virtual_addr_low, @@ -318,6 +317,9 @@ struct pp_hwmgr_func { uint32_t mc_addr_low, uint32_t mc_addr_hi, uint32_t size); + int (*update_nbdpm_pstate)(struct pp_hwmgr *hwmgr, + bool enable, + bool lock); int (*get_thermal_temperature_range)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range); int (*get_power_profile_mode)(struct pp_hwmgr *hwmgr, char *buf); @@ -330,6 +332,8 @@ struct pp_hwmgr_func { int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr); int (*powergate_sdma)(struct pp_hwmgr *hwmgr, bool bgate); int (*enable_mgpu_fan_boost)(struct pp_hwmgr *hwmgr); + int 
(*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); + int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_common.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_common.h index 65eb630bfea3..94bf7b649c20 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu7_common.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_common.h @@ -40,10 +40,6 @@ #include "bif/bif_5_0_d.h" #include "bif/bif_5_0_sh_mask.h" - -#include "bif/bif_5_0_d.h" -#include "bif/bif_5_0_sh_mask.h" - #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h index c1a99dfe4913..6e19f4c7cf8f 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h @@ -397,6 +397,9 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) +#define PPSMC_MSG_EnableFFC ((uint16_t) 0x307) +#define PPSMC_MSG_DisableFFC ((uint16_t) 0x308) + #define PPSMC_MSG_EnableDpmDidt ((uint16_t) 0x309) #define PPSMC_MSG_DisableDpmDidt ((uint16_t) 0x30A) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c index a1e0ac9ae248..52abca065764 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c @@ -44,7 +44,6 @@ #include "smu7_hwmgr.h" #include "hardwaremanager.h" -#include "ppatomctrl.h" #include "atombios.h" #include "pppcielanes.h" @@ -1529,8 +1528,21 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) efuse = efuse >> 24; if (hwmgr->chip_id == CHIP_POLARIS10) { - min = 1000; - max = 2300; + if (hwmgr->is_kicker) { + min = 1200; + max = 2500; + } else { + min = 1000; + max = 2300; + } + } else if (hwmgr->chip_id == CHIP_POLARIS11) { + if (hwmgr->is_kicker) { 
+ min = 900; + max = 2100; + } else { + min = 1100; + max = 2100; + } } else { min = 1100; max = 2100; @@ -1627,6 +1639,7 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend); + struct amdgpu_device *adev = hwmgr->adev; SMU74_Discrete_DpmTable *table = &(smu_data->smc_state_table); int result = 0; @@ -1647,6 +1660,59 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr) result = atomctrl_get_avfs_information(hwmgr, &avfs_params); if (0 == result) { + if (((adev->pdev->device == 0x67ef) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe5))) || + ((adev->pdev->device == 0x67ff) && + ((adev->pdev->revision == 0xcf) || + (adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + if ((adev->pdev->device == 0x67ef && adev->pdev->revision == 0xe5) || + (adev->pdev->device == 0x67ff && adev->pdev->revision == 0xef)) { + if ((avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 == 0xEA522DD3) && + (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 == 0x5645A) && + (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 == 0x33F9E) && + (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 == 0xFFFFC5CC) && + (avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 == 0x1B1A) && + (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b == 0xFFFFFCED)) { + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF718F1D4; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x323FD; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0x1E455; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0; + avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x23; + } + } + } else if (hwmgr->chip_id == CHIP_POLARIS12 && !hwmgr->is_kicker) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF6B024DD; + 
avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x3005E; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0x18A5F; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0x315; + avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFED1; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x3B; + } else if (((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe3) || + (adev->pdev->revision == 0xe4) || + (adev->pdev->revision == 0xe5) || + (adev->pdev->revision == 0xe7) || + (adev->pdev->revision == 0xef))) || + ((adev->pdev->device == 0x6fdf) && + ((adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { + avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 = 0xF843B66B; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 = 0x59CB5; + avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 = 0xFFFF287F; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0; + avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFF23; + avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b = 0x58; + } + } + + if (0 == result) { table->BTCGB_VDROOP_TABLE[0].a0 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0); table->BTCGB_VDROOP_TABLE[0].a1 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1); table->BTCGB_VDROOP_TABLE[0].a2 = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c index d0eb8ab50148..d111dd4e03d7 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu10_smumgr.c @@ -29,7 +29,6 @@ #include "rv_ppsmc.h" #include "smu10_driver_if.h" #include "smu10.h" -#include "ppatomctrl.h" #include "pp_debug.h" diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c index 09b844ec3eab..e2787e14a500 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c +++ 
b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c @@ -24,6 +24,7 @@ #include <linux/delay.h> #include <linux/gfp.h> #include <linux/kernel.h> +#include <linux/ktime.h> #include <linux/slab.h> #include <linux/types.h> @@ -61,9 +62,13 @@ static uint32_t smu8_get_argument(struct pp_hwmgr *hwmgr) mmSMU_MP1_SRBM2P_ARG_0); } -static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg) +/* Send a message to the SMC, and wait for its response.*/ +static int smu8_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, + uint16_t msg, uint32_t parameter) { int result = 0; + ktime_t t_start; + s64 elapsed_us; if (hwmgr == NULL || hwmgr->device == NULL) return -EINVAL; @@ -74,28 +79,31 @@ static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg) /* Read the last message to SMU, to report actual cause */ uint32_t val = cgs_read_register(hwmgr->device, mmSMU_MP1_SRBM2P_MSG_0); - pr_err("smu8_send_msg_to_smc_async (0x%04x) failed\n", msg); - pr_err("SMU still servicing msg (0x%04x)\n", val); + pr_err("%s(0x%04x) aborted; SMU still servicing msg (0x%04x)\n", + __func__, msg, val); return result; } + t_start = ktime_get(); + + cgs_write_register(hwmgr->device, mmSMU_MP1_SRBM2P_ARG_0, parameter); cgs_write_register(hwmgr->device, mmSMU_MP1_SRBM2P_RESP_0, 0); cgs_write_register(hwmgr->device, mmSMU_MP1_SRBM2P_MSG_0, msg); - return 0; + result = PHM_WAIT_FIELD_UNEQUAL(hwmgr, + SMU_MP1_SRBM2P_RESP_0, CONTENT, 0); + + elapsed_us = ktime_us_delta(ktime_get(), t_start); + + WARN(result, "%s(0x%04x, %#x) timed out after %lld us\n", + __func__, msg, parameter, elapsed_us); + + return result; } -/* Send a message to the SMC, and wait for its response.*/ static int smu8_send_msg_to_smc(struct pp_hwmgr *hwmgr, uint16_t msg) { - int result = 0; - - result = smu8_send_msg_to_smc_async(hwmgr, msg); - if (result != 0) - return result; - - return PHM_WAIT_FIELD_UNEQUAL(hwmgr, - SMU_MP1_SRBM2P_RESP_0, CONTENT, 0); + return 
smu8_send_msg_to_smc_with_parameter(hwmgr, msg, 0); } static int smu8_set_smc_sram_address(struct pp_hwmgr *hwmgr, @@ -135,17 +143,6 @@ static int smu8_write_smc_sram_dword(struct pp_hwmgr *hwmgr, return result; } -static int smu8_send_msg_to_smc_with_parameter(struct pp_hwmgr *hwmgr, - uint16_t msg, uint32_t parameter) -{ - if (hwmgr == NULL || hwmgr->device == NULL) - return -EINVAL; - - cgs_write_register(hwmgr->device, mmSMU_MP1_SRBM2P_ARG_0, parameter); - - return smu8_send_msg_to_smc(hwmgr, msg); -} - static int smu8_check_fw_load_finish(struct pp_hwmgr *hwmgr, uint32_t firmware) { @@ -737,6 +734,10 @@ static int smu8_start_smu(struct pp_hwmgr *hwmgr) cgs_write_register(hwmgr->device, mmMP0PUB_IND_INDEX, index); hwmgr->smu_version = cgs_read_register(hwmgr->device, mmMP0PUB_IND_DATA); + pr_info("smu version %02d.%02d.%02d\n", + ((hwmgr->smu_version >> 16) & 0xFF), + ((hwmgr->smu_version >> 8) & 0xFF), + (hwmgr->smu_version & 0xFF)); adev->pm.fw_version = hwmgr->smu_version >> 8; return smu8_request_smu_load_fw(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c index 9f71512b2510..1e69300f6175 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c @@ -40,7 +40,6 @@ #include "smu7_hwmgr.h" #include "hardwaremanager.h" -#include "ppatomctrl.h" #include "atombios.h" #include "pppcielanes.h" |