diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 196 |
1 files changed, 175 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index e92e7dea71da..f93883db2b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,6 +36,7 @@ #include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> #include <linux/mmu_notifier.h> +#include <linux/suspend.h> #include "amdgpu.h" #include "amdgpu_irq.h" @@ -45,6 +46,8 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" +#include "amdgpu_xgmi.h" +#include "amdgpu_reset.h" /* * KMS wrapper. @@ -90,9 +93,10 @@ * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC * - 3.39.0 - DMABUF implicit sync does a full pipeline sync * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ + * - 3.41.0 - Add video codec query */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 40 +#define KMS_DRIVER_MINOR 41 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -145,6 +149,7 @@ int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode; uint amdgpu_smu_memory_pool_size; +int amdgpu_smu_pptable_id = -1; /* * FBC (bit 0) disabled by default * MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default @@ -162,16 +167,26 @@ int amdgpu_discovery = -1; int amdgpu_mes; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; -int amdgpu_tmz; +int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); + struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), + .delayed_reset_work = __DELAYED_WORK_INITIALIZER( + mgpu_info.delayed_reset_work, + amdgpu_drv_delayed_reset_work_handler, 0), }; int amdgpu_ras_enable = -1; uint amdgpu_ras_mask = 0xffffffff; int amdgpu_bad_page_threshold = -1; +struct amdgpu_watchdog_timer amdgpu_watchdog_timer = { + .timeout_fatal_disable = false, + .period = 0x0, /* default to 0x0 (timeout disable) */ +}; /** * DOC: vramlimit (int) @@ -502,7 +517,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); * DOC: gpu_recovery (int) * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). */ -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)"); module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); /** @@ -528,6 +543,20 @@ MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff), module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444); /** + * DOC: timeout_fatal_disable (bool) + * Disable Watchdog timeout fatal error event + */ +MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)"); +module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644); + +/** + * DOC: timeout_period (uint) + * Modify the watchdog timeout max_cycles as (1 << period) + */ +MODULE_PARM_DESC(timeout_period, "watchdog timeout period (0 = timeout disabled, 1 ~ 0x23 = timeout maxcycles = (1 << period)"); +module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); + +/** * DOC: si_support (int) * Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled, * set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available, @@ -748,6 +777,13 @@ bool no_system_mem_limit; module_param(no_system_mem_limit, bool, 0644); MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); +/** + * DOC: no_queue_eviction_on_vm_fault (int) + * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). + */ +int amdgpu_no_queue_eviction_on_vm_fault = 0; +MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); +module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** @@ -792,10 +828,21 @@ module_param_named(backlight, amdgpu_backlight, bint, 0444); * * The default value: 0 (off). TODO: change to auto till it is completed. */ -MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); +MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enabled the optimization to adjust front porch timing to achieve seamless mode change experience + * when setting a freesync supported mode for which full modeset is not needed. + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci) */ @@ -815,6 +862,15 @@ module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); +/** + * DOC: smu_pptable_id (int) + * Used to override pptable id. id = 0 use VBIOS pptable. + * id > 0 use the soft pptable with specicfied id. + */ +MODULE_PARM_DESC(smu_pptable_id, + "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)"); +module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1125,6 +1181,11 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + /* Aldebaran */ + {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0, 0, 0} }; @@ -1279,6 +1340,98 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) adev->mp1_state = PP_MP1_STATE_NONE; } +/** + * amdgpu_drv_delayed_reset_work_handler - work handler for reset + * + * @work: work_struct. + */ +static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) +{ + struct list_head device_list; + struct amdgpu_device *adev; + int i, r; + struct amdgpu_reset_context reset_context; + + memset(&reset_context, 0, sizeof(reset_context)); + + mutex_lock(&mgpu_info.mutex); + if (mgpu_info.pending_reset == true) { + mutex_unlock(&mgpu_info.mutex); + return; + } + mgpu_info.pending_reset = true; + mutex_unlock(&mgpu_info.mutex); + + /* Use a common context, just need to make sure full reset is done */ + reset_context.method = AMD_RESET_METHOD_NONE; + set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + reset_context.reset_req_dev = adev; + r = amdgpu_device_pre_asic_reset(adev, &reset_context); + if (r) { + dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", + r, adev_to_drm(adev)->unique); + } + if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work)) + r = -EALREADY; + } + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + flush_work(&adev->xgmi_reset_work); + adev->gmc.xgmi.pending_reset = false; + } + + /* reset function will rebuild the xgmi hive info , clear it now */ + for (i = 0; i < mgpu_info.num_dgpu; i++) + amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev); + + INIT_LIST_HEAD(&device_list); + + for (i = 0; i < mgpu_info.num_dgpu; i++) + list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list); + + /* unregister the GPU first, reset function will add them back */ + list_for_each_entry(adev, &device_list, reset_list) + amdgpu_unregister_gpu_instance(adev); + + /* Use a common context, just need to make sure full reset is done */ + set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + r = amdgpu_do_asic_reset(&device_list, &reset_context); + + if (r) { + DRM_ERROR("reinit gpus failure"); + return; + } + for (i = 0; i < mgpu_info.num_dgpu; i++) { + adev = mgpu_info.gpu_ins[i].adev; + if (!adev->kfd.init_complete) + amdgpu_amdkfd_device_init(adev); + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + return; +} + +static int amdgpu_pmops_prepare(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + + /* Return a positive number here so + * DPM_FLAG_SMART_SUSPEND works properly + */ + if (amdgpu_device_supports_boco(drm_dev)) + return pm_runtime_suspended(dev) && + pm_suspend_via_firmware(); + + return 0; +} + +static void amdgpu_pmops_complete(struct device *dev) +{ + /* nothing to do */ +} + static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); @@ -1364,7 +1517,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_atpx(drm_dev)) + if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; ret = amdgpu_device_suspend(drm_dev, false); @@ -1373,16 +1526,14 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) return ret; } - if (amdgpu_device_supports_atpx(drm_dev)) { + if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ - if (!amdgpu_is_atpx_hybrid()) { - amdgpu_device_cache_pci_state(pdev); - pci_disable_device(pdev); - pci_ignore_hotplug(pdev); - pci_set_power_state(pdev, PCI_D3cold); - } + amdgpu_device_cache_pci_state(pdev); + pci_disable_device(pdev); + pci_ignore_hotplug(pdev); + pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; } else if (amdgpu_device_supports_baco(drm_dev)) { amdgpu_device_baco_enter(drm_dev); @@ -1401,19 +1552,17 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!adev->runpm) return -EINVAL; - if (amdgpu_device_supports_atpx(drm_dev)) { + if (amdgpu_device_supports_px(drm_dev)) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ - if (!amdgpu_is_atpx_hybrid()) { - pci_set_power_state(pdev, PCI_D0); - amdgpu_device_load_pci_state(pdev); - ret = pci_enable_device(pdev); - if (ret) - return ret; - } + pci_set_power_state(pdev, PCI_D0); + amdgpu_device_load_pci_state(pdev); + ret = pci_enable_device(pdev); + if (ret) + return ret; pci_set_master(pdev); } else if (amdgpu_device_supports_boco(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX @@ -1424,7 +1573,10 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); - if (amdgpu_device_supports_atpx(drm_dev)) + if (ret) + return ret; + + if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; @@ -1505,6 +1657,8 @@ out: } static const struct dev_pm_ops amdgpu_pm_ops = { + .prepare = amdgpu_pmops_prepare, + .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, |